diff --git a/hcatalog/build-support/checkstyle/coding_style.xml b/hcatalog/build-support/checkstyle/coding_style.xml index c90f3ce..3bbeddc 100644 --- a/hcatalog/build-support/checkstyle/coding_style.xml +++ b/hcatalog/build-support/checkstyle/coding_style.xml @@ -58,6 +58,7 @@ imposed on others. + diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatCli.java b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatCli.java index 574851f..14bf1c4 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatCli.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatCli.java @@ -57,278 +57,278 @@ */ public class HCatCli { - @SuppressWarnings("static-access") - public static void main(String[] args) { + @SuppressWarnings("static-access") + public static void main(String[] args) { - try { - LogUtils.initHiveLog4j(); - } catch (LogInitializationException e) { + try { + LogUtils.initHiveLog4j(); + } catch (LogInitializationException e) { - } - - CliSessionState ss = new CliSessionState(new HiveConf(SessionState.class)); - ss.in = System.in; - try { - ss.out = new PrintStream(System.out, true, "UTF-8"); - ss.err = new PrintStream(System.err, true, "UTF-8"); - } catch (UnsupportedEncodingException e) { - System.exit(1); - } - - HiveConf conf = ss.getConf(); - - HiveConf.setVar(conf, ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); - - SessionState.start(ss); - - Options options = new Options(); - - // -e 'quoted-query-string' - options.addOption(OptionBuilder - .hasArg() - .withArgName("exec") - .withDescription("hcat command given from command line") - .create('e')); - - // -f - options.addOption(OptionBuilder - .hasArg() - .withArgName("file") - .withDescription("hcat commands in file") - .create('f')); - - // -g - options.addOption(OptionBuilder - .hasArg(). - withArgName("group"). - withDescription("group for the db/table specified in CREATE statement"). 
- create('g')); - - // -p - options.addOption(OptionBuilder - .hasArg() - .withArgName("perms") - .withDescription("permissions for the db/table specified in CREATE statement") - .create('p')); - - // -D - options.addOption(OptionBuilder - .hasArgs(2) - .withArgName("property=value") - .withValueSeparator() - .withDescription("use hadoop value for given property") - .create('D')); - - // [-h|--help] - options.addOption(new Option("h", "help", false, "Print help information")); - - Parser parser = new GnuParser(); - CommandLine cmdLine = null; - - try { - cmdLine = parser.parse(options, args); - - } catch (ParseException e) { - printUsage(options, ss.err); - System.exit(1); - } - // -e - String execString = (String) cmdLine.getOptionValue('e'); - // -f - String fileName = (String) cmdLine.getOptionValue('f'); - // -h - if (cmdLine.hasOption('h')) { - printUsage(options, ss.out); - System.exit(0); - } - - if (execString != null && fileName != null) { - ss.err.println("The '-e' and '-f' options cannot be specified simultaneously"); - printUsage(options, ss.err); - System.exit(1); - } - - // -p - String perms = (String) cmdLine.getOptionValue('p'); - if (perms != null) { - validatePermissions(ss, conf, perms); - } - - // -g - String grp = (String) cmdLine.getOptionValue('g'); - if (grp != null) { - conf.set(HCatConstants.HCAT_GROUP, grp); - } - - // -D - setConfProperties(conf, cmdLine.getOptionProperties("D")); - - if (execString != null) { - System.exit(processLine(execString)); - } + } - try { - if (fileName != null) { - System.exit(processFile(fileName)); - } - } catch (FileNotFoundException e) { - ss.err.println("Input file not found. (" + e.getMessage() + ")"); - System.exit(1); - } catch (IOException e) { - ss.err.println("Could not open input file for reading. (" + e.getMessage() + ")"); - System.exit(1); - } + CliSessionState ss = new CliSessionState(new HiveConf(SessionState.class)); + ss.in = System.in; + try { + ss.out = new PrintStream(System.out, true, "UTF-8"); + ss.err = new PrintStream(System.err, true, "UTF-8"); + } catch (UnsupportedEncodingException e) { + System.exit(1); + } - // -h - printUsage(options, ss.err); - System.exit(1); + HiveConf conf = ss.getConf(); + + HiveConf.setVar(conf, ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); + + SessionState.start(ss); + + Options options = new Options(); + + // -e 'quoted-query-string' + options.addOption(OptionBuilder + .hasArg() + .withArgName("exec") + .withDescription("hcat command given from command line") + .create('e')); + + // -f + options.addOption(OptionBuilder + .hasArg() + .withArgName("file") + .withDescription("hcat commands in file") + .create('f')); + + // -g + options.addOption(OptionBuilder + .hasArg(). + withArgName("group"). + withDescription("group for the db/table specified in CREATE statement"). 
+ create('g')); + + // -p + options.addOption(OptionBuilder + .hasArg() + .withArgName("perms") + .withDescription("permissions for the db/table specified in CREATE statement") + .create('p')); + + // -D + options.addOption(OptionBuilder + .hasArgs(2) + .withArgName("property=value") + .withValueSeparator() + .withDescription("use hadoop value for given property") + .create('D')); + + // [-h|--help] + options.addOption(new Option("h", "help", false, "Print help information")); + + Parser parser = new GnuParser(); + CommandLine cmdLine = null; + + try { + cmdLine = parser.parse(options, args); + + } catch (ParseException e) { + printUsage(options, ss.err); + System.exit(1); + } + // -e + String execString = (String) cmdLine.getOptionValue('e'); + // -f + String fileName = (String) cmdLine.getOptionValue('f'); + // -h + if (cmdLine.hasOption('h')) { + printUsage(options, ss.out); + System.exit(0); } - private static void setConfProperties(HiveConf conf, Properties props) { - for (java.util.Map.Entry e : props.entrySet()) - conf.set((String) e.getKey(), (String) e.getValue()); + if (execString != null && fileName != null) { + ss.err.println("The '-e' and '-f' options cannot be specified simultaneously"); + printUsage(options, ss.err); + System.exit(1); } - private static int processLine(String line) { - int ret = 0; + // -p + String perms = (String) cmdLine.getOptionValue('p'); + if (perms != null) { + validatePermissions(ss, conf, perms); + } - String command = ""; - for (String oneCmd : line.split(";")) { + // -g + String grp = (String) cmdLine.getOptionValue('g'); + if (grp != null) { + conf.set(HCatConstants.HCAT_GROUP, grp); + } - if (StringUtils.endsWith(oneCmd, "\\")) { - command += StringUtils.chop(oneCmd) + ";"; - continue; - } else { - command += oneCmd; - } - if (StringUtils.isBlank(command)) { - continue; - } + // -D + setConfProperties(conf, cmdLine.getOptionProperties("D")); - ret = processCmd(command); - command = ""; - } - return ret; + if (execString != null) { + System.exit(processLine(execString)); } - private static int processFile(String fileName) throws IOException { - FileReader fileReader = null; - BufferedReader reader = null; - try { - fileReader = new FileReader(fileName); - reader = new BufferedReader(fileReader); - String line; - StringBuilder qsb = new StringBuilder(); - - while ((line = reader.readLine()) != null) { - qsb.append(line + "\n"); - } - - return (processLine(qsb.toString())); - } finally { - if (fileReader != null) { - fileReader.close(); - } - if (reader != null) { - reader.close(); - } - } + try { + if (fileName != null) { + System.exit(processFile(fileName)); + } + } catch (FileNotFoundException e) { + ss.err.println("Input file not found. (" + e.getMessage() + ")"); + System.exit(1); + } catch (IOException e) { + ss.err.println("Could not open input file for reading. 
(" + e.getMessage() + ")"); + System.exit(1); } - private static int processCmd(String cmd) { - - SessionState ss = SessionState.get(); - long start = System.currentTimeMillis(); + // -h + printUsage(options, ss.err); + System.exit(1); + } + + private static void setConfProperties(HiveConf conf, Properties props) { + for (java.util.Map.Entry e : props.entrySet()) + conf.set((String) e.getKey(), (String) e.getValue()); + } + + private static int processLine(String line) { + int ret = 0; + + String command = ""; + for (String oneCmd : line.split(";")) { + + if (StringUtils.endsWith(oneCmd, "\\")) { + command += StringUtils.chop(oneCmd) + ";"; + continue; + } else { + command += oneCmd; + } + if (StringUtils.isBlank(command)) { + continue; + } + + ret = processCmd(command); + command = ""; + } + return ret; + } + + private static int processFile(String fileName) throws IOException { + FileReader fileReader = null; + BufferedReader reader = null; + try { + fileReader = new FileReader(fileName); + reader = new BufferedReader(fileReader); + String line; + StringBuilder qsb = new StringBuilder(); + + while ((line = reader.readLine()) != null) { + qsb.append(line + "\n"); + } + + return (processLine(qsb.toString())); + } finally { + if (fileReader != null) { + fileReader.close(); + } + if (reader != null) { + reader.close(); + } + } + } - cmd = cmd.trim(); - String firstToken = cmd.split("\\s+")[0].trim(); + private static int processCmd(String cmd) { - if (firstToken.equalsIgnoreCase("set")) { - return new SetProcessor().run(cmd.substring(firstToken.length()).trim()).getResponseCode(); - } else if (firstToken.equalsIgnoreCase("dfs")) { - return new DfsProcessor(ss.getConf()).run(cmd.substring(firstToken.length()).trim()).getResponseCode(); - } + SessionState ss = SessionState.get(); + long start = System.currentTimeMillis(); - HCatDriver driver = new HCatDriver(); + cmd = cmd.trim(); + String firstToken = cmd.split("\\s+")[0].trim(); - int ret = driver.run(cmd).getResponseCode(); + if (firstToken.equalsIgnoreCase("set")) { + return new SetProcessor().run(cmd.substring(firstToken.length()).trim()).getResponseCode(); + } else if (firstToken.equalsIgnoreCase("dfs")) { + return new DfsProcessor(ss.getConf()).run(cmd.substring(firstToken.length()).trim()).getResponseCode(); + } - if (ret != 0) { - driver.close(); - System.exit(ret); - } + HCatDriver driver = new HCatDriver(); - ArrayList res = new ArrayList(); - try { - while (driver.getResults(res)) { - for (String r : res) { - ss.out.println(r); - } - res.clear(); - } - } catch (IOException e) { - ss.err.println("Failed with exception " + e.getClass().getName() + ":" - + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); - ret = 1; - } catch (CommandNeedRetryException e) { - ss.err.println("Failed with exception " + e.getClass().getName() + ":" - + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); - ret = 1; - } + int ret = driver.run(cmd).getResponseCode(); - int cret = driver.close(); - if (ret == 0) { - ret = cret; - } + if (ret != 0) { + driver.close(); + System.exit(ret); + } - long end = System.currentTimeMillis(); - if (end > start) { - double timeTaken = (end - start) / 1000.0; - ss.err.println("Time taken: " + timeTaken + " seconds"); + ArrayList res = new ArrayList(); + try { + while (driver.getResults(res)) { + for (String r : res) { + ss.out.println(r); } - return ret; + res.clear(); + } + } catch (IOException e) { + ss.err.println("Failed with exception " + 
e.getClass().getName() + ":" + + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); + ret = 1; + } catch (CommandNeedRetryException e) { + ss.err.println("Failed with exception " + e.getClass().getName() + ":" + + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); + ret = 1; } - private static void printUsage(Options options, OutputStream os) { - PrintWriter pw = new PrintWriter(os); - new HelpFormatter().printHelp(pw, 2 * HelpFormatter.DEFAULT_WIDTH, - "hcat { -e \"\" | -f \"\" } [ -g \"\" ] [ -p \"\" ] [ -D\"=\" ]", - null, options, HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, - null, false); - pw.flush(); + int cret = driver.close(); + if (ret == 0) { + ret = cret; } - private static void validatePermissions(CliSessionState ss, HiveConf conf, String perms) { - perms = perms.trim(); - FsPermission fp = null; - - if (perms.matches("^\\s*([r,w,x,-]{9})\\s*$")) { - fp = FsPermission.valueOf("d" + perms); - } else if (perms.matches("^\\s*([0-7]{3})\\s*$")) { - fp = new FsPermission(Short.decode("0" + perms)); - } else { - ss.err.println("Invalid permission specification: " + perms); - System.exit(1); - } + long end = System.currentTimeMillis(); + if (end > start) { + double timeTaken = (end - start) / 1000.0; + ss.err.println("Time taken: " + timeTaken + " seconds"); + } + return ret; + } + + private static void printUsage(Options options, OutputStream os) { + PrintWriter pw = new PrintWriter(os); + new HelpFormatter().printHelp(pw, 2 * HelpFormatter.DEFAULT_WIDTH, + "hcat { -e \"\" | -f \"\" } [ -g \"\" ] [ -p \"\" ] [ -D\"=\" ]", + null, options, HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, + null, false); + pw.flush(); + } + + private static void validatePermissions(CliSessionState ss, HiveConf conf, String perms) { + perms = perms.trim(); + FsPermission fp = null; + + if (perms.matches("^\\s*([r,w,x,-]{9})\\s*$")) { + fp = FsPermission.valueOf("d" + perms); + } else if (perms.matches("^\\s*([0-7]{3})\\s*$")) { + fp = new FsPermission(Short.decode("0" + perms)); + } else { + ss.err.println("Invalid permission specification: " + perms); + System.exit(1); + } - if (!HCatUtil.validateMorePermissive(fp.getUserAction(), fp.getGroupAction())) { - ss.err.println("Invalid permission specification: " + perms + " : user permissions must be more permissive than group permission "); - System.exit(1); - } - if (!HCatUtil.validateMorePermissive(fp.getGroupAction(), fp.getOtherAction())) { - ss.err.println("Invalid permission specification: " + perms + " : group permissions must be more permissive than other permission "); - System.exit(1); - } - if ((!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getUserAction())) || - (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getGroupAction())) || - (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getOtherAction()))) { - ss.err.println("Invalid permission specification: " + perms + " : permissions must have execute permissions if read or write permissions are specified "); - System.exit(1); - } + if (!HCatUtil.validateMorePermissive(fp.getUserAction(), fp.getGroupAction())) { + ss.err.println("Invalid permission specification: " + perms + " : user permissions must be more permissive than group permission "); + System.exit(1); + } + if (!HCatUtil.validateMorePermissive(fp.getGroupAction(), fp.getOtherAction())) { + ss.err.println("Invalid permission specification: " + perms + " : group permissions must be more permissive than other permission "); + 
System.exit(1); + } + if ((!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getUserAction())) || + (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getGroupAction())) || + (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getOtherAction()))) { + ss.err.println("Invalid permission specification: " + perms + " : permissions must have execute permissions if read or write permissions are specified "); + System.exit(1); + } - conf.set(HCatConstants.HCAT_PERMS, "d" + fp.toString()); + conf.set(HCatConstants.HCAT_PERMS, "d" + fp.toString()); - } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatDriver.java b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatDriver.java index d06f555..f62b818 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatDriver.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatDriver.java @@ -38,109 +38,109 @@ */ public class HCatDriver extends Driver { - @Override - public CommandProcessorResponse run(String command) { + @Override + public CommandProcessorResponse run(String command) { + + CommandProcessorResponse cpr = null; + try { + cpr = super.run(command); + } catch (CommandNeedRetryException e) { + return new CommandProcessorResponse(-1, e.toString(), ""); + } - CommandProcessorResponse cpr = null; - try { - cpr = super.run(command); - } catch (CommandNeedRetryException e) { - return new CommandProcessorResponse(-1, e.toString(), ""); - } + SessionState ss = SessionState.get(); + + if (cpr.getResponseCode() == 0) { + // Only attempt to do this, if cmd was successful. + int rc = setFSPermsNGrp(ss); + cpr = new CommandProcessorResponse(rc); + } + // reset conf vars + ss.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, ""); + ss.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, ""); - SessionState ss = SessionState.get(); + return cpr; + } - if (cpr.getResponseCode() == 0) { - // Only attempt to do this, if cmd was successful. - int rc = setFSPermsNGrp(ss); - cpr = new CommandProcessorResponse(rc); - } - // reset conf vars - ss.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, ""); - ss.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, ""); + private int setFSPermsNGrp(SessionState ss) { - return cpr; + Configuration conf = ss.getConf(); + + String tblName = conf.get(HCatConstants.HCAT_CREATE_TBL_NAME, ""); + if (tblName.isEmpty()) { + tblName = conf.get("import.destination.table", ""); + conf.set("import.destination.table", ""); } + String dbName = conf.get(HCatConstants.HCAT_CREATE_DB_NAME, ""); + String grp = conf.get(HCatConstants.HCAT_GROUP, null); + String permsStr = conf.get(HCatConstants.HCAT_PERMS, null); - private int setFSPermsNGrp(SessionState ss) { + if (tblName.isEmpty() && dbName.isEmpty()) { + // it wasn't create db/table + return 0; + } - Configuration conf = ss.getConf(); + if (null == grp && null == permsStr) { + // there were no grp and perms to begin with. 
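// A minimal, standalone sketch of how the two permission forms accepted by the
// -p option above end up as the same Hadoop FsPermission: the 9-character symbolic
// form gets a leading "d" because FsPermission.valueOf() expects the 10-character
// "ls -l" style string, and the octal form is decoded via Short.decode("0" + perms).
// The "more permissive" rule enforced by HCatUtil.validateMorePermissive() is only
// approximated here with FsAction.implies(); the real helper may differ in detail.
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;

public class HCatPermsSketch {
  public static void main(String[] args) {
    FsPermission symbolic = FsPermission.valueOf("d" + "rwxr-x---");
    FsPermission octal = new FsPermission(Short.decode("0" + "750"));

    System.out.println(symbolic + " == " + octal);   // rwxr-x--- == rwxr-x---

    FsAction user = symbolic.getUserAction();
    FsAction group = symbolic.getGroupAction();
    FsAction other = symbolic.getOtherAction();
    // user >= group >= other, mirroring the checks in validatePermissions()
    System.out.println(user.implies(group) && group.implies(other));   // true
  }
}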
+ return 0; + } - String tblName = conf.get(HCatConstants.HCAT_CREATE_TBL_NAME, ""); - if (tblName.isEmpty()) { - tblName = conf.get("import.destination.table", ""); - conf.set("import.destination.table", ""); - } - String dbName = conf.get(HCatConstants.HCAT_CREATE_DB_NAME, ""); - String grp = conf.get(HCatConstants.HCAT_GROUP, null); - String permsStr = conf.get(HCatConstants.HCAT_PERMS, null); + FsPermission perms = FsPermission.valueOf(permsStr); - if (tblName.isEmpty() && dbName.isEmpty()) { - // it wasn't create db/table - return 0; - } + if (!tblName.isEmpty()) { + Hive db = null; + try { + db = Hive.get(); + Table tbl = db.getTable(tblName); + Path tblPath = tbl.getPath(); - if (null == grp && null == permsStr) { - // there were no grp and perms to begin with. - return 0; + FileSystem fs = tblPath.getFileSystem(conf); + if (null != perms) { + fs.setPermission(tblPath, perms); } - - FsPermission perms = FsPermission.valueOf(permsStr); - - if (!tblName.isEmpty()) { - Hive db = null; - try { - db = Hive.get(); - Table tbl = db.getTable(tblName); - Path tblPath = tbl.getPath(); - - FileSystem fs = tblPath.getFileSystem(conf); - if (null != perms) { - fs.setPermission(tblPath, perms); - } - if (null != grp) { - fs.setOwner(tblPath, null, grp); - } - return 0; - - } catch (Exception e) { - ss.err.println(String.format("Failed to set permissions/groups on TABLE: <%s> %s", tblName, e.getMessage())); - try { // We need to drop the table. - if (null != db) { - db.dropTable(tblName); - } - } catch (HiveException he) { - ss.err.println(String.format("Failed to drop TABLE <%s> after failing to set permissions/groups on it. %s", tblName, e.getMessage())); - } - return 1; - } - } else { - // looks like a db operation - if (dbName.isEmpty() || dbName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { - // We dont set perms or groups for default dir. - return 0; - } else { - try { - Hive db = Hive.get(); - Path dbPath = new Warehouse(conf).getDatabasePath(db.getDatabase(dbName)); - FileSystem fs = dbPath.getFileSystem(conf); - if (perms != null) { - fs.setPermission(dbPath, perms); - } - if (null != grp) { - fs.setOwner(dbPath, null, grp); - } - return 0; - } catch (Exception e) { - ss.err.println(String.format("Failed to set permissions and/or group on DB: <%s> %s", dbName, e.getMessage())); - try { - Hive.get().dropDatabase(dbName); - } catch (Exception e1) { - ss.err.println(String.format("Failed to drop DB <%s> after failing to set permissions/group on it. %s", dbName, e1.getMessage())); - } - return 1; - } - } + if (null != grp) { + fs.setOwner(tblPath, null, grp); + } + return 0; + + } catch (Exception e) { + ss.err.println(String.format("Failed to set permissions/groups on TABLE: <%s> %s", tblName, e.getMessage())); + try { // We need to drop the table. + if (null != db) { + db.dropTable(tblName); + } + } catch (HiveException he) { + ss.err.println(String.format("Failed to drop TABLE <%s> after failing to set permissions/groups on it. %s", tblName, e.getMessage())); + } + return 1; + } + } else { + // looks like a db operation + if (dbName.isEmpty() || dbName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { + // We dont set perms or groups for default dir. 
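// A standalone sketch of the two FileSystem calls setFSPermsNGrp() is about to make
// against the table (or database) directory. The scratch path and group name here are
// assumptions for illustration only; setOwner() needs sufficient privileges on the
// underlying filesystem, and passing null as the user name changes only the group
// while leaving the owner untouched, which is how the -g option gets applied.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class ApplyPermsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path dir = new Path("/tmp/hcat_perms_demo");   // hypothetical scratch directory

    FileSystem fs = dir.getFileSystem(conf);
    fs.mkdirs(dir);

    fs.setPermission(dir, FsPermission.valueOf("drwxr-x---"));
    // fs.setOwner(dir, null, "hadoop");   // group name "hadoop" is an assumption

    System.out.println(fs.getFileStatus(dir).getPermission());   // expected: rwxr-x---
  }
}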
+ return 0; + } else { + try { + Hive db = Hive.get(); + Path dbPath = new Warehouse(conf).getDatabasePath(db.getDatabase(dbName)); + FileSystem fs = dbPath.getFileSystem(conf); + if (perms != null) { + fs.setPermission(dbPath, perms); + } + if (null != grp) { + fs.setOwner(dbPath, null, grp); + } + return 0; + } catch (Exception e) { + ss.err.println(String.format("Failed to set permissions and/or group on DB: <%s> %s", dbName, e.getMessage())); + try { + Hive.get().dropDatabase(dbName); + } catch (Exception e1) { + ss.err.println(String.format("Failed to drop DB <%s> after failing to set permissions/group on it. %s", dbName, e1.getMessage())); + } + return 1; } + } } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java index 96d0d3d..3393a32 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java @@ -40,60 +40,60 @@ */ final class CreateDatabaseHook extends HCatSemanticAnalyzerBase { - String databaseName; + String databaseName; - @Override - public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) - throws SemanticException { + @Override + public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) + throws SemanticException { - Hive db; - try { - db = context.getHive(); - } catch (HiveException e) { - throw new SemanticException("Couldn't get Hive DB instance in semantic analysis phase.", e); - } + Hive db; + try { + db = context.getHive(); + } catch (HiveException e) { + throw new SemanticException("Couldn't get Hive DB instance in semantic analysis phase.", e); + } - // Analyze and create tbl properties object - int numCh = ast.getChildCount(); + // Analyze and create tbl properties object + int numCh = ast.getChildCount(); - databaseName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0)); + databaseName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0)); - for (int num = 1; num < numCh; num++) { - ASTNode child = (ASTNode) ast.getChild(num); + for (int num = 1; num < numCh; num++) { + ASTNode child = (ASTNode) ast.getChild(num); - switch (child.getToken().getType()) { + switch (child.getToken().getType()) { - case HiveParser.TOK_IFNOTEXISTS: - try { - List dbs = db.getDatabasesByPattern(databaseName); - if (dbs != null && dbs.size() > 0) { // db exists - return ast; - } - } catch (HiveException e) { - throw new SemanticException(e); - } - break; - } + case HiveParser.TOK_IFNOTEXISTS: + try { + List dbs = db.getDatabasesByPattern(databaseName); + if (dbs != null && dbs.size() > 0) { // db exists + return ast; + } + } catch (HiveException e) { + throw new SemanticException(e); } - - return ast; + break; + } } - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - context.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, databaseName); - super.postAnalyze(context, rootTasks); - } + return ast; + } - @Override - protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, - Hive hive, DDLWork work) throws HiveException { - CreateDatabaseDesc createDb = work.getCreateDatabaseDesc(); - if (createDb != null) { - Database db = new Database(createDb.getName(), createDb.getComment(), - createDb.getLocationUri(), 
createDb.getDatabaseProperties()); - authorize(db, Privilege.CREATE); - } + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + context.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, databaseName); + super.postAnalyze(context, rootTasks); + } + + @Override + protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, + Hive hive, DDLWork work) throws HiveException { + CreateDatabaseDesc createDb = work.getCreateDatabaseDesc(); + if (createDb != null) { + Database db = new Database(createDb.getName(), createDb.getComment(), + createDb.getLocationUri(), createDb.getDatabaseProperties()); + authorize(db, Privilege.CREATE); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java index 08ca7ab..791e01b 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java @@ -51,198 +51,198 @@ */ final class CreateTableHook extends HCatSemanticAnalyzerBase { - private String tableName; + private String tableName; + + @Override + public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, + ASTNode ast) throws SemanticException { + + Hive db; + try { + db = context.getHive(); + } catch (HiveException e) { + throw new SemanticException( + "Couldn't get Hive DB instance in semantic analysis phase.", + e); + } + + // Analyze and create tbl properties object + int numCh = ast.getChildCount(); + + String inputFormat = null, outputFormat = null; + tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast + .getChild(0)); + boolean likeTable = false; + + for (int num = 1; num < numCh; num++) { + ASTNode child = (ASTNode) ast.getChild(num); + + switch (child.getToken().getType()) { + + case HiveParser.TOK_QUERY: // CTAS + throw new SemanticException( + "Operation not supported. 
Create table as " + + "Select is not a valid operation."); + + case HiveParser.TOK_TABLEBUCKETS: + break; + + case HiveParser.TOK_TBLSEQUENCEFILE: + inputFormat = HCatConstants.SEQUENCEFILE_INPUT; + outputFormat = HCatConstants.SEQUENCEFILE_OUTPUT; + break; - @Override - public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, - ASTNode ast) throws SemanticException { + case HiveParser.TOK_TBLTEXTFILE: + inputFormat = org.apache.hadoop.mapred.TextInputFormat.class.getName(); + outputFormat = org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat.class.getName(); - Hive db; + break; + + case HiveParser.TOK_LIKETABLE: + likeTable = true; + break; + + case HiveParser.TOK_IFNOTEXISTS: try { - db = context.getHive(); + List tables = db.getTablesByPattern(tableName); + if (tables != null && tables.size() > 0) { // table + // exists + return ast; + } } catch (HiveException e) { - throw new SemanticException( - "Couldn't get Hive DB instance in semantic analysis phase.", - e); + throw new SemanticException(e); } + break; - // Analyze and create tbl properties object - int numCh = ast.getChildCount(); - - String inputFormat = null, outputFormat = null; - tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast - .getChild(0)); - boolean likeTable = false; - - for (int num = 1; num < numCh; num++) { - ASTNode child = (ASTNode) ast.getChild(num); - - switch (child.getToken().getType()) { - - case HiveParser.TOK_QUERY: // CTAS - throw new SemanticException( - "Operation not supported. Create table as " + - "Select is not a valid operation."); - - case HiveParser.TOK_TABLEBUCKETS: - break; - - case HiveParser.TOK_TBLSEQUENCEFILE: - inputFormat = HCatConstants.SEQUENCEFILE_INPUT; - outputFormat = HCatConstants.SEQUENCEFILE_OUTPUT; - break; - - case HiveParser.TOK_TBLTEXTFILE: - inputFormat = org.apache.hadoop.mapred.TextInputFormat.class.getName(); - outputFormat = org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat.class.getName(); - - break; - - case HiveParser.TOK_LIKETABLE: - likeTable = true; - break; - - case HiveParser.TOK_IFNOTEXISTS: - try { - List tables = db.getTablesByPattern(tableName); - if (tables != null && tables.size() > 0) { // table - // exists - return ast; - } - } catch (HiveException e) { - throw new SemanticException(e); - } - break; - - case HiveParser.TOK_TABLEPARTCOLS: - List partCols = BaseSemanticAnalyzer - .getColumns((ASTNode) child.getChild(0), false); - for (FieldSchema fs : partCols) { - if (!fs.getType().equalsIgnoreCase("string")) { - throw new SemanticException( - "Operation not supported. HCatalog only " + - "supports partition columns of type string. " - + "For column: " - + fs.getName() - + " Found type: " + fs.getType()); - } - } - break; - - case HiveParser.TOK_STORAGEHANDLER: - String storageHandler = BaseSemanticAnalyzer - .unescapeSQLString(child.getChild(0).getText()); - if (org.apache.commons.lang.StringUtils - .isNotEmpty(storageHandler)) { - return ast; - } - - break; - - case HiveParser.TOK_TABLEFILEFORMAT: - if (child.getChildCount() < 2) { - throw new SemanticException( - "Incomplete specification of File Format. 
" + - "You must provide InputFormat, OutputFormat."); - } - inputFormat = BaseSemanticAnalyzer.unescapeSQLString(child - .getChild(0).getText()); - outputFormat = BaseSemanticAnalyzer.unescapeSQLString(child - .getChild(1).getText()); - break; - - case HiveParser.TOK_TBLRCFILE: - inputFormat = RCFileInputFormat.class.getName(); - outputFormat = RCFileOutputFormat.class.getName(); - break; - - } + case HiveParser.TOK_TABLEPARTCOLS: + List partCols = BaseSemanticAnalyzer + .getColumns((ASTNode) child.getChild(0), false); + for (FieldSchema fs : partCols) { + if (!fs.getType().equalsIgnoreCase("string")) { + throw new SemanticException( + "Operation not supported. HCatalog only " + + "supports partition columns of type string. " + + "For column: " + + fs.getName() + + " Found type: " + fs.getType()); + } + } + break; + + case HiveParser.TOK_STORAGEHANDLER: + String storageHandler = BaseSemanticAnalyzer + .unescapeSQLString(child.getChild(0).getText()); + if (org.apache.commons.lang.StringUtils + .isNotEmpty(storageHandler)) { + return ast; } - if (!likeTable && (inputFormat == null || outputFormat == null)) { - throw new SemanticException( - "STORED AS specification is either incomplete or incorrect."); + break; + + case HiveParser.TOK_TABLEFILEFORMAT: + if (child.getChildCount() < 2) { + throw new SemanticException( + "Incomplete specification of File Format. " + + "You must provide InputFormat, OutputFormat."); } + inputFormat = BaseSemanticAnalyzer.unescapeSQLString(child + .getChild(0).getText()); + outputFormat = BaseSemanticAnalyzer.unescapeSQLString(child + .getChild(1).getText()); + break; + + case HiveParser.TOK_TBLRCFILE: + inputFormat = RCFileInputFormat.class.getName(); + outputFormat = RCFileOutputFormat.class.getName(); + break; + + } + } + + if (!likeTable && (inputFormat == null || outputFormat == null)) { + throw new SemanticException( + "STORED AS specification is either incomplete or incorrect."); + } + + return ast; + } + + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) + throws SemanticException { + + if (rootTasks.size() == 0) { + // There will be no DDL task created in case if its CREATE TABLE IF + // NOT EXISTS + return; + } + CreateTableDesc desc = ((DDLTask) rootTasks.get(rootTasks.size() - 1)) + .getWork().getCreateTblDesc(); + if (desc == null) { + // Desc will be null if its CREATE TABLE LIKE. Desc will be + // contained in CreateTableLikeDesc. Currently, HCat disallows CTLT in + // pre-hook. So, desc can never be null. + return; + } + Map tblProps = desc.getTblProps(); + if (tblProps == null) { + // tblProps will be null if user didnt use tblprops in his CREATE + // TABLE cmd. + tblProps = new HashMap(); - return ast; } - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) - throws SemanticException { + // first check if we will allow the user to create table. + String storageHandler = desc.getStorageHandler(); + if (StringUtils.isEmpty(storageHandler)) { + } else { + try { + HCatStorageHandler storageHandlerInst = HCatUtil + .getStorageHandler(context.getConf(), + desc.getStorageHandler(), + desc.getSerName(), + desc.getInputFormat(), + desc.getOutputFormat()); + //Authorization checks are performed by the storageHandler.getAuthorizationProvider(), if + //StorageDelegationAuthorizationProvider is used. 
+ } catch (IOException e) { + throw new SemanticException(e); + } + } - if (rootTasks.size() == 0) { - // There will be no DDL task created in case if its CREATE TABLE IF - // NOT EXISTS - return; + if (desc != null) { + try { + Table table = context.getHive().newTable(desc.getTableName()); + if (desc.getLocation() != null) { + table.setDataLocation(new Path(desc.getLocation()).toUri()); } - CreateTableDesc desc = ((DDLTask) rootTasks.get(rootTasks.size() - 1)) - .getWork().getCreateTblDesc(); - if (desc == null) { - // Desc will be null if its CREATE TABLE LIKE. Desc will be - // contained in CreateTableLikeDesc. Currently, HCat disallows CTLT in - // pre-hook. So, desc can never be null. - return; + if (desc.getStorageHandler() != null) { + table.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, + desc.getStorageHandler()); } - Map tblProps = desc.getTblProps(); - if (tblProps == null) { - // tblProps will be null if user didnt use tblprops in his CREATE - // TABLE cmd. - tblProps = new HashMap(); - + for (Map.Entry prop : tblProps.entrySet()) { + table.setProperty(prop.getKey(), prop.getValue()); } - - // first check if we will allow the user to create table. - String storageHandler = desc.getStorageHandler(); - if (StringUtils.isEmpty(storageHandler)) { - } else { - try { - HCatStorageHandler storageHandlerInst = HCatUtil - .getStorageHandler(context.getConf(), - desc.getStorageHandler(), - desc.getSerName(), - desc.getInputFormat(), - desc.getOutputFormat()); - //Authorization checks are performed by the storageHandler.getAuthorizationProvider(), if - //StorageDelegationAuthorizationProvider is used. - } catch (IOException e) { - throw new SemanticException(e); - } + for (Map.Entry prop : desc.getSerdeProps().entrySet()) { + table.setSerdeParam(prop.getKey(), prop.getValue()); } + //TODO: set other Table properties as needed - if (desc != null) { - try { - Table table = context.getHive().newTable(desc.getTableName()); - if (desc.getLocation() != null) { - table.setDataLocation(new Path(desc.getLocation()).toUri()); - } - if (desc.getStorageHandler() != null) { - table.setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, - desc.getStorageHandler()); - } - for (Map.Entry prop : tblProps.entrySet()) { - table.setProperty(prop.getKey(), prop.getValue()); - } - for (Map.Entry prop : desc.getSerdeProps().entrySet()) { - table.setSerdeParam(prop.getKey(), prop.getValue()); - } - //TODO: set other Table properties as needed - - //authorize against the table operation so that location permissions can be checked if any - - if (HiveConf.getBoolVar(context.getConf(), - HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { - authorize(table, Privilege.CREATE); - } - } catch (HiveException ex) { - throw new SemanticException(ex); - } - } + //authorize against the table operation so that location permissions can be checked if any - desc.setTblProps(tblProps); - context.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, tableName); + if (HiveConf.getBoolVar(context.getConf(), + HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { + authorize(table, Privilege.CREATE); + } + } catch (HiveException ex) { + throw new SemanticException(ex); + } } + + desc.setTblProps(tblProps); + context.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, tableName); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java 
b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java index 13d8a0c..0e011e4 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java @@ -54,325 +54,325 @@ */ public class HCatSemanticAnalyzer extends HCatSemanticAnalyzerBase { - private AbstractSemanticAnalyzerHook hook; - private ASTNode ast; - - - @Override - public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) - throws SemanticException { - - this.ast = ast; - switch (ast.getToken().getType()) { - - // HCat wants to intercept following tokens and special-handle them. - case HiveParser.TOK_CREATETABLE: - hook = new CreateTableHook(); - return hook.preAnalyze(context, ast); - - case HiveParser.TOK_CREATEDATABASE: - hook = new CreateDatabaseHook(); - return hook.preAnalyze(context, ast); - - case HiveParser.TOK_ALTERTABLE_PARTITION: - if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_FILEFORMAT) { - return ast; - } else if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_ALTERPARTS_MERGEFILES) { - // unsupported - throw new SemanticException("Operation not supported."); - } else { - return ast; - } - - // HCat will allow these operations to be performed. - // Database DDL - case HiveParser.TOK_SHOWDATABASES: - case HiveParser.TOK_DROPDATABASE: - case HiveParser.TOK_SWITCHDATABASE: - case HiveParser.TOK_DESCDATABASE: - case HiveParser.TOK_ALTERDATABASE_PROPERTIES: - - // Index DDL - case HiveParser.TOK_ALTERINDEX_PROPERTIES: - case HiveParser.TOK_CREATEINDEX: - case HiveParser.TOK_DROPINDEX: - case HiveParser.TOK_SHOWINDEXES: - - // View DDL - // "alter view add partition" does not work because of the nature of implementation - // of the DDL in hive. Hive will internally invoke another Driver on the select statement, - // and HCat does not let "select" statement through. I cannot find a way to get around it - // without modifying hive code. So just leave it unsupported. 
- //case HiveParser.TOK_ALTERVIEW_ADDPARTS: - case HiveParser.TOK_ALTERVIEW_DROPPARTS: - case HiveParser.TOK_ALTERVIEW_PROPERTIES: - case HiveParser.TOK_ALTERVIEW_RENAME: - case HiveParser.TOK_CREATEVIEW: - case HiveParser.TOK_DROPVIEW: - - // Authorization DDL - case HiveParser.TOK_CREATEROLE: - case HiveParser.TOK_DROPROLE: - case HiveParser.TOK_GRANT_ROLE: - case HiveParser.TOK_GRANT_WITH_OPTION: - case HiveParser.TOK_GRANT: - case HiveParser.TOK_REVOKE_ROLE: - case HiveParser.TOK_REVOKE: - case HiveParser.TOK_SHOW_GRANT: - case HiveParser.TOK_SHOW_ROLE_GRANT: - - // Misc DDL - case HiveParser.TOK_LOCKTABLE: - case HiveParser.TOK_UNLOCKTABLE: - case HiveParser.TOK_SHOWLOCKS: - case HiveParser.TOK_DESCFUNCTION: - case HiveParser.TOK_SHOWFUNCTIONS: - case HiveParser.TOK_EXPLAIN: - - // Table DDL - case HiveParser.TOK_ALTERTABLE_ADDPARTS: - case HiveParser.TOK_ALTERTABLE_ADDCOLS: - case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: - case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: - case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: - case HiveParser.TOK_ALTERTABLE_DROPPARTS: - case HiveParser.TOK_ALTERTABLE_PROPERTIES: - case HiveParser.TOK_ALTERTABLE_RENAME: - case HiveParser.TOK_ALTERTABLE_RENAMECOL: - case HiveParser.TOK_ALTERTABLE_REPLACECOLS: - case HiveParser.TOK_ALTERTABLE_SERIALIZER: - case HiveParser.TOK_ALTERTABLE_TOUCH: - case HiveParser.TOK_DESCTABLE: - case HiveParser.TOK_DROPTABLE: - case HiveParser.TOK_SHOW_TABLESTATUS: - case HiveParser.TOK_SHOWPARTITIONS: - case HiveParser.TOK_SHOWTABLES: - return ast; - - // In all other cases, throw an exception. Its a white-list of allowed operations. - default: - throw new SemanticException("Operation not supported."); + private AbstractSemanticAnalyzerHook hook; + private ASTNode ast; + + + @Override + public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) + throws SemanticException { + + this.ast = ast; + switch (ast.getToken().getType()) { + + // HCat wants to intercept following tokens and special-handle them. + case HiveParser.TOK_CREATETABLE: + hook = new CreateTableHook(); + return hook.preAnalyze(context, ast); + + case HiveParser.TOK_CREATEDATABASE: + hook = new CreateDatabaseHook(); + return hook.preAnalyze(context, ast); + + case HiveParser.TOK_ALTERTABLE_PARTITION: + if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_FILEFORMAT) { + return ast; + } else if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_ALTERPARTS_MERGEFILES) { + // unsupported + throw new SemanticException("Operation not supported."); + } else { + return ast; + } + + // HCat will allow these operations to be performed. + // Database DDL + case HiveParser.TOK_SHOWDATABASES: + case HiveParser.TOK_DROPDATABASE: + case HiveParser.TOK_SWITCHDATABASE: + case HiveParser.TOK_DESCDATABASE: + case HiveParser.TOK_ALTERDATABASE_PROPERTIES: + + // Index DDL + case HiveParser.TOK_ALTERINDEX_PROPERTIES: + case HiveParser.TOK_CREATEINDEX: + case HiveParser.TOK_DROPINDEX: + case HiveParser.TOK_SHOWINDEXES: + + // View DDL + // "alter view add partition" does not work because of the nature of implementation + // of the DDL in hive. Hive will internally invoke another Driver on the select statement, + // and HCat does not let "select" statement through. I cannot find a way to get around it + // without modifying hive code. So just leave it unsupported. 
+ //case HiveParser.TOK_ALTERVIEW_ADDPARTS: + case HiveParser.TOK_ALTERVIEW_DROPPARTS: + case HiveParser.TOK_ALTERVIEW_PROPERTIES: + case HiveParser.TOK_ALTERVIEW_RENAME: + case HiveParser.TOK_CREATEVIEW: + case HiveParser.TOK_DROPVIEW: + + // Authorization DDL + case HiveParser.TOK_CREATEROLE: + case HiveParser.TOK_DROPROLE: + case HiveParser.TOK_GRANT_ROLE: + case HiveParser.TOK_GRANT_WITH_OPTION: + case HiveParser.TOK_GRANT: + case HiveParser.TOK_REVOKE_ROLE: + case HiveParser.TOK_REVOKE: + case HiveParser.TOK_SHOW_GRANT: + case HiveParser.TOK_SHOW_ROLE_GRANT: + + // Misc DDL + case HiveParser.TOK_LOCKTABLE: + case HiveParser.TOK_UNLOCKTABLE: + case HiveParser.TOK_SHOWLOCKS: + case HiveParser.TOK_DESCFUNCTION: + case HiveParser.TOK_SHOWFUNCTIONS: + case HiveParser.TOK_EXPLAIN: + + // Table DDL + case HiveParser.TOK_ALTERTABLE_ADDPARTS: + case HiveParser.TOK_ALTERTABLE_ADDCOLS: + case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: + case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: + case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: + case HiveParser.TOK_ALTERTABLE_DROPPARTS: + case HiveParser.TOK_ALTERTABLE_PROPERTIES: + case HiveParser.TOK_ALTERTABLE_RENAME: + case HiveParser.TOK_ALTERTABLE_RENAMECOL: + case HiveParser.TOK_ALTERTABLE_REPLACECOLS: + case HiveParser.TOK_ALTERTABLE_SERIALIZER: + case HiveParser.TOK_ALTERTABLE_TOUCH: + case HiveParser.TOK_DESCTABLE: + case HiveParser.TOK_DROPTABLE: + case HiveParser.TOK_SHOW_TABLESTATUS: + case HiveParser.TOK_SHOWPARTITIONS: + case HiveParser.TOK_SHOWTABLES: + return ast; + + // In all other cases, throw an exception. Its a white-list of allowed operations. + default: + throw new SemanticException("Operation not supported."); - } } - - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - - try { - - switch (ast.getToken().getType()) { - - case HiveParser.TOK_CREATETABLE: - case HiveParser.TOK_CREATEDATABASE: - case HiveParser.TOK_ALTERTABLE_PARTITION: - - // HCat will allow these operations to be performed. 
- // Database DDL - case HiveParser.TOK_SHOWDATABASES: - case HiveParser.TOK_DROPDATABASE: - case HiveParser.TOK_SWITCHDATABASE: - case HiveParser.TOK_DESCDATABASE: - case HiveParser.TOK_ALTERDATABASE_PROPERTIES: - - // Index DDL - case HiveParser.TOK_ALTERINDEX_PROPERTIES: - case HiveParser.TOK_CREATEINDEX: - case HiveParser.TOK_DROPINDEX: - case HiveParser.TOK_SHOWINDEXES: - - // View DDL - //case HiveParser.TOK_ALTERVIEW_ADDPARTS: - case HiveParser.TOK_ALTERVIEW_DROPPARTS: - case HiveParser.TOK_ALTERVIEW_PROPERTIES: - case HiveParser.TOK_ALTERVIEW_RENAME: - case HiveParser.TOK_CREATEVIEW: - case HiveParser.TOK_DROPVIEW: - - // Authorization DDL - case HiveParser.TOK_CREATEROLE: - case HiveParser.TOK_DROPROLE: - case HiveParser.TOK_GRANT_ROLE: - case HiveParser.TOK_GRANT_WITH_OPTION: - case HiveParser.TOK_GRANT: - case HiveParser.TOK_REVOKE_ROLE: - case HiveParser.TOK_REVOKE: - case HiveParser.TOK_SHOW_GRANT: - case HiveParser.TOK_SHOW_ROLE_GRANT: - - // Misc DDL - case HiveParser.TOK_LOCKTABLE: - case HiveParser.TOK_UNLOCKTABLE: - case HiveParser.TOK_SHOWLOCKS: - case HiveParser.TOK_DESCFUNCTION: - case HiveParser.TOK_SHOWFUNCTIONS: - case HiveParser.TOK_EXPLAIN: - - // Table DDL - case HiveParser.TOK_ALTERTABLE_ADDPARTS: - case HiveParser.TOK_ALTERTABLE_ADDCOLS: - case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: - case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: - case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: - case HiveParser.TOK_ALTERTABLE_DROPPARTS: - case HiveParser.TOK_ALTERTABLE_PROPERTIES: - case HiveParser.TOK_ALTERTABLE_RENAME: - case HiveParser.TOK_ALTERTABLE_RENAMECOL: - case HiveParser.TOK_ALTERTABLE_REPLACECOLS: - case HiveParser.TOK_ALTERTABLE_SERIALIZER: - case HiveParser.TOK_ALTERTABLE_TOUCH: - case HiveParser.TOK_DESCTABLE: - case HiveParser.TOK_DROPTABLE: - case HiveParser.TOK_SHOW_TABLESTATUS: - case HiveParser.TOK_SHOWPARTITIONS: - case HiveParser.TOK_SHOWTABLES: - break; - - default: - throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, "Unexpected token: " + ast.getToken()); - } - - authorizeDDL(context, rootTasks); - - } catch (HCatException e) { - throw new SemanticException(e); - } catch (HiveException e) { - throw new SemanticException(e); - } - - if (hook != null) { - hook.postAnalyze(context, rootTasks); - } + } + + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + + try { + + switch (ast.getToken().getType()) { + + case HiveParser.TOK_CREATETABLE: + case HiveParser.TOK_CREATEDATABASE: + case HiveParser.TOK_ALTERTABLE_PARTITION: + + // HCat will allow these operations to be performed. 
+ // Database DDL + case HiveParser.TOK_SHOWDATABASES: + case HiveParser.TOK_DROPDATABASE: + case HiveParser.TOK_SWITCHDATABASE: + case HiveParser.TOK_DESCDATABASE: + case HiveParser.TOK_ALTERDATABASE_PROPERTIES: + + // Index DDL + case HiveParser.TOK_ALTERINDEX_PROPERTIES: + case HiveParser.TOK_CREATEINDEX: + case HiveParser.TOK_DROPINDEX: + case HiveParser.TOK_SHOWINDEXES: + + // View DDL + //case HiveParser.TOK_ALTERVIEW_ADDPARTS: + case HiveParser.TOK_ALTERVIEW_DROPPARTS: + case HiveParser.TOK_ALTERVIEW_PROPERTIES: + case HiveParser.TOK_ALTERVIEW_RENAME: + case HiveParser.TOK_CREATEVIEW: + case HiveParser.TOK_DROPVIEW: + + // Authorization DDL + case HiveParser.TOK_CREATEROLE: + case HiveParser.TOK_DROPROLE: + case HiveParser.TOK_GRANT_ROLE: + case HiveParser.TOK_GRANT_WITH_OPTION: + case HiveParser.TOK_GRANT: + case HiveParser.TOK_REVOKE_ROLE: + case HiveParser.TOK_REVOKE: + case HiveParser.TOK_SHOW_GRANT: + case HiveParser.TOK_SHOW_ROLE_GRANT: + + // Misc DDL + case HiveParser.TOK_LOCKTABLE: + case HiveParser.TOK_UNLOCKTABLE: + case HiveParser.TOK_SHOWLOCKS: + case HiveParser.TOK_DESCFUNCTION: + case HiveParser.TOK_SHOWFUNCTIONS: + case HiveParser.TOK_EXPLAIN: + + // Table DDL + case HiveParser.TOK_ALTERTABLE_ADDPARTS: + case HiveParser.TOK_ALTERTABLE_ADDCOLS: + case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: + case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: + case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: + case HiveParser.TOK_ALTERTABLE_DROPPARTS: + case HiveParser.TOK_ALTERTABLE_PROPERTIES: + case HiveParser.TOK_ALTERTABLE_RENAME: + case HiveParser.TOK_ALTERTABLE_RENAMECOL: + case HiveParser.TOK_ALTERTABLE_REPLACECOLS: + case HiveParser.TOK_ALTERTABLE_SERIALIZER: + case HiveParser.TOK_ALTERTABLE_TOUCH: + case HiveParser.TOK_DESCTABLE: + case HiveParser.TOK_DROPTABLE: + case HiveParser.TOK_SHOW_TABLESTATUS: + case HiveParser.TOK_SHOWPARTITIONS: + case HiveParser.TOK_SHOWTABLES: + break; + + default: + throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, "Unexpected token: " + ast.getToken()); + } + + authorizeDDL(context, rootTasks); + + } catch (HCatException e) { + throw new SemanticException(e); + } catch (HiveException e) { + throw new SemanticException(e); } - private String extractTableName(String compoundName) { - /* - * the table name can potentially be a dot-format one with column names - * specified as part of the table name. e.g. a.b.c where b is a column in - * a and c is a field of the object/column b etc. For authorization - * purposes, we should use only the first part of the dotted name format. - * - */ - - String[] words = compoundName.split("\\."); - return words[0]; + if (hook != null) { + hook.postAnalyze(context, rootTasks); + } + } + + private String extractTableName(String compoundName) { + /* + * the table name can potentially be a dot-format one with column names + * specified as part of the table name. e.g. a.b.c where b is a column in + * a and c is a field of the object/column b etc. For authorization + * purposes, we should use only the first part of the dotted name format. + * + */ + + String[] words = compoundName.split("\\."); + return words[0]; + } + + @Override + protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work) + throws HiveException { + // DB opereations, none of them are enforced by Hive right now. 
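// A small sketch of what the SHOWDATABASES branch just below hands to the
// authorization provider: the input/output Privilege arrays defined on HiveOperation.
// Printing them makes the database-level check concrete; the exact toString() output
// of Privilege may vary across Hive versions.
import java.util.Arrays;
import org.apache.hadoop.hive.ql.plan.HiveOperation;

public class ShowDatabasesPrivsSketch {
  public static void main(String[] args) {
    System.out.println(Arrays.toString(
        HiveOperation.SHOWDATABASES.getInputRequiredPrivileges()));
    System.out.println(Arrays.toString(
        HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges()));
  }
}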
+ + ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc(); + if (showDatabases != null) { + authorize(HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(), + HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges()); } - @Override - protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work) - throws HiveException { - // DB opereations, none of them are enforced by Hive right now. - - ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc(); - if (showDatabases != null) { - authorize(HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(), - HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges()); - } - - DropDatabaseDesc dropDb = work.getDropDatabaseDesc(); - if (dropDb != null) { - Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName()); - authorize(db, Privilege.DROP); - } + DropDatabaseDesc dropDb = work.getDropDatabaseDesc(); + if (dropDb != null) { + Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName()); + authorize(db, Privilege.DROP); + } - DescDatabaseDesc descDb = work.getDescDatabaseDesc(); - if (descDb != null) { - Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName()); - authorize(db, Privilege.SELECT); - } + DescDatabaseDesc descDb = work.getDescDatabaseDesc(); + if (descDb != null) { + Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName()); + authorize(db, Privilege.SELECT); + } - SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc(); - if (switchDb != null) { - Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName()); - authorize(db, Privilege.SELECT); - } + SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc(); + if (switchDb != null) { + Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName()); + authorize(db, Privilege.SELECT); + } - ShowTablesDesc showTables = work.getShowTblsDesc(); - if (showTables != null) { - String dbName = showTables.getDbName() == null ? cntxt.getHive().getCurrentDatabase() - : showTables.getDbName(); - authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); - } + ShowTablesDesc showTables = work.getShowTblsDesc(); + if (showTables != null) { + String dbName = showTables.getDbName() == null ? cntxt.getHive().getCurrentDatabase() + : showTables.getDbName(); + authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); + } - ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc(); - if (showTableStatus != null) { - String dbName = showTableStatus.getDbName() == null ? cntxt.getHive().getCurrentDatabase() - : showTableStatus.getDbName(); - authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); - } + ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc(); + if (showTableStatus != null) { + String dbName = showTableStatus.getDbName() == null ? cntxt.getHive().getCurrentDatabase() + : showTableStatus.getDbName(); + authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); + } - // TODO: add alter database support in HCat - - // Table operations. - - DropTableDesc dropTable = work.getDropTblDesc(); - if (dropTable != null) { - if (dropTable.getPartSpecs() == null) { - // drop table is already enforced by Hive. We only check for table level location even if the - // table is partitioned. 
- } else { - //this is actually a ALTER TABLE DROP PARITITION statement - for (PartitionSpec partSpec : dropTable.getPartSpecs()) { - // partitions are not added as write entries in drop partitions in Hive - Table table = hive.getTable(hive.getCurrentDatabase(), dropTable.getTableName()); - List partitions = null; - try { - partitions = hive.getPartitionsByFilter(table, partSpec.toString()); - } catch (Exception e) { - throw new HiveException(e); - } - - for (Partition part : partitions) { - authorize(part, Privilege.DROP); - } - } - } + // TODO: add alter database support in HCat + + // Table operations. + + DropTableDesc dropTable = work.getDropTblDesc(); + if (dropTable != null) { + if (dropTable.getPartSpecs() == null) { + // drop table is already enforced by Hive. We only check for table level location even if the + // table is partitioned. + } else { + //this is actually a ALTER TABLE DROP PARITITION statement + for (PartitionSpec partSpec : dropTable.getPartSpecs()) { + // partitions are not added as write entries in drop partitions in Hive + Table table = hive.getTable(hive.getCurrentDatabase(), dropTable.getTableName()); + List partitions = null; + try { + partitions = hive.getPartitionsByFilter(table, partSpec.toString()); + } catch (Exception e) { + throw new HiveException(e); + } + + for (Partition part : partitions) { + authorize(part, Privilege.DROP); + } } + } + } - AlterTableDesc alterTable = work.getAlterTblDesc(); - if (alterTable != null) { - Table table = hive.getTable(hive.getCurrentDatabase(), alterTable.getOldName(), false); - - Partition part = null; - if (alterTable.getPartSpec() != null) { - part = hive.getPartition(table, alterTable.getPartSpec(), false); - } - - String newLocation = alterTable.getNewLocation(); - - /* Hcat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements - * for the old table/partition location and the new location. - */ - if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { - if (part != null) { - authorize(part, Privilege.ALTER_DATA); // authorize for the old - // location, and new location - part.setLocation(newLocation); - authorize(part, Privilege.ALTER_DATA); - } else { - authorize(table, Privilege.ALTER_DATA); // authorize for the old - // location, and new location - table.getTTable().getSd().setLocation(newLocation); - authorize(table, Privilege.ALTER_DATA); - } - } - //other alter operations are already supported by Hive + AlterTableDesc alterTable = work.getAlterTblDesc(); + if (alterTable != null) { + Table table = hive.getTable(hive.getCurrentDatabase(), alterTable.getOldName(), false); + + Partition part = null; + if (alterTable.getPartSpec() != null) { + part = hive.getPartition(table, alterTable.getPartSpec(), false); + } + + String newLocation = alterTable.getNewLocation(); + + /* Hcat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements + * for the old table/partition location and the new location. 
+ */ + if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { + if (part != null) { + authorize(part, Privilege.ALTER_DATA); // authorize for the old + // location, and new location + part.setLocation(newLocation); + authorize(part, Privilege.ALTER_DATA); + } else { + authorize(table, Privilege.ALTER_DATA); // authorize for the old + // location, and new location + table.getTTable().getSd().setLocation(newLocation); + authorize(table, Privilege.ALTER_DATA); } + } + //other alter operations are already supported by Hive + } - // we should be careful when authorizing table based on just the - // table name. If columns have separate authorization domain, it - // must be honored - DescTableDesc descTable = work.getDescTblDesc(); - if (descTable != null) { - String tableName = extractTableName(descTable.getTableName()); - authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); - } + // we should be careful when authorizing table based on just the + // table name. If columns have separate authorization domain, it + // must be honored + DescTableDesc descTable = work.getDescTblDesc(); + if (descTable != null) { + String tableName = extractTableName(descTable.getTableName()); + authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); + } - ShowPartitionsDesc showParts = work.getShowPartsDesc(); - if (showParts != null) { - String tableName = extractTableName(showParts.getTabName()); - authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); - } + ShowPartitionsDesc showParts = work.getShowPartsDesc(); + if (showParts != null) { + String tableName = extractTableName(showParts.getTabName()); + authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java index 6b78800..3740795 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java @@ -45,136 +45,136 @@ */ public class HCatSemanticAnalyzerBase extends AbstractSemanticAnalyzerHook { - private HiveAuthorizationProvider authProvider; + private HiveAuthorizationProvider authProvider; - protected String getDbName(Hive hive, String dbName) { - return dbName == null ? hive.getCurrentDatabase() : dbName; - } - - public HiveAuthorizationProvider getAuthProvider() { - if (authProvider == null) { - authProvider = SessionState.get().getAuthorizer(); - } + protected String getDbName(Hive hive, String dbName) { + return dbName == null ? hive.getCurrentDatabase() : dbName; + } - return authProvider; + public HiveAuthorizationProvider getAuthProvider() { + if (authProvider == null) { + authProvider = SessionState.get().getAuthorizer(); } - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - super.postAnalyze(context, rootTasks); - - //Authorize the operation. - authorizeDDL(context, rootTasks); + return authProvider; + } + + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + super.postAnalyze(context, rootTasks); + + //Authorize the operation. + authorizeDDL(context, rootTasks); + } + + /** + * Checks for the given rootTasks, and calls authorizeDDLWork() for each DDLWork to + * be authorized. 
The hooks should override this, or authorizeDDLWork to perform the + * actual authorization. + */ + /* + * Impl note: Hive provides authorization with it's own model, and calls the defined + * HiveAuthorizationProvider from Driver.doAuthorization(). However, HCat has to + * do additional calls to the auth provider to implement expected behavior for + * StorageDelegationAuthorizationProvider. This means, that the defined auth provider + * is called by both Hive and HCat. The following are missing from Hive's implementation, + * and when they are fixed in Hive, we can remove the HCat-specific auth checks. + * 1. CREATE DATABASE/TABLE, ADD PARTITION statements does not call + * HiveAuthorizationProvider.authorize() with the candidate objects, which means that + * we cannot do checks against defined LOCATION. + * 2. HiveOperation does not define sufficient Privileges for most of the operations, + * especially database operations. + * 3. For some of the operations, Hive SemanticAnalyzer does not add the changed + * object as a WriteEntity or ReadEntity. + * + * @see https://issues.apache.org/jira/browse/HCATALOG-244 + * @see https://issues.apache.org/jira/browse/HCATALOG-245 + */ + protected void authorizeDDL(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + + if (!HiveConf.getBoolVar(context.getConf(), + HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { + return; } - /** - * Checks for the given rootTasks, and calls authorizeDDLWork() for each DDLWork to - * be authorized. The hooks should override this, or authorizeDDLWork to perform the - * actual authorization. - */ - /* - * Impl note: Hive provides authorization with it's own model, and calls the defined - * HiveAuthorizationProvider from Driver.doAuthorization(). However, HCat has to - * do additional calls to the auth provider to implement expected behavior for - * StorageDelegationAuthorizationProvider. This means, that the defined auth provider - * is called by both Hive and HCat. The following are missing from Hive's implementation, - * and when they are fixed in Hive, we can remove the HCat-specific auth checks. - * 1. CREATE DATABASE/TABLE, ADD PARTITION statements does not call - * HiveAuthorizationProvider.authorize() with the candidate objects, which means that - * we cannot do checks against defined LOCATION. - * 2. HiveOperation does not define sufficient Privileges for most of the operations, - * especially database operations. - * 3. For some of the operations, Hive SemanticAnalyzer does not add the changed - * object as a WriteEntity or ReadEntity. 
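
A minimal sketch of the override pattern this comment describes: a hook that extends HCatSemanticAnalyzerBase and authorizes a single DDL type through the helper methods defined further down in this class. ExampleDescTableHook is a hypothetical class name, the Hive import paths are assumed from the Hive version this patch builds against, and the HCatSemanticAnalyzer shown earlier in this patch additionally passes the name through extractTableName() before authorizing.

    import org.apache.hadoop.hive.ql.metadata.Hive;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
    import org.apache.hadoop.hive.ql.plan.DDLWork;
    import org.apache.hadoop.hive.ql.plan.DescTableDesc;
    import org.apache.hadoop.hive.ql.security.authorization.Privilege;
    import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzerBase;

    public class ExampleDescTableHook extends HCatSemanticAnalyzerBase {
      @Override
      protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context,
          Hive hive, DDLWork work) throws HiveException {
        // Only DESCRIBE is authorized here; every other DDLWork keeps the
        // base-class no-op behavior, while authorizeDDL() still drives the loop.
        DescTableDesc descTable = work.getDescTblDesc();
        if (descTable != null) {
          authorizeTable(hive, descTable.getTableName(), Privilege.SELECT);
        }
      }
    }
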
- * - * @see https://issues.apache.org/jira/browse/HCATALOG-244 - * @see https://issues.apache.org/jira/browse/HCATALOG-245 - */ - protected void authorizeDDL(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - - if (!HiveConf.getBoolVar(context.getConf(), - HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { - return; - } + Hive hive; + try { + hive = context.getHive(); - Hive hive; - try { - hive = context.getHive(); - - for (Task task : rootTasks) { - if (task.getWork() instanceof DDLWork) { - DDLWork work = (DDLWork) task.getWork(); - if (work != null) { - authorizeDDLWork(context, hive, work); - } - } - } - } catch (SemanticException ex) { - throw ex; - } catch (AuthorizationException ex) { - throw ex; - } catch (Exception ex) { - throw new SemanticException(ex); + for (Task task : rootTasks) { + if (task.getWork() instanceof DDLWork) { + DDLWork work = (DDLWork) task.getWork(); + if (work != null) { + authorizeDDLWork(context, hive, work); + } } + } + } catch (SemanticException ex) { + throw ex; + } catch (AuthorizationException ex) { + throw ex; + } catch (Exception ex) { + throw new SemanticException(ex); } - - /** - * Authorized the given DDLWork. Does nothing by default. Override this - * and delegate to the relevant method in HiveAuthorizationProvider obtained by - * getAuthProvider(). - */ - protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, - Hive hive, DDLWork work) throws HiveException { + } + + /** + * Authorized the given DDLWork. Does nothing by default. Override this + * and delegate to the relevant method in HiveAuthorizationProvider obtained by + * getAuthProvider(). + */ + protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, + Hive hive, DDLWork work) throws HiveException { + } + + protected void authorize(Privilege[] inputPrivs, Privilege[] outputPrivs) + throws AuthorizationException, SemanticException { + try { + getAuthProvider().authorize(inputPrivs, outputPrivs); + } catch (HiveException ex) { + throw new SemanticException(ex); } - - protected void authorize(Privilege[] inputPrivs, Privilege[] outputPrivs) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(inputPrivs, outputPrivs); - } catch (HiveException ex) { - throw new SemanticException(ex); - } + } + + protected void authorize(Database db, Privilege priv) + throws AuthorizationException, SemanticException { + try { + getAuthProvider().authorize(db, null, new Privilege[]{priv}); + } catch (HiveException ex) { + throw new SemanticException(ex); } - - protected void authorize(Database db, Privilege priv) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(db, null, new Privilege[]{priv}); - } catch (HiveException ex) { - throw new SemanticException(ex); - } + } + + protected void authorizeTable(Hive hive, String tableName, Privilege priv) + throws AuthorizationException, HiveException { + Table table; + try { + table = hive.getTable(tableName); + } catch (InvalidTableException ite) { + // Table itself doesn't exist in metastore, nothing to validate. + return; } - protected void authorizeTable(Hive hive, String tableName, Privilege priv) - throws AuthorizationException, HiveException { - Table table; - try { - table = hive.getTable(tableName); - } catch (InvalidTableException ite) { - // Table itself doesn't exist in metastore, nothing to validate. 
- return; - } - - authorize(table, priv); - } + authorize(table, priv); + } - protected void authorize(Table table, Privilege priv) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(table, new Privilege[]{priv}, null); - } catch (HiveException ex) { - throw new SemanticException(ex); - } + protected void authorize(Table table, Privilege priv) + throws AuthorizationException, SemanticException { + try { + getAuthProvider().authorize(table, new Privilege[]{priv}, null); + } catch (HiveException ex) { + throw new SemanticException(ex); } - - protected void authorize(Partition part, Privilege priv) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(part, new Privilege[]{priv}, null); - } catch (HiveException ex) { - throw new SemanticException(ex); - } + } + + protected void authorize(Partition part, Privilege priv) + throws AuthorizationException, SemanticException { + try { + getAuthProvider().authorize(part, new Privilege[]{priv}, null); + } catch (HiveException ex) { + throw new SemanticException(ex); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/common/ErrorType.java b/hcatalog/core/src/main/java/org/apache/hcatalog/common/ErrorType.java index 88f9c11..2e16b47 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/common/ErrorType.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/common/ErrorType.java @@ -24,115 +24,115 @@ */ public enum ErrorType { - /* HCat Input Format related errors 1000 - 1999 */ - ERROR_DB_INIT (1000, "Error initializing database session"), - ERROR_EXCEED_MAXPART (1001, "Query result exceeded maximum number of partitions allowed"), - - ERROR_SET_INPUT (1002, "Error setting input information"), - - /* HCat Output Format related errors 2000 - 2999 */ - ERROR_INVALID_TABLE (2000, "Table specified does not exist"), - ERROR_SET_OUTPUT (2001, "Error setting output information"), - ERROR_DUPLICATE_PARTITION (2002, "Partition already present with given partition key values"), - ERROR_NON_EMPTY_TABLE (2003, "Non-partitioned table already contains data"), - ERROR_NOT_INITIALIZED (2004, "HCatOutputFormat not initialized, setOutput has to be called"), - ERROR_INIT_STORAGE_HANDLER (2005, "Error initializing storage handler instance"), - ERROR_PUBLISHING_PARTITION (2006, "Error adding partition to metastore"), - ERROR_SCHEMA_COLUMN_MISMATCH (2007, "Invalid column position in partition schema"), - ERROR_SCHEMA_PARTITION_KEY (2008, "Partition key cannot be present in the partition data"), - ERROR_SCHEMA_TYPE_MISMATCH (2009, "Invalid column type in partition schema"), - ERROR_INVALID_PARTITION_VALUES (2010, "Invalid partition values specified"), - ERROR_MISSING_PARTITION_KEY (2011, "Partition key value not provided for publish"), - ERROR_MOVE_FAILED (2012, "Moving of data failed during commit"), - ERROR_TOO_MANY_DYNAMIC_PTNS (2013, "Attempt to create too many dynamic partitions"), - ERROR_INIT_LOADER (2014, "Error initializing Pig loader"), - ERROR_INIT_STORER (2015, "Error initializing Pig storer"), - ERROR_NOT_SUPPORTED (2016, "Error operation not supported"), - - /* Authorization Errors 3000 - 3999 */ - ERROR_ACCESS_CONTROL (3000, "Permission denied"), - - /* Miscellaneous errors, range 9000 - 9998 */ - ERROR_UNIMPLEMENTED (9000, "Functionality currently unimplemented"), - ERROR_INTERNAL_EXCEPTION (9001, "Exception occurred while processing HCat request"); - - /** The error code. */ - private int errorCode; - - /** The error message. 
*/ - private String errorMessage; - - /** Should the causal exception message be appended to the error message, yes by default*/ - private boolean appendCauseMessage = true; - - /** Is this a retriable error, no by default. */ - private boolean isRetriable = false; - - /** - * Instantiates a new error type. - * @param errorCode the error code - * @param errorMessage the error message - */ - private ErrorType(int errorCode, String errorMessage) { - this.errorCode = errorCode; - this.errorMessage = errorMessage; - } - - /** - * Instantiates a new error type. - * @param errorCode the error code - * @param errorMessage the error message - * @param appendCauseMessage should causal exception message be appended to error message - */ - private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage) { - this.errorCode = errorCode; - this.errorMessage = errorMessage; - this.appendCauseMessage = appendCauseMessage; - } - - /** - * Instantiates a new error type. - * @param errorCode the error code - * @param errorMessage the error message - * @param appendCauseMessage should causal exception message be appended to error message - * @param isRetriable is this a retriable error - */ - private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage, boolean isRetriable) { - this.errorCode = errorCode; - this.errorMessage = errorMessage; - this.appendCauseMessage = appendCauseMessage; - this.isRetriable = isRetriable; - } - - /** - * Gets the error code. - * @return the error code - */ - public int getErrorCode() { - return errorCode; - } - - /** - * Gets the error message. - * @return the error message - */ - public String getErrorMessage() { - return errorMessage; - } - - /** - * Checks if this is a retriable error. - * @return true, if is a retriable error, false otherwise - */ - public boolean isRetriable() { - return isRetriable; - } - - /** - * Whether the cause of the exception should be added to the error message. 
- * @return true, if the cause should be added to the message, false otherwise - */ - public boolean appendCauseMessage() { - return appendCauseMessage; - } + /* HCat Input Format related errors 1000 - 1999 */ + ERROR_DB_INIT (1000, "Error initializing database session"), + ERROR_EXCEED_MAXPART (1001, "Query result exceeded maximum number of partitions allowed"), + + ERROR_SET_INPUT (1002, "Error setting input information"), + + /* HCat Output Format related errors 2000 - 2999 */ + ERROR_INVALID_TABLE (2000, "Table specified does not exist"), + ERROR_SET_OUTPUT (2001, "Error setting output information"), + ERROR_DUPLICATE_PARTITION (2002, "Partition already present with given partition key values"), + ERROR_NON_EMPTY_TABLE (2003, "Non-partitioned table already contains data"), + ERROR_NOT_INITIALIZED (2004, "HCatOutputFormat not initialized, setOutput has to be called"), + ERROR_INIT_STORAGE_HANDLER (2005, "Error initializing storage handler instance"), + ERROR_PUBLISHING_PARTITION (2006, "Error adding partition to metastore"), + ERROR_SCHEMA_COLUMN_MISMATCH (2007, "Invalid column position in partition schema"), + ERROR_SCHEMA_PARTITION_KEY (2008, "Partition key cannot be present in the partition data"), + ERROR_SCHEMA_TYPE_MISMATCH (2009, "Invalid column type in partition schema"), + ERROR_INVALID_PARTITION_VALUES (2010, "Invalid partition values specified"), + ERROR_MISSING_PARTITION_KEY (2011, "Partition key value not provided for publish"), + ERROR_MOVE_FAILED (2012, "Moving of data failed during commit"), + ERROR_TOO_MANY_DYNAMIC_PTNS (2013, "Attempt to create too many dynamic partitions"), + ERROR_INIT_LOADER (2014, "Error initializing Pig loader"), + ERROR_INIT_STORER (2015, "Error initializing Pig storer"), + ERROR_NOT_SUPPORTED (2016, "Error operation not supported"), + + /* Authorization Errors 3000 - 3999 */ + ERROR_ACCESS_CONTROL (3000, "Permission denied"), + + /* Miscellaneous errors, range 9000 - 9998 */ + ERROR_UNIMPLEMENTED (9000, "Functionality currently unimplemented"), + ERROR_INTERNAL_EXCEPTION (9001, "Exception occurred while processing HCat request"); + + /** The error code. */ + private int errorCode; + + /** The error message. */ + private String errorMessage; + + /** Should the causal exception message be appended to the error message, yes by default*/ + private boolean appendCauseMessage = true; + + /** Is this a retriable error, no by default. */ + private boolean isRetriable = false; + + /** + * Instantiates a new error type. + * @param errorCode the error code + * @param errorMessage the error message + */ + private ErrorType(int errorCode, String errorMessage) { + this.errorCode = errorCode; + this.errorMessage = errorMessage; + } + + /** + * Instantiates a new error type. + * @param errorCode the error code + * @param errorMessage the error message + * @param appendCauseMessage should causal exception message be appended to error message + */ + private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage) { + this.errorCode = errorCode; + this.errorMessage = errorMessage; + this.appendCauseMessage = appendCauseMessage; + } + + /** + * Instantiates a new error type. 
+ * @param errorCode the error code + * @param errorMessage the error message + * @param appendCauseMessage should causal exception message be appended to error message + * @param isRetriable is this a retriable error + */ + private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage, boolean isRetriable) { + this.errorCode = errorCode; + this.errorMessage = errorMessage; + this.appendCauseMessage = appendCauseMessage; + this.isRetriable = isRetriable; + } + + /** + * Gets the error code. + * @return the error code + */ + public int getErrorCode() { + return errorCode; + } + + /** + * Gets the error message. + * @return the error message + */ + public String getErrorMessage() { + return errorMessage; + } + + /** + * Checks if this is a retriable error. + * @return true, if is a retriable error, false otherwise + */ + public boolean isRetriable() { + return isRetriable; + } + + /** + * Whether the cause of the exception should be added to the error message. + * @return true, if the cause should be added to the message, false otherwise + */ + public boolean appendCauseMessage() { + return appendCauseMessage; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatConstants.java b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatConstants.java index e1e04c8..a7e5b91 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatConstants.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatConstants.java @@ -27,163 +27,163 @@ */ public final class HCatConstants { - public static final String HIVE_RCFILE_IF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileInputFormat"; - public static final String HIVE_RCFILE_OF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileOutputFormat"; - - public static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName(); - public static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName(); - - public static final String HCAT_PIG_STORAGE_CLASS = "org.apache.pig.builtin.PigStorage"; - public static final String HCAT_PIG_LOADER = "hcat.pig.loader"; - public static final String HCAT_PIG_LOADER_LOCATION_SET = HCAT_PIG_LOADER + ".location.set"; - public static final String HCAT_PIG_LOADER_ARGS = "hcat.pig.loader.args"; - public static final String HCAT_PIG_STORER = "hcat.pig.storer"; - public static final String HCAT_PIG_STORER_ARGS = "hcat.pig.storer.args"; - public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter"; - public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ","; - public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER + ".location.set"; - public static final String HCAT_PIG_INNER_TUPLE_NAME = "hcat.pig.inner.tuple.name"; - public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple"; - public static final String HCAT_PIG_INNER_FIELD_NAME = "hcat.pig.inner.field.name"; - public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield"; - - /** - * {@value} (default: null) - * When the property is set in the UDFContext of the org.apache.hcatalog.pig.HCatStorer, HCatStorer writes - * to the location it specifies instead of the default HCatalog location format. An example can be found - * in org.apache.hcatalog.pig.HCatStorerWrapper. 
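
As a rough illustration of how that property is meant to be used (the authoritative example being HCatStorerWrapper, as the comment notes): the storer front end records the explicit path in its UDFContext-scoped Properties before the store runs. The snippet below is a sketch only; 'udfProps' and the output path are placeholders, and obtaining the Properties object from Pig's UDFContext is left to the caller.

    // Sketch only: 'udfProps' is assumed to be the storer's UDFContext-scoped
    // java.util.Properties, obtained the way org.apache.hcatalog.pig.HCatStorerWrapper does.
    udfProps.setProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION,
        "hdfs:///user/alice/custom/output/dir");
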
- */ - public static final String HCAT_PIG_STORER_EXTERNAL_LOCATION = HCAT_PIG_STORER + ".external.location"; - - //The keys used to store info into the job Configuration - public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat"; - - public static final String HCAT_KEY_OUTPUT_SCHEMA = HCAT_KEY_BASE + ".output.schema"; - - public static final String HCAT_KEY_JOB_INFO = HCAT_KEY_BASE + ".job.info"; - - // hcatalog specific configurations, that can be put in hive-site.xml - public static final String HCAT_HIVE_CLIENT_EXPIRY_TIME = "hcatalog.hive.client.cache.expiry.time"; - - private HCatConstants() { // restrict instantiation - } - - public static final String HCAT_TABLE_SCHEMA = "hcat.table.schema"; - - public static final String HCAT_METASTORE_URI = HiveConf.ConfVars.METASTOREURIS.varname; - - public static final String HCAT_PERMS = "hcat.perms"; - - public static final String HCAT_GROUP = "hcat.group"; - - public static final String HCAT_CREATE_TBL_NAME = "hcat.create.tbl.name"; - - public static final String HCAT_CREATE_DB_NAME = "hcat.create.db.name"; - - public static final String HCAT_METASTORE_PRINCIPAL - = HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname; - - /** - * The desired number of input splits produced for each partition. When the - * input files are large and few, we want to split them into many splits, - * so as to increase the parallelizm of loading the splits. Try also two - * other parameters, mapred.min.split.size and mapred.max.split.size, to - * control the number of input splits. - */ - public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS = - "hcat.desired.partition.num.splits"; - - // IMPORTANT IMPORTANT IMPORTANT!!!!! - //The keys used to store info into the job Configuration. - //If any new keys are added, the HCatStorer needs to be updated. The HCatStorer - //updates the job configuration in the backend to insert these keys to avoid - //having to call setOutput from the backend (which would cause a metastore call - //from the map jobs) - public static final String HCAT_KEY_OUTPUT_BASE = "mapreduce.lib.hcatoutput"; - public static final String HCAT_KEY_OUTPUT_INFO = HCAT_KEY_OUTPUT_BASE + ".info"; - public static final String HCAT_KEY_HIVE_CONF = HCAT_KEY_OUTPUT_BASE + ".hive.conf"; - public static final String HCAT_KEY_TOKEN_SIGNATURE = HCAT_KEY_OUTPUT_BASE + ".token.sig"; - - public static final String[] OUTPUT_CONFS_TO_SAVE = { - HCAT_KEY_OUTPUT_INFO, - HCAT_KEY_HIVE_CONF, - HCAT_KEY_TOKEN_SIGNATURE - }; - - - public static final String HCAT_MSG_CLEAN_FREQ = "hcat.msg.clean.freq"; - public static final String HCAT_MSG_EXPIRY_DURATION = "hcat.msg.expiry.duration"; - - public static final String HCAT_MSGBUS_TOPIC_NAME = "hcat.msgbus.topic.name"; - public static final String HCAT_MSGBUS_TOPIC_NAMING_POLICY = "hcat.msgbus.topic.naming.policy"; - public static final String HCAT_MSGBUS_TOPIC_PREFIX = "hcat.msgbus.topic.prefix"; - - public static final String HCAT_DYNAMIC_PTN_JOBID = HCAT_KEY_OUTPUT_BASE + "dynamic.jobid"; - public static final boolean HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED = false; - - // Message Bus related properties. 
- public static final String HCAT_DEFAULT_TOPIC_PREFIX = "hcat"; - public static final String HCAT_EVENT = "HCAT_EVENT"; - public static final String HCAT_ADD_PARTITION_EVENT = "ADD_PARTITION"; - public static final String HCAT_DROP_PARTITION_EVENT = "DROP_PARTITION"; - public static final String HCAT_PARTITION_DONE_EVENT = "PARTITION_DONE"; - public static final String HCAT_CREATE_TABLE_EVENT = "CREATE_TABLE"; - public static final String HCAT_DROP_TABLE_EVENT = "DROP_TABLE"; - public static final String HCAT_CREATE_DATABASE_EVENT = "CREATE_DATABASE"; - public static final String HCAT_DROP_DATABASE_EVENT = "DROP_DATABASE"; - public static final String HCAT_MESSAGE_VERSION = "HCAT_MESSAGE_VERSION"; - public static final String HCAT_MESSAGE_FORMAT = "HCAT_MESSAGE_FORMAT"; - public static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; - public static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; - public static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hcatalog.messaging.json.JSONMessageFactory"; - - // System environment variables - public static final String SYSENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION"; - - // Hadoop Conf Var Names - public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary"; - - //*************************************************************************** - // Data-related configuration properties. - //*************************************************************************** - - /** - * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}). - * Pig < 0.10.0 does not have boolean support, and scripts written for pre-boolean Pig versions - * will not expect boolean values when upgrading Pig. For integration the option is offered to - * convert boolean fields to integers by setting this Hadoop configuration key. - */ - public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER = - "hcat.data.convert.boolean.to.integer"; - public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false; - - /** - * {@value} (default: {@value #HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT}). - * Hive tables support tinyint and smallint columns, while not all processing frameworks support - * these types (Pig only has integer for example). Enable this property to promote tinyint and - * smallint columns to integer at runtime. Note that writes to tinyint and smallint columns - * enforce bounds checking and jobs will fail if attempting to write values outside the column - * bounds. - */ - public static final String HCAT_DATA_TINY_SMALL_INT_PROMOTION = - "hcat.data.tiny.small.int.promotion"; - public static final boolean HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT = false; - - /** - * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT}). - * Threshold for the ratio of bad records that will be silently skipped without causing a task - * failure. This is useful when processing large data sets with corrupt records, when its - * acceptable to skip some bad records. - */ - public static final String HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY = "hcat.input.bad.record.threshold"; - public static final float HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT = 0.0001f; - - /** - * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_MIN_DEFAULT}). - * Number of bad records that will be accepted before applying - * {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}. 
This is necessary to prevent an initial bad - * record from causing a task failure. - */ - public static final String HCAT_INPUT_BAD_RECORD_MIN_KEY = "hcat.input.bad.record.min"; - public static final int HCAT_INPUT_BAD_RECORD_MIN_DEFAULT = 2; + public static final String HIVE_RCFILE_IF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileInputFormat"; + public static final String HIVE_RCFILE_OF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileOutputFormat"; + + public static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName(); + public static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName(); + + public static final String HCAT_PIG_STORAGE_CLASS = "org.apache.pig.builtin.PigStorage"; + public static final String HCAT_PIG_LOADER = "hcat.pig.loader"; + public static final String HCAT_PIG_LOADER_LOCATION_SET = HCAT_PIG_LOADER + ".location.set"; + public static final String HCAT_PIG_LOADER_ARGS = "hcat.pig.loader.args"; + public static final String HCAT_PIG_STORER = "hcat.pig.storer"; + public static final String HCAT_PIG_STORER_ARGS = "hcat.pig.storer.args"; + public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter"; + public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ","; + public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER + ".location.set"; + public static final String HCAT_PIG_INNER_TUPLE_NAME = "hcat.pig.inner.tuple.name"; + public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple"; + public static final String HCAT_PIG_INNER_FIELD_NAME = "hcat.pig.inner.field.name"; + public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield"; + + /** + * {@value} (default: null) + * When the property is set in the UDFContext of the org.apache.hcatalog.pig.HCatStorer, HCatStorer writes + * to the location it specifies instead of the default HCatalog location format. An example can be found + * in org.apache.hcatalog.pig.HCatStorerWrapper. + */ + public static final String HCAT_PIG_STORER_EXTERNAL_LOCATION = HCAT_PIG_STORER + ".external.location"; + + //The keys used to store info into the job Configuration + public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat"; + + public static final String HCAT_KEY_OUTPUT_SCHEMA = HCAT_KEY_BASE + ".output.schema"; + + public static final String HCAT_KEY_JOB_INFO = HCAT_KEY_BASE + ".job.info"; + + // hcatalog specific configurations, that can be put in hive-site.xml + public static final String HCAT_HIVE_CLIENT_EXPIRY_TIME = "hcatalog.hive.client.cache.expiry.time"; + + private HCatConstants() { // restrict instantiation + } + + public static final String HCAT_TABLE_SCHEMA = "hcat.table.schema"; + + public static final String HCAT_METASTORE_URI = HiveConf.ConfVars.METASTOREURIS.varname; + + public static final String HCAT_PERMS = "hcat.perms"; + + public static final String HCAT_GROUP = "hcat.group"; + + public static final String HCAT_CREATE_TBL_NAME = "hcat.create.tbl.name"; + + public static final String HCAT_CREATE_DB_NAME = "hcat.create.db.name"; + + public static final String HCAT_METASTORE_PRINCIPAL + = HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname; + + /** + * The desired number of input splits produced for each partition. When the + * input files are large and few, we want to split them into many splits, + * so as to increase the parallelizm of loading the splits. Try also two + * other parameters, mapred.min.split.size and mapred.max.split.size, to + * control the number of input splits. 
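
For instance, a job whose partitions consist of a few large files can request more splits per partition while still tuning the standard Hadoop split-size settings. A rough sketch, assuming 'job' is the org.apache.hadoop.mapreduce.Job being configured:

    Configuration conf = job.getConfiguration();
    conf.setInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 16);
    conf.setLong("mapred.min.split.size", 64L << 20);    // 64 MB lower bound per split
    conf.setLong("mapred.max.split.size", 256L << 20);   // 256 MB upper bound per split
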
+ */ + public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS = + "hcat.desired.partition.num.splits"; + + // IMPORTANT IMPORTANT IMPORTANT!!!!! + //The keys used to store info into the job Configuration. + //If any new keys are added, the HCatStorer needs to be updated. The HCatStorer + //updates the job configuration in the backend to insert these keys to avoid + //having to call setOutput from the backend (which would cause a metastore call + //from the map jobs) + public static final String HCAT_KEY_OUTPUT_BASE = "mapreduce.lib.hcatoutput"; + public static final String HCAT_KEY_OUTPUT_INFO = HCAT_KEY_OUTPUT_BASE + ".info"; + public static final String HCAT_KEY_HIVE_CONF = HCAT_KEY_OUTPUT_BASE + ".hive.conf"; + public static final String HCAT_KEY_TOKEN_SIGNATURE = HCAT_KEY_OUTPUT_BASE + ".token.sig"; + + public static final String[] OUTPUT_CONFS_TO_SAVE = { + HCAT_KEY_OUTPUT_INFO, + HCAT_KEY_HIVE_CONF, + HCAT_KEY_TOKEN_SIGNATURE + }; + + + public static final String HCAT_MSG_CLEAN_FREQ = "hcat.msg.clean.freq"; + public static final String HCAT_MSG_EXPIRY_DURATION = "hcat.msg.expiry.duration"; + + public static final String HCAT_MSGBUS_TOPIC_NAME = "hcat.msgbus.topic.name"; + public static final String HCAT_MSGBUS_TOPIC_NAMING_POLICY = "hcat.msgbus.topic.naming.policy"; + public static final String HCAT_MSGBUS_TOPIC_PREFIX = "hcat.msgbus.topic.prefix"; + + public static final String HCAT_DYNAMIC_PTN_JOBID = HCAT_KEY_OUTPUT_BASE + "dynamic.jobid"; + public static final boolean HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED = false; + + // Message Bus related properties. + public static final String HCAT_DEFAULT_TOPIC_PREFIX = "hcat"; + public static final String HCAT_EVENT = "HCAT_EVENT"; + public static final String HCAT_ADD_PARTITION_EVENT = "ADD_PARTITION"; + public static final String HCAT_DROP_PARTITION_EVENT = "DROP_PARTITION"; + public static final String HCAT_PARTITION_DONE_EVENT = "PARTITION_DONE"; + public static final String HCAT_CREATE_TABLE_EVENT = "CREATE_TABLE"; + public static final String HCAT_DROP_TABLE_EVENT = "DROP_TABLE"; + public static final String HCAT_CREATE_DATABASE_EVENT = "CREATE_DATABASE"; + public static final String HCAT_DROP_DATABASE_EVENT = "DROP_DATABASE"; + public static final String HCAT_MESSAGE_VERSION = "HCAT_MESSAGE_VERSION"; + public static final String HCAT_MESSAGE_FORMAT = "HCAT_MESSAGE_FORMAT"; + public static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; + public static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; + public static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hcatalog.messaging.json.JSONMessageFactory"; + + // System environment variables + public static final String SYSENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION"; + + // Hadoop Conf Var Names + public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary"; + + //*************************************************************************** + // Data-related configuration properties. + //*************************************************************************** + + /** + * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}). + * Pig < 0.10.0 does not have boolean support, and scripts written for pre-boolean Pig versions + * will not expect boolean values when upgrading Pig. For integration the option is offered to + * convert boolean fields to integers by setting this Hadoop configuration key. 
+ */ + public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER = + "hcat.data.convert.boolean.to.integer"; + public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false; + + /** + * {@value} (default: {@value #HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT}). + * Hive tables support tinyint and smallint columns, while not all processing frameworks support + * these types (Pig only has integer for example). Enable this property to promote tinyint and + * smallint columns to integer at runtime. Note that writes to tinyint and smallint columns + * enforce bounds checking and jobs will fail if attempting to write values outside the column + * bounds. + */ + public static final String HCAT_DATA_TINY_SMALL_INT_PROMOTION = + "hcat.data.tiny.small.int.promotion"; + public static final boolean HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT = false; + + /** + * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT}). + * Threshold for the ratio of bad records that will be silently skipped without causing a task + * failure. This is useful when processing large data sets with corrupt records, when its + * acceptable to skip some bad records. + */ + public static final String HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY = "hcat.input.bad.record.threshold"; + public static final float HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT = 0.0001f; + + /** + * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_MIN_DEFAULT}). + * Number of bad records that will be accepted before applying + * {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}. This is necessary to prevent an initial bad + * record from causing a task failure. + */ + public static final String HCAT_INPUT_BAD_RECORD_MIN_KEY = "hcat.input.bad.record.min"; + public static final int HCAT_INPUT_BAD_RECORD_MIN_DEFAULT = 2; } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatContext.java b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatContext.java index d011a1a..9254071 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatContext.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatContext.java @@ -49,41 +49,41 @@ @InterfaceAudience.Public @InterfaceStability.Evolving public enum HCatContext { - INSTANCE; + INSTANCE; - private Configuration conf = null; + private Configuration conf = null; - /** - * Use the given configuration for optional behavior. Keys exclusive to an existing config - * are set in the new conf. The job conf must be used to ensure properties are passed to - * backend MR tasks. - */ - public synchronized HCatContext setConf(Configuration newConf) { - Preconditions.checkNotNull(newConf, "Required parameter 'newConf' must not be null."); + /** + * Use the given configuration for optional behavior. Keys exclusive to an existing config + * are set in the new conf. The job conf must be used to ensure properties are passed to + * backend MR tasks. 
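
A rough usage sketch, assuming 'job' is the org.apache.hadoop.mapreduce.Job being configured and that Guava's Optional is in scope: the front end registers the job configuration so existing hcat.* keys carry over, and optional behavior such as the small-int promotion above is then read back through the Optional-returning getConf().

    Configuration jobConf = job.getConfiguration();
    HCatContext.INSTANCE.setConf(jobConf);   // hcat.* keys from any earlier conf are copied in
    boolean promoteSmallInts = HCatContext.INSTANCE.getConf().isPresent()
        && HCatContext.INSTANCE.getConf().get().getBoolean(
            HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
            HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT);
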
+ */ + public synchronized HCatContext setConf(Configuration newConf) { + Preconditions.checkNotNull(newConf, "Required parameter 'newConf' must not be null."); - if (conf == null) { - conf = newConf; - return this; - } + if (conf == null) { + conf = newConf; + return this; + } - if (conf != newConf) { - for (Map.Entry entry : conf) { - if ((entry.getKey().matches("hcat.*")) && (newConf.get(entry.getKey()) == null)) { - newConf.set(entry.getKey(), entry.getValue()); - } - } - conf = newConf; + if (conf != newConf) { + for (Map.Entry entry : conf) { + if ((entry.getKey().matches("hcat.*")) && (newConf.get(entry.getKey()) == null)) { + newConf.set(entry.getKey(), entry.getValue()); } - return this; + } + conf = newConf; } + return this; + } - /** - * Get the configuration, if there is one. Users are not required to setup HCatContext - * unless they wish to override default behavior, so the configuration may not be present. - * - * @return an Optional that might contain a Configuration - */ - public Optional getConf() { - return Optional.fromNullable(conf); - } + /** + * Get the configuration, if there is one. Users are not required to setup HCatContext + * unless they wish to override default behavior, so the configuration may not be present. + * + * @return an Optional that might contain a Configuration + */ + public Optional getConf() { + return Optional.fromNullable(conf); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatException.java b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatException.java index eeca28f..a2bb2ab 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatException.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatException.java @@ -26,135 +26,135 @@ */ public class HCatException extends IOException { - private static final long serialVersionUID = 1L; - - /** The error type enum for this exception. */ - private final ErrorType errorType; - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - */ - public HCatException(ErrorType errorType) { - this(errorType, null, null); - } - - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - * @param cause the cause - */ - public HCatException(ErrorType errorType, Throwable cause) { - this(errorType, null, cause); - } - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - * @param extraMessage extra messages to add to the message string - */ - public HCatException(ErrorType errorType, String extraMessage) { - this(errorType, extraMessage, null); - } - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - * @param extraMessage extra messages to add to the message string - * @param cause the cause - */ - public HCatException(ErrorType errorType, String extraMessage, Throwable cause) { - super(buildErrorMessage( - errorType, - extraMessage, - cause), cause); - this.errorType = errorType; - } - - - //TODO : remove default error type constructors after all exceptions - //are changed to use error types - - /** - * Instantiates a new hcat exception. - * @param message the error message - */ - public HCatException(String message) { - this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, null); - } - - /** - * Instantiates a new hcat exception. 
- * @param message the error message - * @param cause the cause - */ - public HCatException(String message, Throwable cause) { - this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, cause); - } - - - /** - * Builds the error message string. The error type message is appended with the extra message. If appendCause - * is true for the error type, then the message of the cause also is added to the message. - * @param type the error type - * @param extraMessage the extra message string - * @param cause the cause for the exception - * @return the exception message string - */ - public static String buildErrorMessage(ErrorType type, String extraMessage, Throwable cause) { - - //Initial message is just the error type message - StringBuffer message = new StringBuffer(HCatException.class.getName()); - message.append(" : " + type.getErrorCode()); - message.append(" : " + type.getErrorMessage()); - - if (extraMessage != null) { - //Add the extra message value to buffer - message.append(" : " + extraMessage); - } - - if (type.appendCauseMessage()) { - if (cause != null) { - //Add the cause message to buffer - message.append(". Cause : " + cause.toString()); - } - } - - return message.toString(); + private static final long serialVersionUID = 1L; + + /** The error type enum for this exception. */ + private final ErrorType errorType; + + /** + * Instantiates a new hcat exception. + * @param errorType the error type + */ + public HCatException(ErrorType errorType) { + this(errorType, null, null); + } + + + /** + * Instantiates a new hcat exception. + * @param errorType the error type + * @param cause the cause + */ + public HCatException(ErrorType errorType, Throwable cause) { + this(errorType, null, cause); + } + + /** + * Instantiates a new hcat exception. + * @param errorType the error type + * @param extraMessage extra messages to add to the message string + */ + public HCatException(ErrorType errorType, String extraMessage) { + this(errorType, extraMessage, null); + } + + /** + * Instantiates a new hcat exception. + * @param errorType the error type + * @param extraMessage extra messages to add to the message string + * @param cause the cause + */ + public HCatException(ErrorType errorType, String extraMessage, Throwable cause) { + super(buildErrorMessage( + errorType, + extraMessage, + cause), cause); + this.errorType = errorType; + } + + + //TODO : remove default error type constructors after all exceptions + //are changed to use error types + + /** + * Instantiates a new hcat exception. + * @param message the error message + */ + public HCatException(String message) { + this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, null); + } + + /** + * Instantiates a new hcat exception. + * @param message the error message + * @param cause the cause + */ + public HCatException(String message, Throwable cause) { + this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, cause); + } + + + /** + * Builds the error message string. The error type message is appended with the extra message. If appendCause + * is true for the error type, then the message of the cause also is added to the message. 
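
Concretely, a minimal sketch of what this produces for one of the error types defined above; the wrapper method name describeInvalidTable and the extra message value are hypothetical.

    static String describeInvalidTable() {
      HCatException ex = new HCatException(ErrorType.ERROR_INVALID_TABLE, "default.missing_tbl");
      // getMessage() is the string assembled by buildErrorMessage():
      //   org.apache.hcatalog.common.HCatException : 2000 : Table specified does not exist : default.missing_tbl
      // ex.getErrorCode() == 2000 and ex.isRetriable() == false, the enum defaults.
      return ex.getMessage();
    }
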
+ * @param type the error type + * @param extraMessage the extra message string + * @param cause the cause for the exception + * @return the exception message string + */ + public static String buildErrorMessage(ErrorType type, String extraMessage, Throwable cause) { + + //Initial message is just the error type message + StringBuffer message = new StringBuffer(HCatException.class.getName()); + message.append(" : " + type.getErrorCode()); + message.append(" : " + type.getErrorMessage()); + + if (extraMessage != null) { + //Add the extra message value to buffer + message.append(" : " + extraMessage); } - - /** - * Is this a retriable error. - * @return is it retriable - */ - public boolean isRetriable() { - return errorType.isRetriable(); - } - - /** - * Gets the error type. - * @return the error type enum - */ - public ErrorType getErrorType() { - return errorType; + if (type.appendCauseMessage()) { + if (cause != null) { + //Add the cause message to buffer + message.append(". Cause : " + cause.toString()); + } } - /** - * Gets the error code. - * @return the error code - */ - public int getErrorCode() { - return errorType.getErrorCode(); - } - - /* (non-Javadoc) - * @see java.lang.Throwable#toString() - */ - @Override - public String toString() { - return getMessage(); - } + return message.toString(); + } + + + /** + * Is this a retriable error. + * @return is it retriable + */ + public boolean isRetriable() { + return errorType.isRetriable(); + } + + /** + * Gets the error type. + * @return the error type enum + */ + public ErrorType getErrorType() { + return errorType; + } + + /** + * Gets the error code. + * @return the error code + */ + public int getErrorCode() { + return errorType.getErrorCode(); + } + + /* (non-Javadoc) + * @see java.lang.Throwable#toString() + */ + @Override + public String toString() { + return getMessage(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatUtil.java b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatUtil.java index 80d7b55..27b3aa5 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatUtil.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatUtil.java @@ -79,552 +79,552 @@ */ public class HCatUtil { - private static final Logger LOG = LoggerFactory.getLogger(HCatUtil.class); - private static volatile HiveClientCache hiveClientCache; - private final static int DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS = 2 * 60; - - public static boolean checkJobContextIfRunningFromBackend(JobContext j) { - if (j.getConfiguration().get("mapred.task.id", "").equals("") && - !("true".equals(j.getConfiguration().get("pig.illustrating")))) { - return false; - } - return true; + private static final Logger LOG = LoggerFactory.getLogger(HCatUtil.class); + private static volatile HiveClientCache hiveClientCache; + private final static int DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS = 2 * 60; + + public static boolean checkJobContextIfRunningFromBackend(JobContext j) { + if (j.getConfiguration().get("mapred.task.id", "").equals("") && + !("true".equals(j.getConfiguration().get("pig.illustrating")))) { + return false; } + return true; + } - public static String serialize(Serializable obj) throws IOException { - if (obj == null) { - return ""; - } - try { - ByteArrayOutputStream serialObj = new ByteArrayOutputStream(); - ObjectOutputStream objStream = new ObjectOutputStream(serialObj); - objStream.writeObject(obj); - objStream.close(); - return encodeBytes(serialObj.toByteArray()); - } catch (Exception e) { - 
throw new IOException("Serialization error: " + e.getMessage(), e); - } + public static String serialize(Serializable obj) throws IOException { + if (obj == null) { + return ""; } - - public static Object deserialize(String str) throws IOException { - if (str == null || str.length() == 0) { - return null; - } - try { - ByteArrayInputStream serialObj = new ByteArrayInputStream( - decodeBytes(str)); - ObjectInputStream objStream = new ObjectInputStream(serialObj); - return objStream.readObject(); - } catch (Exception e) { - throw new IOException("Deserialization error: " + e.getMessage(), e); - } + try { + ByteArrayOutputStream serialObj = new ByteArrayOutputStream(); + ObjectOutputStream objStream = new ObjectOutputStream(serialObj); + objStream.writeObject(obj); + objStream.close(); + return encodeBytes(serialObj.toByteArray()); + } catch (Exception e) { + throw new IOException("Serialization error: " + e.getMessage(), e); } + } - public static String encodeBytes(byte[] bytes) { - StringBuffer strBuf = new StringBuffer(); - - for (int i = 0; i < bytes.length; i++) { - strBuf.append((char) (((bytes[i] >> 4) & 0xF) + ('a'))); - strBuf.append((char) (((bytes[i]) & 0xF) + ('a'))); - } - - return strBuf.toString(); + public static Object deserialize(String str) throws IOException { + if (str == null || str.length() == 0) { + return null; } - - public static byte[] decodeBytes(String str) { - byte[] bytes = new byte[str.length() / 2]; - for (int i = 0; i < str.length(); i += 2) { - char c = str.charAt(i); - bytes[i / 2] = (byte) ((c - 'a') << 4); - c = str.charAt(i + 1); - bytes[i / 2] += (c - 'a'); - } - return bytes; + try { + ByteArrayInputStream serialObj = new ByteArrayInputStream( + decodeBytes(str)); + ObjectInputStream objStream = new ObjectInputStream(serialObj); + return objStream.readObject(); + } catch (Exception e) { + throw new IOException("Deserialization error: " + e.getMessage(), e); } + } - public static List getHCatFieldSchemaList( - FieldSchema... fields) throws HCatException { - List result = new ArrayList( - fields.length); - - for (FieldSchema f : fields) { - result.add(HCatSchemaUtils.getHCatFieldSchema(f)); - } + public static String encodeBytes(byte[] bytes) { + StringBuffer strBuf = new StringBuffer(); - return result; + for (int i = 0; i < bytes.length; i++) { + strBuf.append((char) (((bytes[i] >> 4) & 0xF) + ('a'))); + strBuf.append((char) (((bytes[i]) & 0xF) + ('a'))); } - public static List getHCatFieldSchemaList( - List fields) throws HCatException { - if (fields == null) { - return null; - } else { - List result = new ArrayList(); - for (FieldSchema f : fields) { - result.add(HCatSchemaUtils.getHCatFieldSchema(f)); - } - return result; - } - } + return strBuf.toString(); + } - public static HCatSchema extractSchema(Table table) throws HCatException { - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); + public static byte[] decodeBytes(String str) { + byte[] bytes = new byte[str.length() / 2]; + for (int i = 0; i < str.length(); i += 2) { + char c = str.charAt(i); + bytes[i / 2] = (byte) ((c - 'a') << 4); + c = str.charAt(i + 1); + bytes[i / 2] += (c - 'a'); } + return bytes; + } - public static HCatSchema extractSchema(Partition partition) throws HCatException { - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(partition.getCols())); - } + public static List getHCatFieldSchemaList( + FieldSchema... 
fields) throws HCatException { + List result = new ArrayList( + fields.length); - public static List getFieldSchemaList( - List hcatFields) { - if (hcatFields == null) { - return null; - } else { - List result = new ArrayList(); - for (HCatFieldSchema f : hcatFields) { - result.add(HCatSchemaUtils.getFieldSchema(f)); - } - return result; - } + for (FieldSchema f : fields) { + result.add(HCatSchemaUtils.getHCatFieldSchema(f)); } - public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName) - throws NoSuchObjectException, TException, MetaException { - return new Table(client.getTable(dbName, tableName)); + return result; + } + + public static List getHCatFieldSchemaList( + List fields) throws HCatException { + if (fields == null) { + return null; + } else { + List result = new ArrayList(); + for (FieldSchema f : fields) { + result.add(HCatSchemaUtils.getHCatFieldSchema(f)); + } + return result; + } + } + + public static HCatSchema extractSchema(Table table) throws HCatException { + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); + } + + public static HCatSchema extractSchema(Partition partition) throws HCatException { + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(partition.getCols())); + } + + public static List getFieldSchemaList( + List hcatFields) { + if (hcatFields == null) { + return null; + } else { + List result = new ArrayList(); + for (HCatFieldSchema f : hcatFields) { + result.add(HCatSchemaUtils.getFieldSchema(f)); + } + return result; } + } - public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException { - HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); + public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName) + throws NoSuchObjectException, TException, MetaException { + return new Table(client.getTable(dbName, tableName)); + } - if (table.getPartitionKeys().size() != 0) { + public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException { + HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); - // add partition keys to table schema - // NOTE : this assumes that we do not ever have ptn keys as columns - // inside the table schema as well! - for (FieldSchema fs : table.getPartitionKeys()) { - tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - } - return tableSchema; - } + if (table.getPartitionKeys().size() != 0) { - /** - * return the partition columns from a table instance - * - * @param table the instance to extract partition columns from - * @return HCatSchema instance which contains the partition columns - * @throws IOException - */ - public static HCatSchema getPartitionColumns(Table table) throws IOException { - HCatSchema cols = new HCatSchema(new LinkedList()); - if (table.getPartitionKeys().size() != 0) { - for (FieldSchema fs : table.getPartitionKeys()) { - cols.append(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - } - return cols; + // add partition keys to table schema + // NOTE : this assumes that we do not ever have ptn keys as columns + // inside the table schema as well! 
+ for (FieldSchema fs : table.getPartitionKeys()) { + tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + } + return tableSchema; + } + + /** + * return the partition columns from a table instance + * + * @param table the instance to extract partition columns from + * @return HCatSchema instance which contains the partition columns + * @throws IOException + */ + public static HCatSchema getPartitionColumns(Table table) throws IOException { + HCatSchema cols = new HCatSchema(new LinkedList()); + if (table.getPartitionKeys().size() != 0) { + for (FieldSchema fs : table.getPartitionKeys()) { + cols.append(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + } + return cols; + } + + /** + * Validate partition schema, checks if the column types match between the + * partition and the existing table schema. Returns the list of columns + * present in the partition but not in the table. + * + * @param table the table + * @param partitionSchema the partition schema + * @return the list of newly added fields + * @throws IOException Signals that an I/O exception has occurred. + */ + public static List validatePartitionSchema(Table table, + HCatSchema partitionSchema) throws IOException { + Map partitionKeyMap = new HashMap(); + + for (FieldSchema field : table.getPartitionKeys()) { + partitionKeyMap.put(field.getName().toLowerCase(), field); } - /** - * Validate partition schema, checks if the column types match between the - * partition and the existing table schema. Returns the list of columns - * present in the partition but not in the table. - * - * @param table the table - * @param partitionSchema the partition schema - * @return the list of newly added fields - * @throws IOException Signals that an I/O exception has occurred. - */ - public static List validatePartitionSchema(Table table, - HCatSchema partitionSchema) throws IOException { - Map partitionKeyMap = new HashMap(); - - for (FieldSchema field : table.getPartitionKeys()) { - partitionKeyMap.put(field.getName().toLowerCase(), field); - } + List tableCols = table.getCols(); + List newFields = new ArrayList(); - List tableCols = table.getCols(); - List newFields = new ArrayList(); - - for (int i = 0; i < partitionSchema.getFields().size(); i++) { - - FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema - .getFields().get(i)); - - FieldSchema tableField; - if (i < tableCols.size()) { - tableField = tableCols.get(i); - - if (!tableField.getName().equalsIgnoreCase(field.getName())) { - throw new HCatException( - ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, - "Expected column <" + tableField.getName() - + "> at position " + (i + 1) - + ", found column <" + field.getName() - + ">"); - } - } else { - tableField = partitionKeyMap.get(field.getName().toLowerCase()); - - if (tableField != null) { - throw new HCatException( - ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" - + field.getName() + ">"); - } - } - - if (tableField == null) { - // field present in partition but not in table - newFields.add(field); - } else { - // field present in both. 
validate type has not changed - TypeInfo partitionType = TypeInfoUtils - .getTypeInfoFromTypeString(field.getType()); - TypeInfo tableType = TypeInfoUtils - .getTypeInfoFromTypeString(tableField.getType()); - - if (!partitionType.equals(tableType)) { - throw new HCatException( - ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" - + field.getName() + ">, expected <" - + tableType.getTypeName() + ">, got <" - + partitionType.getTypeName() + ">"); - } - } - } + for (int i = 0; i < partitionSchema.getFields().size(); i++) { - return newFields; - } + FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema + .getFields().get(i)); - /** - * Test if the first FsAction is more permissive than the second. This is - * useful in cases where we want to ensure that a file owner has more - * permissions than the group they belong to, for eg. More completely(but - * potentially more cryptically) owner-r >= group-r >= world-r : bitwise - * and-masked with 0444 => 444 >= 440 >= 400 >= 000 owner-w >= group-w >= - * world-w : bitwise and-masked with &0222 => 222 >= 220 >= 200 >= 000 - * owner-x >= group-x >= world-x : bitwise and-masked with &0111 => 111 >= - * 110 >= 100 >= 000 - * - * @return true if first FsAction is more permissive than the second, false - * if not. - */ - public static boolean validateMorePermissive(FsAction first, FsAction second) { - if ((first == FsAction.ALL) || (second == FsAction.NONE) - || (first == second)) { - return true; - } - switch (first) { - case READ_EXECUTE: - return ((second == FsAction.READ) || (second == FsAction.EXECUTE)); - case READ_WRITE: - return ((second == FsAction.READ) || (second == FsAction.WRITE)); - case WRITE_EXECUTE: - return ((second == FsAction.WRITE) || (second == FsAction.EXECUTE)); + FieldSchema tableField; + if (i < tableCols.size()) { + tableField = tableCols.get(i); + + if (!tableField.getName().equalsIgnoreCase(field.getName())) { + throw new HCatException( + ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, + "Expected column <" + tableField.getName() + + "> at position " + (i + 1) + + ", found column <" + field.getName() + + ">"); } - return false; - } + } else { + tableField = partitionKeyMap.get(field.getName().toLowerCase()); - /** - * Ensure that read or write permissions are not granted without also - * granting execute permissions. Essentially, r-- , rw- and -w- are invalid, - * r-x, -wx, rwx, ---, --x are valid - * - * @param perms The FsAction to verify - * @return true if the presence of read or write permission is accompanied - * by execute permissions - */ - public static boolean validateExecuteBitPresentIfReadOrWrite(FsAction perms) { - if ((perms == FsAction.READ) || (perms == FsAction.WRITE) - || (perms == FsAction.READ_WRITE)) { - return false; + if (tableField != null) { + throw new HCatException( + ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + + field.getName() + ">"); + } + } + + if (tableField == null) { + // field present in partition but not in table + newFields.add(field); + } else { + // field present in both. 
validate type has not changed + TypeInfo partitionType = TypeInfoUtils + .getTypeInfoFromTypeString(field.getType()); + TypeInfo tableType = TypeInfoUtils + .getTypeInfoFromTypeString(tableField.getType()); + + if (!partitionType.equals(tableType)) { + throw new HCatException( + ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" + + field.getName() + ">, expected <" + + tableType.getTypeName() + ">, got <" + + partitionType.getTypeName() + ">"); } - return true; + } } - public static Token getJobTrackerDelegationToken( - Configuration conf, String userName) throws Exception { - // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")"); - JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class)); - Token t = jcl - .getDelegationToken(new Text(userName)); - // LOG.info("got "+t); - return t; - - // return null; + return newFields; + } + + /** + * Test if the first FsAction is more permissive than the second. This is + * useful in cases where we want to ensure that a file owner has more + * permissions than the group they belong to, for eg. More completely(but + * potentially more cryptically) owner-r >= group-r >= world-r : bitwise + * and-masked with 0444 => 444 >= 440 >= 400 >= 000 owner-w >= group-w >= + * world-w : bitwise and-masked with &0222 => 222 >= 220 >= 200 >= 000 + * owner-x >= group-x >= world-x : bitwise and-masked with &0111 => 111 >= + * 110 >= 100 >= 000 + * + * @return true if first FsAction is more permissive than the second, false + * if not. + */ + public static boolean validateMorePermissive(FsAction first, FsAction second) { + if ((first == FsAction.ALL) || (second == FsAction.NONE) + || (first == second)) { + return true; } - - public static Token extractThriftToken( - String tokenStrForm, String tokenSignature) throws MetaException, - TException, IOException { - // LOG.info("extractThriftToken("+tokenStrForm+","+tokenSignature+")"); - Token t = new Token(); - t.decodeFromUrlString(tokenStrForm); - t.setService(new Text(tokenSignature)); - // LOG.info("returning "+t); - return t; + switch (first) { + case READ_EXECUTE: + return ((second == FsAction.READ) || (second == FsAction.EXECUTE)); + case READ_WRITE: + return ((second == FsAction.READ) || (second == FsAction.WRITE)); + case WRITE_EXECUTE: + return ((second == FsAction.WRITE) || (second == FsAction.EXECUTE)); } - - /** - * Create an instance of a storage handler defined in storerInfo. If one cannot be found - * then FosterStorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. - * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. - * @param conf job's configuration will be used to configure the Configurable StorageHandler - * @param storerInfo StorerInfo to definining the StorageHandler and InputFormat, OutputFormat and SerDe - * @return storageHandler instance - * @throws IOException - */ - public static HCatStorageHandler getStorageHandler(Configuration conf, StorerInfo storerInfo) throws IOException { - return getStorageHandler(conf, - storerInfo.getStorageHandlerClass(), - storerInfo.getSerdeClass(), - storerInfo.getIfClass(), - storerInfo.getOfClass()); + return false; + } + + /** + * Ensure that read or write permissions are not granted without also + * granting execute permissions. 
Essentially, r-- , rw- and -w- are invalid, + * r-x, -wx, rwx, ---, --x are valid + * + * @param perms The FsAction to verify + * @return true if the presence of read or write permission is accompanied + * by execute permissions + */ + public static boolean validateExecuteBitPresentIfReadOrWrite(FsAction perms) { + if ((perms == FsAction.READ) || (perms == FsAction.WRITE) + || (perms == FsAction.READ_WRITE)) { + return false; } - - public static HCatStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo) throws IOException { - return HCatUtil.getStorageHandler( - conf, - partitionInfo.getStorageHandlerClassName(), - partitionInfo.getSerdeClassName(), - partitionInfo.getInputFormatClassName(), - partitionInfo.getOutputFormatClassName()); + return true; + } + + public static Token getJobTrackerDelegationToken( + Configuration conf, String userName) throws Exception { + // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")"); + JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class)); + Token t = jcl + .getDelegationToken(new Text(userName)); + // LOG.info("got "+t); + return t; + + // return null; + } + + public static Token extractThriftToken( + String tokenStrForm, String tokenSignature) throws MetaException, + TException, IOException { + // LOG.info("extractThriftToken("+tokenStrForm+","+tokenSignature+")"); + Token t = new Token(); + t.decodeFromUrlString(tokenStrForm); + t.setService(new Text(tokenSignature)); + // LOG.info("returning "+t); + return t; + } + + /** + * Create an instance of a storage handler defined in storerInfo. If one cannot be found + * then FosterStorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. + * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. + * @param conf job's configuration will be used to configure the Configurable StorageHandler + * @param storerInfo StorerInfo to definining the StorageHandler and InputFormat, OutputFormat and SerDe + * @return storageHandler instance + * @throws IOException + */ + public static HCatStorageHandler getStorageHandler(Configuration conf, StorerInfo storerInfo) throws IOException { + return getStorageHandler(conf, + storerInfo.getStorageHandlerClass(), + storerInfo.getSerdeClass(), + storerInfo.getIfClass(), + storerInfo.getOfClass()); + } + + public static HCatStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo) throws IOException { + return HCatUtil.getStorageHandler( + conf, + partitionInfo.getStorageHandlerClassName(), + partitionInfo.getSerdeClassName(), + partitionInfo.getInputFormatClassName(), + partitionInfo.getOutputFormatClassName()); + } + + /** + * Create an instance of a storage handler. If storageHandler == null, + * then surrrogate StorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. + * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. 
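// Illustrative aside, not part of the patch hunks: the two FsAction helpers above encode the
// ordering owner >= group >= world and the rule that read/write only makes sense together with
// execute. A minimal sketch using Hadoop's FsPermission (the permission string is arbitrary):
//
//   FsPermission perm = FsPermission.valueOf("-rwxr-x---");
//   HCatUtil.validateMorePermissive(perm.getUserAction(), perm.getGroupAction());   // true: rwx vs r-x
//   HCatUtil.validateMorePermissive(perm.getGroupAction(), perm.getOtherAction());  // true: r-x vs ---
//   HCatUtil.validateExecuteBitPresentIfReadOrWrite(perm.getGroupAction());         // true: r-x carries x
//   HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_WRITE);           // false: rw- lacks x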
+ * @param conf job's configuration will be used to configure the Configurable StorageHandler + * @param storageHandler fully qualified class name of the desired StorageHandle instance + * @param serDe fully qualified class name of the desired SerDe instance + * @param inputFormat fully qualified class name of the desired InputFormat instance + * @param outputFormat fully qualified class name of the desired outputFormat instance + * @return storageHandler instance + * @throws IOException + */ + public static HCatStorageHandler getStorageHandler(Configuration conf, + String storageHandler, + String serDe, + String inputFormat, + String outputFormat) + throws IOException { + + if ((storageHandler == null) || (storageHandler.equals(FosterStorageHandler.class.getName()))) { + try { + FosterStorageHandler fosterStorageHandler = + new FosterStorageHandler(inputFormat, outputFormat, serDe); + fosterStorageHandler.setConf(conf); + return fosterStorageHandler; + } catch (ClassNotFoundException e) { + throw new IOException("Failed to load " + + "foster storage handler", e); + } } - /** - * Create an instance of a storage handler. If storageHandler == null, - * then surrrogate StorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. - * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. - * @param conf job's configuration will be used to configure the Configurable StorageHandler - * @param storageHandler fully qualified class name of the desired StorageHandle instance - * @param serDe fully qualified class name of the desired SerDe instance - * @param inputFormat fully qualified class name of the desired InputFormat instance - * @param outputFormat fully qualified class name of the desired outputFormat instance - * @return storageHandler instance - * @throws IOException - */ - public static HCatStorageHandler getStorageHandler(Configuration conf, - String storageHandler, - String serDe, - String inputFormat, - String outputFormat) - throws IOException { - - if ((storageHandler == null) || (storageHandler.equals(FosterStorageHandler.class.getName()))) { - try { - FosterStorageHandler fosterStorageHandler = - new FosterStorageHandler(inputFormat, outputFormat, serDe); - fosterStorageHandler.setConf(conf); - return fosterStorageHandler; - } catch (ClassNotFoundException e) { - throw new IOException("Failed to load " - + "foster storage handler", e); - } - } - - try { - Class handlerClass = - (Class) Class - .forName(storageHandler, true, JavaUtils.getClassLoader()); - return (HCatStorageHandler) ReflectionUtils.newInstance( - handlerClass, conf); - } catch (ClassNotFoundException e) { - throw new IOException("Error in loading storage handler." - + e.getMessage(), e); - } + try { + Class handlerClass = + (Class) Class + .forName(storageHandler, true, JavaUtils.getClassLoader()); + return (HCatStorageHandler) ReflectionUtils.newInstance( + handlerClass, conf); + } catch (ClassNotFoundException e) { + throw new IOException("Error in loading storage handler." + + e.getMessage(), e); } - - public static Pair getDbAndTableName(String tableName) throws IOException { - String[] dbTableNametokens = tableName.split("\\."); - if (dbTableNametokens.length == 1) { - return new Pair(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - } else if (dbTableNametokens.length == 2) { - return new Pair(dbTableNametokens[0], dbTableNametokens[1]); - } else { - throw new IOException("tableName expected in the form " - + ". or
. Got " + tableName); - } + } + + public static Pair getDbAndTableName(String tableName) throws IOException { + String[] dbTableNametokens = tableName.split("\\."); + if (dbTableNametokens.length == 1) { + return new Pair(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + } else if (dbTableNametokens.length == 2) { + return new Pair(dbTableNametokens[0], dbTableNametokens[1]); + } else { + throw new IOException("tableName expected in the form " + + ".
or
. Got " + tableName); + } + } + + public static Map + getInputJobProperties(HCatStorageHandler storageHandler, + InputJobInfo inputJobInfo) { + TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), + storageHandler.getInputFormatClass(), + storageHandler.getOutputFormatClass(), + inputJobInfo.getTableInfo().getStorerInfo().getProperties()); + if (tableDesc.getJobProperties() == null) { + tableDesc.setJobProperties(new HashMap()); } - public static Map - getInputJobProperties(HCatStorageHandler storageHandler, - InputJobInfo inputJobInfo) { - TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), - storageHandler.getInputFormatClass(), - storageHandler.getOutputFormatClass(), - inputJobInfo.getTableInfo().getStorerInfo().getProperties()); - if (tableDesc.getJobProperties() == null) { - tableDesc.setJobProperties(new HashMap()); - } - - Map jobProperties = new HashMap(); - try { - tableDesc.getJobProperties().put( - HCatConstants.HCAT_KEY_JOB_INFO, - HCatUtil.serialize(inputJobInfo)); - - storageHandler.configureInputJobProperties(tableDesc, - jobProperties); + Map jobProperties = new HashMap(); + try { + tableDesc.getJobProperties().put( + HCatConstants.HCAT_KEY_JOB_INFO, + HCatUtil.serialize(inputJobInfo)); - } catch (IOException e) { - throw new IllegalStateException( - "Failed to configure StorageHandler", e); - } + storageHandler.configureInputJobProperties(tableDesc, + jobProperties); - return jobProperties; + } catch (IOException e) { + throw new IllegalStateException( + "Failed to configure StorageHandler", e); } - @InterfaceAudience.Private - @InterfaceStability.Evolving - public static void - configureOutputStorageHandler(HCatStorageHandler storageHandler, - Configuration conf, - OutputJobInfo outputJobInfo) { - //TODO replace IgnoreKeyTextOutputFormat with a - //HiveOutputFormatWrapper in StorageHandler - TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), - storageHandler.getInputFormatClass(), - IgnoreKeyTextOutputFormat.class, - outputJobInfo.getTableInfo().getStorerInfo().getProperties()); - if (tableDesc.getJobProperties() == null) - tableDesc.setJobProperties(new HashMap()); - for (Map.Entry el : conf) { - tableDesc.getJobProperties().put(el.getKey(), el.getValue()); - } - - Map jobProperties = new HashMap(); - try { - tableDesc.getJobProperties().put( - HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(outputJobInfo)); - - storageHandler.configureOutputJobProperties(tableDesc, - jobProperties); - - for (Map.Entry el : jobProperties.entrySet()) { - conf.set(el.getKey(), el.getValue()); - } - } catch (IOException e) { - throw new IllegalStateException( - "Failed to configure StorageHandler", e); - } + return jobProperties; + } + + @InterfaceAudience.Private + @InterfaceStability.Evolving + public static void + configureOutputStorageHandler(HCatStorageHandler storageHandler, + Configuration conf, + OutputJobInfo outputJobInfo) { + //TODO replace IgnoreKeyTextOutputFormat with a + //HiveOutputFormatWrapper in StorageHandler + TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), + storageHandler.getInputFormatClass(), + IgnoreKeyTextOutputFormat.class, + outputJobInfo.getTableInfo().getStorerInfo().getProperties()); + if (tableDesc.getJobProperties() == null) + tableDesc.setJobProperties(new HashMap()); + for (Map.Entry el : conf) { + tableDesc.getJobProperties().put(el.getKey(), el.getValue()); } - /** - * Replace the contents of dest with the contents of src - * @param src - * @param dest - */ - public static 
void copyConf(Configuration src, Configuration dest) { - dest.clear(); - for (Map.Entry el : src) { - dest.set(el.getKey(), el.getValue()); - } + Map jobProperties = new HashMap(); + try { + tableDesc.getJobProperties().put( + HCatConstants.HCAT_KEY_OUTPUT_INFO, + HCatUtil.serialize(outputJobInfo)); + + storageHandler.configureOutputJobProperties(tableDesc, + jobProperties); + + for (Map.Entry el : jobProperties.entrySet()) { + conf.set(el.getKey(), el.getValue()); + } + } catch (IOException e) { + throw new IllegalStateException( + "Failed to configure StorageHandler", e); } - - /** - * Get or create a hive client depending on whether it exits in cache or not - * @param hiveConf The hive configuration - * @return the client - * @throws MetaException When HiveMetaStoreClient couldn't be created - * @throws IOException - */ - public static HiveMetaStoreClient getHiveClient(HiveConf hiveConf) - throws MetaException, IOException { - - // Singleton behaviour: create the cache instance if required. The cache needs to be created lazily and - // using the expiry time available in hiveConf. - + } + + /** + * Replace the contents of dest with the contents of src + * @param src + * @param dest + */ + public static void copyConf(Configuration src, Configuration dest) { + dest.clear(); + for (Map.Entry el : src) { + dest.set(el.getKey(), el.getValue()); + } + } + + /** + * Get or create a hive client depending on whether it exits in cache or not + * @param hiveConf The hive configuration + * @return the client + * @throws MetaException When HiveMetaStoreClient couldn't be created + * @throws IOException + */ + public static HiveMetaStoreClient getHiveClient(HiveConf hiveConf) + throws MetaException, IOException { + + // Singleton behaviour: create the cache instance if required. The cache needs to be created lazily and + // using the expiry time available in hiveConf. + + if (hiveClientCache == null) { + synchronized (HiveMetaStoreClient.class) { if (hiveClientCache == null) { - synchronized (HiveMetaStoreClient.class) { - if (hiveClientCache == null) { - hiveClientCache = new HiveClientCache(hiveConf.getInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, - DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS)); - } - } - } - try { - return hiveClientCache.get(hiveConf); - } catch (LoginException e) { - throw new IOException("Couldn't create hiveMetaStoreClient, Error getting UGI for user", e); + hiveClientCache = new HiveClientCache(hiveConf.getInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, + DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS)); } + } } - - public static void closeHiveClientQuietly(HiveMetaStoreClient client) { - try { - if (client != null) - client.close(); - } catch (Exception e) { - LOG.debug("Error closing metastore client. 
Ignored the error.", e); - } + try { + return hiveClientCache.get(hiveConf); + } catch (LoginException e) { + throw new IOException("Couldn't create hiveMetaStoreClient, Error getting UGI for user", e); } - - public static HiveConf getHiveConf(Configuration conf) - throws IOException { - - HiveConf hiveConf = new HiveConf(conf, HCatUtil.class); - - //copy the hive conf into the job conf and restore it - //in the backend context - if (conf.get(HCatConstants.HCAT_KEY_HIVE_CONF) == null) { - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(hiveConf.getAllProperties())); - } else { - //Copy configuration properties into the hive conf - Properties properties = (Properties) HCatUtil.deserialize( - conf.get(HCatConstants.HCAT_KEY_HIVE_CONF)); - - for (Map.Entry prop : properties.entrySet()) { - if (prop.getValue() instanceof String) { - hiveConf.set((String) prop.getKey(), (String) prop.getValue()); - } else if (prop.getValue() instanceof Integer) { - hiveConf.setInt((String) prop.getKey(), - (Integer) prop.getValue()); - } else if (prop.getValue() instanceof Boolean) { - hiveConf.setBoolean((String) prop.getKey(), - (Boolean) prop.getValue()); - } else if (prop.getValue() instanceof Long) { - hiveConf.setLong((String) prop.getKey(), (Long) prop.getValue()); - } else if (prop.getValue() instanceof Float) { - hiveConf.setFloat((String) prop.getKey(), - (Float) prop.getValue()); - } - } - } - - if (conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - hiveConf.set("hive.metastore.token.signature", - conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE)); + } + + public static void closeHiveClientQuietly(HiveMetaStoreClient client) { + try { + if (client != null) + client.close(); + } catch (Exception e) { + LOG.debug("Error closing metastore client. Ignored the error.", e); + } + } + + public static HiveConf getHiveConf(Configuration conf) + throws IOException { + + HiveConf hiveConf = new HiveConf(conf, HCatUtil.class); + + //copy the hive conf into the job conf and restore it + //in the backend context + if (conf.get(HCatConstants.HCAT_KEY_HIVE_CONF) == null) { + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(hiveConf.getAllProperties())); + } else { + //Copy configuration properties into the hive conf + Properties properties = (Properties) HCatUtil.deserialize( + conf.get(HCatConstants.HCAT_KEY_HIVE_CONF)); + + for (Map.Entry prop : properties.entrySet()) { + if (prop.getValue() instanceof String) { + hiveConf.set((String) prop.getKey(), (String) prop.getValue()); + } else if (prop.getValue() instanceof Integer) { + hiveConf.setInt((String) prop.getKey(), + (Integer) prop.getValue()); + } else if (prop.getValue() instanceof Boolean) { + hiveConf.setBoolean((String) prop.getKey(), + (Boolean) prop.getValue()); + } else if (prop.getValue() instanceof Long) { + hiveConf.setLong((String) prop.getKey(), (Long) prop.getValue()); + } else if (prop.getValue() instanceof Float) { + hiveConf.setFloat((String) prop.getKey(), + (Float) prop.getValue()); } + } + } - return hiveConf; + if (conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + hiveConf.set("hive.metastore.token.signature", + conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE)); } + return hiveConf; + } - public static JobConf getJobConfFromContext(JobContext jobContext) { - JobConf jobConf; - // we need to convert the jobContext into a jobConf - // 0.18 jobConf (Hive) vs 0.20+ jobContext (HCat) - // begin conversion.. 
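// Illustrative aside, not part of the patch hunks: getHiveConf() above is meant to be called on
// both sides of a job. On the first call it serializes the HiveConf properties into the job
// Configuration under HCatConstants.HCAT_KEY_HIVE_CONF; later calls (e.g. in backend tasks)
// deserialize and re-apply them, so both sides see the same settings ('job' is hypothetical):
//
//   Configuration jobConf = job.getConfiguration();
//   HiveConf submitSide = HCatUtil.getHiveConf(jobConf);   // stores serialized properties in jobConf
//   // ... jobConf ships with the job ...
//   HiveConf taskSide = HCatUtil.getHiveConf(jobConf);     // restores the same properties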
- jobConf = new JobConf(jobContext.getConfiguration()); - // ..end of conversion + public static JobConf getJobConfFromContext(JobContext jobContext) { + JobConf jobConf; + // we need to convert the jobContext into a jobConf + // 0.18 jobConf (Hive) vs 0.20+ jobContext (HCat) + // begin conversion.. + jobConf = new JobConf(jobContext.getConfiguration()); + // ..end of conversion - return jobConf; - } - public static void copyJobPropertiesToJobConf( - Map jobProperties, JobConf jobConf) { - for (Map.Entry entry : jobProperties.entrySet()) { - jobConf.set(entry.getKey(), entry.getValue()); - } + return jobConf; + } + + public static void copyJobPropertiesToJobConf( + Map jobProperties, JobConf jobConf) { + for (Map.Entry entry : jobProperties.entrySet()) { + jobConf.set(entry.getKey(), entry.getValue()); } + } - public static boolean isHadoop23() { - String version = org.apache.hadoop.util.VersionInfo.getVersion(); - if (version.matches("\\b0\\.23\\..+\\b")||version.matches("\\b2\\..*")) - return true; - return false; - } + public static boolean isHadoop23() { + String version = org.apache.hadoop.util.VersionInfo.getVersion(); + if (version.matches("\\b0\\.23\\..+\\b")||version.matches("\\b2\\..*")) + return true; + return false; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HiveClientCache.java b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HiveClientCache.java index 96fe8f1..153a394 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/common/HiveClientCache.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/common/HiveClientCache.java @@ -47,292 +47,292 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.common.HiveClientCache} instead */ class HiveClientCache { - final private Cache hiveCache; - private static final Logger LOG = LoggerFactory.getLogger(HiveClientCache.class); - private final int timeout; - // This lock is used to make sure removalListener won't close a client that is being contemplated for returning by get() - private final Object CACHE_TEARDOWN_LOCK = new Object(); + final private Cache hiveCache; + private static final Logger LOG = LoggerFactory.getLogger(HiveClientCache.class); + private final int timeout; + // This lock is used to make sure removalListener won't close a client that is being contemplated for returning by get() + private final Object CACHE_TEARDOWN_LOCK = new Object(); - private static final AtomicInteger nextId = new AtomicInteger(0); + private static final AtomicInteger nextId = new AtomicInteger(0); - // Since HiveMetaStoreClient is not threadsafe, hive clients are not shared across threads. - // Thread local variable containing each thread's unique ID, is used as one of the keys for the cache - // causing each thread to get a different client even if the hiveConf is same. 
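// Illustrative aside, not part of the patch hunks (table name is hypothetical, checked exceptions
// omitted): HCatUtil.getHiveClient() above hands out clients from this cache, keyed by metastore
// URIs, UGI and the per-thread id below, so a non-thread-safe HiveMetaStoreClient is never shared
// across threads. Callers should always pair it with closeHiveClientQuietly():
//
//   HiveMetaStoreClient client = null;
//   try {
//     client = HCatUtil.getHiveClient(hiveConf);
//     Table table = client.getTable("default", "web_logs");
//   } finally {
//     HCatUtil.closeHiveClientQuietly(client);   // null-safe; close() releases the cached client
//   }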
- private static final ThreadLocal threadId = - new ThreadLocal() { - @Override - protected Integer initialValue() { - return nextId.getAndIncrement(); - } - }; - - private int getThreadId() { - return threadId.get(); - } - - /** - * @param timeout the length of time in seconds after a client is created that it should be automatically removed - */ - public HiveClientCache(final int timeout) { - this.timeout = timeout; - RemovalListener removalListener = - new RemovalListener() { - public void onRemoval(RemovalNotification notification) { - CacheableHiveMetaStoreClient hiveMetaStoreClient = notification.getValue(); - if (hiveMetaStoreClient != null) { - synchronized (CACHE_TEARDOWN_LOCK) { - hiveMetaStoreClient.setExpiredFromCache(); - hiveMetaStoreClient.tearDownIfUnused(); - } - } - } - }; - hiveCache = CacheBuilder.newBuilder() - .expireAfterWrite(timeout, TimeUnit.SECONDS) - .removalListener(removalListener) - .build(); + // Since HiveMetaStoreClient is not threadsafe, hive clients are not shared across threads. + // Thread local variable containing each thread's unique ID, is used as one of the keys for the cache + // causing each thread to get a different client even if the hiveConf is same. + private static final ThreadLocal threadId = + new ThreadLocal() { + @Override + protected Integer initialValue() { + return nextId.getAndIncrement(); + } + }; - // Add a shutdown hook for cleanup, if there are elements remaining in the cache which were not cleaned up. - // This is the best effort approach. Ignore any error while doing so. Notice that most of the clients - // would get cleaned up via either the removalListener or the close() call, only the active clients - // that are in the cache or expired but being used in other threads wont get cleaned. The following code will only - // clean the active cache ones. The ones expired from cache but being hold by other threads are in the mercy - // of finalize() being called. - Thread cleanupHiveClientShutdownThread = new Thread() { - @Override - public void run() { - LOG.debug("Cleaning up hive client cache in ShutDown hook"); - closeAllClientsQuietly(); - } - }; - Runtime.getRuntime().addShutdownHook(cleanupHiveClientShutdownThread); - } + private int getThreadId() { + return threadId.get(); + } - /** - * Note: This doesn't check if they are being used or not, meant only to be called during shutdown etc. - */ - void closeAllClientsQuietly() { - try { - ConcurrentMap elements = hiveCache.asMap(); - for (CacheableHiveMetaStoreClient cacheableHiveMetaStoreClient : elements.values()) { - cacheableHiveMetaStoreClient.tearDown(); + /** + * @param timeout the length of time in seconds after a client is created that it should be automatically removed + */ + public HiveClientCache(final int timeout) { + this.timeout = timeout; + RemovalListener removalListener = + new RemovalListener() { + public void onRemoval(RemovalNotification notification) { + CacheableHiveMetaStoreClient hiveMetaStoreClient = notification.getValue(); + if (hiveMetaStoreClient != null) { + synchronized (CACHE_TEARDOWN_LOCK) { + hiveMetaStoreClient.setExpiredFromCache(); + hiveMetaStoreClient.tearDownIfUnused(); } - } catch (Exception e) { - LOG.warn("Clean up of hive clients in the cache failed. Ignored", e); + } } - } + }; + hiveCache = CacheBuilder.newBuilder() + .expireAfterWrite(timeout, TimeUnit.SECONDS) + .removalListener(removalListener) + .build(); + + // Add a shutdown hook for cleanup, if there are elements remaining in the cache which were not cleaned up. 
+ // This is the best effort approach. Ignore any error while doing so. Notice that most of the clients + // would get cleaned up via either the removalListener or the close() call, only the active clients + // that are in the cache or expired but being used in other threads wont get cleaned. The following code will only + // clean the active cache ones. The ones expired from cache but being hold by other threads are in the mercy + // of finalize() being called. + Thread cleanupHiveClientShutdownThread = new Thread() { + @Override + public void run() { + LOG.debug("Cleaning up hive client cache in ShutDown hook"); + closeAllClientsQuietly(); + } + }; + Runtime.getRuntime().addShutdownHook(cleanupHiveClientShutdownThread); + } - public void cleanup() { - hiveCache.cleanUp(); + /** + * Note: This doesn't check if they are being used or not, meant only to be called during shutdown etc. + */ + void closeAllClientsQuietly() { + try { + ConcurrentMap elements = hiveCache.asMap(); + for (CacheableHiveMetaStoreClient cacheableHiveMetaStoreClient : elements.values()) { + cacheableHiveMetaStoreClient.tearDown(); + } + } catch (Exception e) { + LOG.warn("Clean up of hive clients in the cache failed. Ignored", e); } + } - /** - * Returns a cached client if exists or else creates one, caches and returns it. It also checks that the client is - * healthy and can be reused - * @param hiveConf - * @return the hive client - * @throws MetaException - * @throws IOException - * @throws LoginException - */ - public HiveMetaStoreClient get(final HiveConf hiveConf) throws MetaException, IOException, LoginException { - final HiveClientCacheKey cacheKey = HiveClientCacheKey.fromHiveConf(hiveConf, getThreadId()); - CacheableHiveMetaStoreClient hiveMetaStoreClient = null; - // the hmsc is not shared across threads. So the only way it could get closed while we are doing healthcheck - // is if removalListener closes it. The synchronization takes care that removalListener won't do it - synchronized (CACHE_TEARDOWN_LOCK) { - hiveMetaStoreClient = getOrCreate(cacheKey); - hiveMetaStoreClient.acquire(); - } - if (!hiveMetaStoreClient.isOpen()) { - synchronized (CACHE_TEARDOWN_LOCK) { - hiveCache.invalidate(cacheKey); - hiveMetaStoreClient.close(); - hiveMetaStoreClient = getOrCreate(cacheKey); - hiveMetaStoreClient.acquire(); - } - } - return hiveMetaStoreClient; + public void cleanup() { + hiveCache.cleanUp(); + } + + /** + * Returns a cached client if exists or else creates one, caches and returns it. It also checks that the client is + * healthy and can be reused + * @param hiveConf + * @return the hive client + * @throws MetaException + * @throws IOException + * @throws LoginException + */ + public HiveMetaStoreClient get(final HiveConf hiveConf) throws MetaException, IOException, LoginException { + final HiveClientCacheKey cacheKey = HiveClientCacheKey.fromHiveConf(hiveConf, getThreadId()); + CacheableHiveMetaStoreClient hiveMetaStoreClient = null; + // the hmsc is not shared across threads. So the only way it could get closed while we are doing healthcheck + // is if removalListener closes it. 
The synchronization takes care that removalListener won't do it + synchronized (CACHE_TEARDOWN_LOCK) { + hiveMetaStoreClient = getOrCreate(cacheKey); + hiveMetaStoreClient.acquire(); + } + if (!hiveMetaStoreClient.isOpen()) { + synchronized (CACHE_TEARDOWN_LOCK) { + hiveCache.invalidate(cacheKey); + hiveMetaStoreClient.close(); + hiveMetaStoreClient = getOrCreate(cacheKey); + hiveMetaStoreClient.acquire(); + } } + return hiveMetaStoreClient; + } - /** - * Return from cache if exists else create/cache and return - * @param cacheKey - * @return - * @throws IOException - * @throws MetaException - * @throws LoginException - */ - private CacheableHiveMetaStoreClient getOrCreate(final HiveClientCacheKey cacheKey) throws IOException, MetaException, LoginException { - try { - return hiveCache.get(cacheKey, new Callable() { - @Override - public CacheableHiveMetaStoreClient call() throws MetaException { - return new CacheableHiveMetaStoreClient(cacheKey.getHiveConf(), timeout); - } - }); - } catch (ExecutionException e) { - Throwable t = e.getCause(); - if (t instanceof IOException) { - throw (IOException) t; - } else if (t instanceof MetaException) { - throw (MetaException) t; - } else if (t instanceof LoginException) { - throw (LoginException) t; - } else { - throw new IOException("Error creating hiveMetaStoreClient", t); - } + /** + * Return from cache if exists else create/cache and return + * @param cacheKey + * @return + * @throws IOException + * @throws MetaException + * @throws LoginException + */ + private CacheableHiveMetaStoreClient getOrCreate(final HiveClientCacheKey cacheKey) throws IOException, MetaException, LoginException { + try { + return hiveCache.get(cacheKey, new Callable() { + @Override + public CacheableHiveMetaStoreClient call() throws MetaException { + return new CacheableHiveMetaStoreClient(cacheKey.getHiveConf(), timeout); } + }); + } catch (ExecutionException e) { + Throwable t = e.getCause(); + if (t instanceof IOException) { + throw (IOException) t; + } else if (t instanceof MetaException) { + throw (MetaException) t; + } else if (t instanceof LoginException) { + throw (LoginException) t; + } else { + throw new IOException("Error creating hiveMetaStoreClient", t); + } } + } - /** - * A class to wrap HiveConf and expose equality based only on UserGroupInformation and the metaStoreURIs. - * This becomes the key for the cache and this way the same HiveMetaStoreClient would be returned if - * UserGroupInformation and metaStoreURIs are same. This function can evolve to express - * the cases when HiveConf is different but the same hiveMetaStoreClient can be used - */ - public static class HiveClientCacheKey { - final private String metaStoreURIs; - final private UserGroupInformation ugi; - final private HiveConf hiveConf; - final private int threadId; + /** + * A class to wrap HiveConf and expose equality based only on UserGroupInformation and the metaStoreURIs. + * This becomes the key for the cache and this way the same HiveMetaStoreClient would be returned if + * UserGroupInformation and metaStoreURIs are same. 
This function can evolve to express + * the cases when HiveConf is different but the same hiveMetaStoreClient can be used + */ + public static class HiveClientCacheKey { + final private String metaStoreURIs; + final private UserGroupInformation ugi; + final private HiveConf hiveConf; + final private int threadId; - private HiveClientCacheKey(HiveConf hiveConf, final int threadId) throws IOException, LoginException { - this.metaStoreURIs = hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS); - ugi = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); - this.hiveConf = hiveConf; - this.threadId = threadId; - } + private HiveClientCacheKey(HiveConf hiveConf, final int threadId) throws IOException, LoginException { + this.metaStoreURIs = hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS); + ugi = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); + this.hiveConf = hiveConf; + this.threadId = threadId; + } - public static HiveClientCacheKey fromHiveConf(HiveConf hiveConf, final int threadId) throws IOException, LoginException { - return new HiveClientCacheKey(hiveConf, threadId); - } + public static HiveClientCacheKey fromHiveConf(HiveConf hiveConf, final int threadId) throws IOException, LoginException { + return new HiveClientCacheKey(hiveConf, threadId); + } - public HiveConf getHiveConf() { - return hiveConf; - } + public HiveConf getHiveConf() { + return hiveConf; + } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - HiveClientCacheKey that = (HiveClientCacheKey) o; - return new EqualsBuilder(). - append(this.metaStoreURIs, - that.metaStoreURIs). - append(this.ugi, that.ugi). - append(this.threadId, that.threadId).isEquals(); - } + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + HiveClientCacheKey that = (HiveClientCacheKey) o; + return new EqualsBuilder(). + append(this.metaStoreURIs, + that.metaStoreURIs). + append(this.ugi, that.ugi). + append(this.threadId, that.threadId).isEquals(); + } - @Override - public int hashCode() { - return new HashCodeBuilder(). - append(metaStoreURIs). - append(ugi). - append(threadId).toHashCode(); - } + @Override + public int hashCode() { + return new HashCodeBuilder(). + append(metaStoreURIs). + append(ugi). + append(threadId).toHashCode(); } + } - /** - * Add # of current users on HiveMetaStoreClient, so that the client can be cleaned when no one is using it. - */ - public static class CacheableHiveMetaStoreClient extends HiveMetaStoreClient { - private AtomicInteger users = new AtomicInteger(0); - private volatile boolean expiredFromCache = false; - private boolean isClosed = false; - private final long expiryTime; - private static final int EXPIRY_TIME_EXTENSION_IN_MILLIS = 60 * 1000; + /** + * Add # of current users on HiveMetaStoreClient, so that the client can be cleaned when no one is using it. 
+ */ + public static class CacheableHiveMetaStoreClient extends HiveMetaStoreClient { + private AtomicInteger users = new AtomicInteger(0); + private volatile boolean expiredFromCache = false; + private boolean isClosed = false; + private final long expiryTime; + private static final int EXPIRY_TIME_EXTENSION_IN_MILLIS = 60 * 1000; - public CacheableHiveMetaStoreClient(final HiveConf conf, final int timeout) throws MetaException { - super(conf); - // Extend the expiry time with some extra time on top of guava expiry time to make sure - // that items closed() are for sure expired and would never be returned by guava. - this.expiryTime = System.currentTimeMillis() + timeout * 1000 + EXPIRY_TIME_EXTENSION_IN_MILLIS; - } + public CacheableHiveMetaStoreClient(final HiveConf conf, final int timeout) throws MetaException { + super(conf); + // Extend the expiry time with some extra time on top of guava expiry time to make sure + // that items closed() are for sure expired and would never be returned by guava. + this.expiryTime = System.currentTimeMillis() + timeout * 1000 + EXPIRY_TIME_EXTENSION_IN_MILLIS; + } - private void acquire() { - users.incrementAndGet(); - } + private void acquire() { + users.incrementAndGet(); + } - private void release() { - users.decrementAndGet(); - } + private void release() { + users.decrementAndGet(); + } - public void setExpiredFromCache() { - expiredFromCache = true; - } + public void setExpiredFromCache() { + expiredFromCache = true; + } - public boolean isClosed() { - return isClosed; - } + public boolean isClosed() { + return isClosed; + } - /** - * Make a call to hive meta store and see if the client is still usable. Some calls where the user provides - * invalid data renders the client unusable for future use (example: create a table with very long table name) - * @return - */ - protected boolean isOpen() { - try { - // Look for an unlikely database name and see if either MetaException or TException is thrown - this.getDatabase("NonExistentDatabaseUsedForHealthCheck"); - } catch (NoSuchObjectException e) { - return true; // It is okay if the database doesn't exist - } catch (MetaException e) { - return false; - } catch (TException e) { - return false; - } - return true; - } + /** + * Make a call to hive meta store and see if the client is still usable. Some calls where the user provides + * invalid data renders the client unusable for future use (example: create a table with very long table name) + * @return + */ + protected boolean isOpen() { + try { + // Look for an unlikely database name and see if either MetaException or TException is thrown + this.getDatabase("NonExistentDatabaseUsedForHealthCheck"); + } catch (NoSuchObjectException e) { + return true; // It is okay if the database doesn't exist + } catch (MetaException e) { + return false; + } catch (TException e) { + return false; + } + return true; + } - /** - * Decrement the user count and piggyback this to set expiry flag as well, then teardown(), if conditions are met. - * This *MUST* be called by anyone who uses this client. - */ - @Override - public void close() { - release(); - if (System.currentTimeMillis() >= expiryTime) - setExpiredFromCache(); - tearDownIfUnused(); - } + /** + * Decrement the user count and piggyback this to set expiry flag as well, then teardown(), if conditions are met. + * This *MUST* be called by anyone who uses this client. 
+ */ + @Override + public void close() { + release(); + if (System.currentTimeMillis() >= expiryTime) + setExpiredFromCache(); + tearDownIfUnused(); + } - /** - * Tear down only if - * 1. There are no active user - * 2. It has expired from the cache - */ - private void tearDownIfUnused() { - if (users.get() == 0 && expiredFromCache) { - this.tearDown(); - } - } + /** + * Tear down only if + * 1. There are no active user + * 2. It has expired from the cache + */ + private void tearDownIfUnused() { + if (users.get() == 0 && expiredFromCache) { + this.tearDown(); + } + } - /** - * Close if not closed already - */ - protected synchronized void tearDown() { - try { - if (!isClosed) { - super.close(); - } - isClosed = true; - } catch (Exception e) { - LOG.warn("Error closing hive metastore client. Ignored.", e); - } + /** + * Close if not closed already + */ + protected synchronized void tearDown() { + try { + if (!isClosed) { + super.close(); } + isClosed = true; + } catch (Exception e) { + LOG.warn("Error closing hive metastore client. Ignored.", e); + } + } - /** - * Last effort to clean up, may not even get called. - * @throws Throwable - */ - @Override - protected void finalize() throws Throwable { - try { - this.tearDown(); - } finally { - super.finalize(); - } - } + /** + * Last effort to clean up, may not even get called. + * @throws Throwable + */ + @Override + protected void finalize() throws Throwable { + try { + this.tearDown(); + } finally { + super.finalize(); + } } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/DataType.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/DataType.java index 6b435fb..ba84864 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/DataType.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/DataType.java @@ -30,180 +30,180 @@ */ public abstract class DataType { - public static final byte NULL = 1; - public static final byte BOOLEAN = 5; - public static final byte BYTE = 6; - public static final byte INTEGER = 10; - public static final byte SHORT = 11; - public static final byte LONG = 15; - public static final byte FLOAT = 20; - public static final byte DOUBLE = 25; - public static final byte STRING = 55; - public static final byte BINARY = 60; - - public static final byte MAP = 100; - public static final byte STRUCT = 110; - public static final byte LIST = 120; - public static final byte ERROR = -1; - - /** - * Determine the datatype of an object. - * @param o Object to test. - * @return byte code of the type, or ERROR if we don't know. - */ - public static byte findType(Object o) { - if (o == null) { - return NULL; - } + public static final byte NULL = 1; + public static final byte BOOLEAN = 5; + public static final byte BYTE = 6; + public static final byte INTEGER = 10; + public static final byte SHORT = 11; + public static final byte LONG = 15; + public static final byte FLOAT = 20; + public static final byte DOUBLE = 25; + public static final byte STRING = 55; + public static final byte BINARY = 60; + + public static final byte MAP = 100; + public static final byte STRUCT = 110; + public static final byte LIST = 120; + public static final byte ERROR = -1; + + /** + * Determine the datatype of an object. + * @param o Object to test. + * @return byte code of the type, or ERROR if we don't know. 
+ */ + public static byte findType(Object o) { + if (o == null) { + return NULL; + } - Class clazz = o.getClass(); - - // Try to put the most common first - if (clazz == String.class) { - return STRING; - } else if (clazz == Integer.class) { - return INTEGER; - } else if (clazz == Long.class) { - return LONG; - } else if (clazz == Float.class) { - return FLOAT; - } else if (clazz == Double.class) { - return DOUBLE; - } else if (clazz == Boolean.class) { - return BOOLEAN; - } else if (clazz == Byte.class) { - return BYTE; - } else if (clazz == Short.class) { - return SHORT; - } else if (o instanceof List) { - return LIST; - } else if (o instanceof Map) { - return MAP; - } else if (o instanceof byte[]) { - return BINARY; - } else { - return ERROR; - } + Class clazz = o.getClass(); + + // Try to put the most common first + if (clazz == String.class) { + return STRING; + } else if (clazz == Integer.class) { + return INTEGER; + } else if (clazz == Long.class) { + return LONG; + } else if (clazz == Float.class) { + return FLOAT; + } else if (clazz == Double.class) { + return DOUBLE; + } else if (clazz == Boolean.class) { + return BOOLEAN; + } else if (clazz == Byte.class) { + return BYTE; + } else if (clazz == Short.class) { + return SHORT; + } else if (o instanceof List) { + return LIST; + } else if (o instanceof Map) { + return MAP; + } else if (o instanceof byte[]) { + return BINARY; + } else { + return ERROR; } + } - public static int compare(Object o1, Object o2) { + public static int compare(Object o1, Object o2) { - return compare(o1, o2, findType(o1), findType(o2)); - } + return compare(o1, o2, findType(o1), findType(o2)); + } - public static int compare(Object o1, Object o2, byte dt1, byte dt2) { - if (dt1 == dt2) { - switch (dt1) { - case NULL: - return 0; - - case BOOLEAN: - return ((Boolean) o1).compareTo((Boolean) o2); - - case BYTE: - return ((Byte) o1).compareTo((Byte) o2); - - case INTEGER: - return ((Integer) o1).compareTo((Integer) o2); - - case LONG: - return ((Long) o1).compareTo((Long) o2); - - case FLOAT: - return ((Float) o1).compareTo((Float) o2); - - case DOUBLE: - return ((Double) o1).compareTo((Double) o2); - - case STRING: - return ((String) o1).compareTo((String) o2); - - case SHORT: - return ((Short) o1).compareTo((Short) o2); - - case BINARY: - return compareByteArray((byte[]) o1, (byte[]) o2); - - case LIST: - List l1 = (List) o1; - List l2 = (List) o2; - int len = l1.size(); - if (len != l2.size()) { - return len - l2.size(); - } else { - for (int i = 0; i < len; i++) { - int cmpVal = compare(l1.get(i), l2.get(i)); - if (cmpVal != 0) { - return cmpVal; - } - } - return 0; - } - - case MAP: { - Map m1 = (Map) o1; - Map m2 = (Map) o2; - int sz1 = m1.size(); - int sz2 = m2.size(); - if (sz1 < sz2) { - return -1; - } else if (sz1 > sz2) { - return 1; - } else { - // This is bad, but we have to sort the keys of the maps in order - // to be commutative. 
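// Illustrative aside, not part of the patch hunks: findType() above maps a value's runtime class to
// one of the byte codes declared at the top of DataType, and compare() falls back to those codes
// when the two types differ, so mixed-type ordering stays well defined:
//
//   DataType.findType("hcat");       // STRING  (55)
//   DataType.findType(42);           // INTEGER (10), via the autoboxed Integer
//   DataType.findType(null);         // NULL    (1)
//   DataType.compare(42, "hcat");    // -1, because INTEGER (10) < STRING (55)
//   DataType.compare("ab", "ac");    // negative, same type falls through to String.compareTo
//
// Note: compareByteArray() below tests "o1[i] > o1[i]", i.e. an element against itself; presumably
// "o1[i] > o2[i]" was intended, since the equal case is already handled by the preceding check.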
- TreeMap tm1 = new TreeMap(m1); - TreeMap tm2 = new TreeMap(m2); - Iterator> i1 = tm1.entrySet().iterator(); - Iterator> i2 = tm2.entrySet().iterator(); - while (i1.hasNext()) { - Map.Entry entry1 = i1.next(); - Map.Entry entry2 = i2.next(); - int c = compare(entry1.getValue(), entry2.getValue()); - if (c != 0) { - return c; - } else { - c = compare(entry1.getValue(), entry2.getValue()); - if (c != 0) { - return c; - } - } - } - return 0; - } - } + public static int compare(Object o1, Object o2, byte dt1, byte dt2) { + if (dt1 == dt2) { + switch (dt1) { + case NULL: + return 0; - default: - throw new RuntimeException("Unkown type " + dt1 + - " in compare"); - } - } else { - return dt1 < dt2 ? -1 : 1; - } - } + case BOOLEAN: + return ((Boolean) o1).compareTo((Boolean) o2); - private static int compareByteArray(byte[] o1, byte[] o2) { + case BYTE: + return ((Byte) o1).compareTo((Byte) o2); - for (int i = 0; i < o1.length; i++) { - if (i == o2.length) { - return 1; - } - if (o1[i] == o2[i]) { - continue; + case INTEGER: + return ((Integer) o1).compareTo((Integer) o2); + + case LONG: + return ((Long) o1).compareTo((Long) o2); + + case FLOAT: + return ((Float) o1).compareTo((Float) o2); + + case DOUBLE: + return ((Double) o1).compareTo((Double) o2); + + case STRING: + return ((String) o1).compareTo((String) o2); + + case SHORT: + return ((Short) o1).compareTo((Short) o2); + + case BINARY: + return compareByteArray((byte[]) o1, (byte[]) o2); + + case LIST: + List l1 = (List) o1; + List l2 = (List) o2; + int len = l1.size(); + if (len != l2.size()) { + return len - l2.size(); + } else { + for (int i = 0; i < len; i++) { + int cmpVal = compare(l1.get(i), l2.get(i)); + if (cmpVal != 0) { + return cmpVal; } - if (o1[i] > o1[i]) { - return 1; + } + return 0; + } + + case MAP: { + Map m1 = (Map) o1; + Map m2 = (Map) o2; + int sz1 = m1.size(); + int sz2 = m2.size(); + if (sz1 < sz2) { + return -1; + } else if (sz1 > sz2) { + return 1; + } else { + // This is bad, but we have to sort the keys of the maps in order + // to be commutative. + TreeMap tm1 = new TreeMap(m1); + TreeMap tm2 = new TreeMap(m2); + Iterator> i1 = tm1.entrySet().iterator(); + Iterator> i2 = tm2.entrySet().iterator(); + while (i1.hasNext()) { + Map.Entry entry1 = i1.next(); + Map.Entry entry2 = i2.next(); + int c = compare(entry1.getValue(), entry2.getValue()); + if (c != 0) { + return c; } else { - return -1; + c = compare(entry1.getValue(), entry2.getValue()); + if (c != 0) { + return c; + } } + } + return 0; } + } + + default: + throw new RuntimeException("Unkown type " + dt1 + + " in compare"); + } + } else { + return dt1 < dt2 ? 
-1 : 1; + } + } + + private static int compareByteArray(byte[] o1, byte[] o2) { + + for (int i = 0; i < o1.length; i++) { + if (i == o2.length) { + return 1; + } + if (o1[i] == o2[i]) { + continue; + } + if (o1[i] > o1[i]) { + return 1; + } else { + return -1; + } + } - //bytes in o1 are same as o2 - //in case o2 was longer - if (o2.length > o1.length) { - return -1; - } - return 0; //equals + //bytes in o1 are same as o2 + //in case o2 was longer + if (o2.length > o1.length) { + return -1; } + return 0; //equals + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/DefaultHCatRecord.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/DefaultHCatRecord.java index dc211b1..51c9b2e 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/DefaultHCatRecord.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/DefaultHCatRecord.java @@ -32,102 +32,102 @@ */ public class DefaultHCatRecord extends HCatRecord { - private List contents; + private List contents; - public DefaultHCatRecord() { - contents = new ArrayList(); - } - - public DefaultHCatRecord(int size) { - contents = new ArrayList(size); - for (int i = 0; i < size; i++) { - contents.add(null); - } - } - - @Override - public void remove(int idx) throws HCatException { - contents.remove(idx); - } - - public DefaultHCatRecord(List list) { - contents = list; - } - - @Override - public Object get(int fieldNum) { - return contents.get(fieldNum); - } - - @Override - public List getAll() { - return contents; - } + public DefaultHCatRecord() { + contents = new ArrayList(); + } - @Override - public void set(int fieldNum, Object val) { - contents.set(fieldNum, val); + public DefaultHCatRecord(int size) { + contents = new ArrayList(size); + for (int i = 0; i < size; i++) { + contents.add(null); } - - @Override - public int size() { - return contents.size(); + } + + @Override + public void remove(int idx) throws HCatException { + contents.remove(idx); + } + + public DefaultHCatRecord(List list) { + contents = list; + } + + @Override + public Object get(int fieldNum) { + return contents.get(fieldNum); + } + + @Override + public List getAll() { + return contents; + } + + @Override + public void set(int fieldNum, Object val) { + contents.set(fieldNum, val); + } + + @Override + public int size() { + return contents.size(); + } + + @Override + public void readFields(DataInput in) throws IOException { + + contents.clear(); + int len = in.readInt(); + for (int i = 0; i < len; i++) { + contents.add(ReaderWriter.readDatum(in)); } - - @Override - public void readFields(DataInput in) throws IOException { - - contents.clear(); - int len = in.readInt(); - for (int i = 0; i < len; i++) { - contents.add(ReaderWriter.readDatum(in)); - } + } + + @Override + public void write(DataOutput out) throws IOException { + int sz = size(); + out.writeInt(sz); + for (int i = 0; i < sz; i++) { + ReaderWriter.writeDatum(out, contents.get(i)); } - @Override - public void write(DataOutput out) throws IOException { - int sz = size(); - out.writeInt(sz); - for (int i = 0; i < sz; i++) { - ReaderWriter.writeDatum(out, contents.get(i)); - } + } + @Override + public int hashCode() { + int hash = 1; + for (Object o : contents) { + if (o != null) { + hash = 31 * hash + o.hashCode(); + } } + return hash; + } - @Override - public int hashCode() { - int hash = 1; - for (Object o : contents) { - if (o != null) { - hash = 31 * hash + o.hashCode(); - } - } - return hash; - } - - @Override - public String toString() { - - StringBuilder sb = new 
StringBuilder(); - for (Object o : contents) { - sb.append(o + "\t"); - } - return sb.toString(); - } - - @Override - public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { - return get(recordSchema.getPosition(fieldName)); - } - - @Override - public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { - set(recordSchema.getPosition(fieldName), value); - } + @Override + public String toString() { - @Override - public void copy(HCatRecord r) throws HCatException { - this.contents = r.getAll(); + StringBuilder sb = new StringBuilder(); + for (Object o : contents) { + sb.append(o + "\t"); } + return sb.toString(); + } + + @Override + public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { + return get(recordSchema.getPosition(fieldName)); + } + + @Override + public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { + set(recordSchema.getPosition(fieldName), value); + } + + @Override + public void copy(HCatRecord r) throws HCatException { + this.contents = r.getAll(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecord.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecord.java index 5cf57e0..c11f0a5 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecord.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecord.java @@ -35,117 +35,117 @@ */ public abstract class HCatRecord implements HCatRecordable { - public abstract Object get(String fieldName, HCatSchema recordSchema) throws HCatException; + public abstract Object get(String fieldName, HCatSchema recordSchema) throws HCatException; - public abstract void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException; + public abstract void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException; - public abstract void remove(int idx) throws HCatException; + public abstract void remove(int idx) throws HCatException; - public abstract void copy(HCatRecord r) throws HCatException; + public abstract void copy(HCatRecord r) throws HCatException; - protected Object get(String fieldName, HCatSchema recordSchema, Class clazz) throws HCatException { - // TODO : if needed, verify that recordschema entry for fieldname matches appropriate type. - return get(fieldName, recordSchema); - } + protected Object get(String fieldName, HCatSchema recordSchema, Class clazz) throws HCatException { + // TODO : if needed, verify that recordschema entry for fieldname matches appropriate type. 
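// Illustrative aside, not part of the patch hunks (field values are hypothetical, and ReaderWriter
// is assumed to handle these types): DefaultHCatRecord above is positional; write() emits the field
// count and then one datum per field, and readFields() reverses that, so records round-trip through
// any DataOutput/DataInput pair:
//
//   HCatRecord rec = new DefaultHCatRecord(Arrays.asList((Object) 1, "alice", 3.14d));
//   ByteArrayOutputStream buf = new ByteArrayOutputStream();
//   rec.write(new DataOutputStream(buf));
//
//   HCatRecord copy = new DefaultHCatRecord();
//   copy.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
//   copy.get(1);   // "alice"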
+ return get(fieldName, recordSchema); + } - public Boolean getBoolean(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Boolean) get(fieldName, recordSchema, Boolean.class); - } + public Boolean getBoolean(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Boolean) get(fieldName, recordSchema, Boolean.class); + } - public void setBoolean(String fieldName, HCatSchema recordSchema, Boolean value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setBoolean(String fieldName, HCatSchema recordSchema, Boolean value) throws HCatException { + set(fieldName, recordSchema, value); + } - public byte[] getByteArray(String fieldName, HCatSchema recordSchema) throws HCatException { - return (byte[]) get(fieldName, recordSchema, byte[].class); - } + public byte[] getByteArray(String fieldName, HCatSchema recordSchema) throws HCatException { + return (byte[]) get(fieldName, recordSchema, byte[].class); + } - public void setByteArray(String fieldName, HCatSchema recordSchema, byte[] value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setByteArray(String fieldName, HCatSchema recordSchema, byte[] value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Byte getByte(String fieldName, HCatSchema recordSchema) throws HCatException { - //TINYINT - return (Byte) get(fieldName, recordSchema, Byte.class); - } + public Byte getByte(String fieldName, HCatSchema recordSchema) throws HCatException { + //TINYINT + return (Byte) get(fieldName, recordSchema, Byte.class); + } - public void setByte(String fieldName, HCatSchema recordSchema, Byte value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setByte(String fieldName, HCatSchema recordSchema, Byte value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Short getShort(String fieldName, HCatSchema recordSchema) throws HCatException { - // SMALLINT - return (Short) get(fieldName, recordSchema, Short.class); - } + public Short getShort(String fieldName, HCatSchema recordSchema) throws HCatException { + // SMALLINT + return (Short) get(fieldName, recordSchema, Short.class); + } - public void setShort(String fieldName, HCatSchema recordSchema, Short value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setShort(String fieldName, HCatSchema recordSchema, Short value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Integer getInteger(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Integer) get(fieldName, recordSchema, Integer.class); - } + public Integer getInteger(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Integer) get(fieldName, recordSchema, Integer.class); + } - public void setInteger(String fieldName, HCatSchema recordSchema, Integer value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setInteger(String fieldName, HCatSchema recordSchema, Integer value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Long getLong(String fieldName, HCatSchema recordSchema) throws HCatException { - // BIGINT - return (Long) get(fieldName, recordSchema, Long.class); - } + public Long getLong(String fieldName, HCatSchema recordSchema) throws HCatException { + // BIGINT + return (Long) get(fieldName, recordSchema, Long.class); + } - public void setLong(String fieldName, HCatSchema recordSchema, Long value) throws 
HCatException { - set(fieldName, recordSchema, value); - } + public void setLong(String fieldName, HCatSchema recordSchema, Long value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Float getFloat(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Float) get(fieldName, recordSchema, Float.class); - } + public Float getFloat(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Float) get(fieldName, recordSchema, Float.class); + } - public void setFloat(String fieldName, HCatSchema recordSchema, Float value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setFloat(String fieldName, HCatSchema recordSchema, Float value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Double getDouble(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Double) get(fieldName, recordSchema, Double.class); - } + public Double getDouble(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Double) get(fieldName, recordSchema, Double.class); + } - public void setDouble(String fieldName, HCatSchema recordSchema, Double value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setDouble(String fieldName, HCatSchema recordSchema, Double value) throws HCatException { + set(fieldName, recordSchema, value); + } - public String getString(String fieldName, HCatSchema recordSchema) throws HCatException { - return (String) get(fieldName, recordSchema, String.class); - } + public String getString(String fieldName, HCatSchema recordSchema) throws HCatException { + return (String) get(fieldName, recordSchema, String.class); + } - public void setString(String fieldName, HCatSchema recordSchema, String value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setString(String fieldName, HCatSchema recordSchema, String value) throws HCatException { + set(fieldName, recordSchema, value); + } - @SuppressWarnings("unchecked") - public List getStruct(String fieldName, HCatSchema recordSchema) throws HCatException { - return (List) get(fieldName, recordSchema, List.class); - } + @SuppressWarnings("unchecked") + public List getStruct(String fieldName, HCatSchema recordSchema) throws HCatException { + return (List) get(fieldName, recordSchema, List.class); + } - public void setStruct(String fieldName, HCatSchema recordSchema, List value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setStruct(String fieldName, HCatSchema recordSchema, List value) throws HCatException { + set(fieldName, recordSchema, value); + } - public List getList(String fieldName, HCatSchema recordSchema) throws HCatException { - return (List) get(fieldName, recordSchema, List.class); - } + public List getList(String fieldName, HCatSchema recordSchema) throws HCatException { + return (List) get(fieldName, recordSchema, List.class); + } - public void setList(String fieldName, HCatSchema recordSchema, List value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public Map getMap(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Map) get(fieldName, recordSchema, Map.class); - } - - public void setMap(String fieldName, HCatSchema recordSchema, Map value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setList(String fieldName, HCatSchema recordSchema, List value) throws HCatException { + set(fieldName, recordSchema, value); + 
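As a usage illustration of the typed accessors above, a small self-contained sketch; the two-column layout is hypothetical, and the schema is built the same way JsonSerDe.initialize() builds it later in this patch (a struct TypeInfo, then that struct's sub-schema):

    // Hypothetical round trip through the typed HCatRecord accessors (illustration only).
    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    import org.apache.hcatalog.common.HCatException;
    import org.apache.hcatalog.data.DefaultHCatRecord;
    import org.apache.hcatalog.data.HCatRecord;
    import org.apache.hcatalog.data.schema.HCatSchema;
    import org.apache.hcatalog.data.schema.HCatSchemaUtils;

    public class TypedAccessorExample {
      public static void main(String[] args) throws HCatException {
        List<String> names = Arrays.asList("id", "name");
        List<TypeInfo> types = Arrays.<TypeInfo>asList(
            TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
        StructTypeInfo rowType = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types);
        HCatSchema schema = HCatSchemaUtils.getHCatSchema(rowType).get(0).getStructSubSchema();

        HCatRecord record = new DefaultHCatRecord(Arrays.<Object>asList(1, "alice"));
        record.setInteger("id", schema, 42);                  // resolved via schema.getPosition("id")
        System.out.println(record.getInteger("id", schema));  // 42
        System.out.println(record.getString("name", schema)); // alice
      }
    }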
} + + public Map getMap(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Map) get(fieldName, recordSchema, Map.class); + } + + public void setMap(String fieldName, HCatSchema recordSchema, Map value) throws HCatException { + set(fieldName, recordSchema, value); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java index 3d138d4..383f9d4 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java @@ -29,28 +29,28 @@ */ public class HCatRecordObjectInspector extends StandardStructObjectInspector { - protected HCatRecordObjectInspector(List structFieldNames, - List structFieldObjectInspectors) { - super(structFieldNames, structFieldObjectInspectors); + protected HCatRecordObjectInspector(List structFieldNames, + List structFieldObjectInspectors) { + super(structFieldNames, structFieldObjectInspectors); + } + + @Override + public Object getStructFieldData(Object data, StructField fieldRef) { + if (data == null) { + return new IllegalArgumentException("Data passed in to get field from was null!"); } - @Override - public Object getStructFieldData(Object data, StructField fieldRef) { - if (data == null) { - return new IllegalArgumentException("Data passed in to get field from was null!"); - } - - int fieldID = ((MyField) fieldRef).getFieldID(); - if (!(fieldID >= 0 && fieldID < fields.size())) { - throw new IllegalArgumentException("Invalid field index [" + fieldID + "]"); - } - - return ((HCatRecord) data).get(fieldID); + int fieldID = ((MyField) fieldRef).getFieldID(); + if (!(fieldID >= 0 && fieldID < fields.size())) { + throw new IllegalArgumentException("Invalid field index [" + fieldID + "]"); } - @Override - public List getStructFieldsDataAsList(Object o) { - return ((HCatRecord) o).getAll(); - } + return ((HCatRecord) data).get(fieldID); + } + + @Override + public List getStructFieldsDataAsList(Object o) { + return ((HCatRecord) o).getAll(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java index da4b0f8..f4633b6 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java @@ -40,94 +40,94 @@ */ public class HCatRecordObjectInspectorFactory { - private final static Logger LOG = LoggerFactory.getLogger(HCatRecordObjectInspectorFactory.class); + private final static Logger LOG = LoggerFactory.getLogger(HCatRecordObjectInspectorFactory.class); - static HashMap cachedHCatRecordObjectInspectors = - new HashMap(); - static HashMap cachedObjectInspectors = - new HashMap(); + static HashMap cachedHCatRecordObjectInspectors = + new HashMap(); + static HashMap cachedObjectInspectors = + new HashMap(); - /** - * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into - * @param typeInfo Type definition for the record to look into - * @return appropriate HCatRecordObjectInspector - * @throws SerDeException - */ - public static HCatRecordObjectInspector getHCatRecordObjectInspector( - StructTypeInfo typeInfo) throws SerDeException { - HCatRecordObjectInspector oi = 
cachedHCatRecordObjectInspectors.get(typeInfo); - if (oi == null) { + /** + * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into + * @param typeInfo Type definition for the record to look into + * @return appropriate HCatRecordObjectInspector + * @throws SerDeException + */ + public static HCatRecordObjectInspector getHCatRecordObjectInspector( + StructTypeInfo typeInfo) throws SerDeException { + HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo); + if (oi == null) { - LOG.debug("Got asked for OI for {} [{} ]", typeInfo.getCategory(), typeInfo.getTypeName()); - switch (typeInfo.getCategory()) { - case STRUCT: - StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - List fieldNames = structTypeInfo.getAllStructFieldNames(); - List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - List fieldObjectInspectors = new ArrayList(fieldTypeInfos.size()); - for (int i = 0; i < fieldTypeInfos.size(); i++) { - fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); - } - oi = new HCatRecordObjectInspector(fieldNames, fieldObjectInspectors); - - break; - default: - // Hmm.. not good, - // the only type expected here is STRUCT, which maps to HCatRecord - // - anything else is an error. Return null as the inspector. - throw new SerDeException("TypeInfo [" + typeInfo.getTypeName() - + "] was not of struct type - HCatRecord expected struct type, got [" - + typeInfo.getCategory().toString() + "]"); - } - cachedHCatRecordObjectInspectors.put(typeInfo, oi); + LOG.debug("Got asked for OI for {} [{} ]", typeInfo.getCategory(), typeInfo.getTypeName()); + switch (typeInfo.getCategory()) { + case STRUCT: + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldNames = structTypeInfo.getAllStructFieldNames(); + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + List fieldObjectInspectors = new ArrayList(fieldTypeInfos.size()); + for (int i = 0; i < fieldTypeInfos.size(); i++) { + fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); } - return oi; + oi = new HCatRecordObjectInspector(fieldNames, fieldObjectInspectors); + + break; + default: + // Hmm.. not good, + // the only type expected here is STRUCT, which maps to HCatRecord + // - anything else is an error. Return null as the inspector. 
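Note that the default-branch comment above still says "Return null as the inspector", while the code that follows throws a SerDeException; the throw is what actually happens. For context, a minimal, hypothetical caller of this factory, mirroring what HCatRecordSerDe.initialize() does later in this patch:

    // Hypothetical usage of HCatRecordObjectInspectorFactory (illustration only).
    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.serde2.SerDeException;
    import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    import org.apache.hcatalog.data.HCatRecordObjectInspector;
    import org.apache.hcatalog.data.HCatRecordObjectInspectorFactory;

    public class InspectorFactoryExample {
      public static void main(String[] args) throws SerDeException {
        List<String> names = Arrays.asList("id", "name");
        List<TypeInfo> types = Arrays.<TypeInfo>asList(
            TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
        // Only STRUCT type infos are accepted; anything else makes the factory throw.
        StructTypeInfo rowType = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types);
        HCatRecordObjectInspector oi =
            HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowType);
        System.out.println(oi.getAllStructFieldRefs().size()); // 2 fields; repeat calls are served from the cache
      }
    }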
+ throw new SerDeException("TypeInfo [" + typeInfo.getTypeName() + + "] was not of struct type - HCatRecord expected struct type, got [" + + typeInfo.getCategory().toString() + "]"); + } + cachedHCatRecordObjectInspectors.put(typeInfo, oi); } + return oi; + } - public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) { + public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) { - ObjectInspector oi = cachedObjectInspectors.get(typeInfo); - if (oi == null) { + ObjectInspector oi = cachedObjectInspectors.get(typeInfo); + if (oi == null) { - LOG.debug("Got asked for OI for {}, [{}]", typeInfo.getCategory(), typeInfo.getTypeName()); - switch (typeInfo.getCategory()) { - case PRIMITIVE: - oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); - break; - case STRUCT: - StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - List fieldNames = structTypeInfo.getAllStructFieldNames(); - List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - List fieldObjectInspectors = - new ArrayList(fieldTypeInfos.size()); - for (int i = 0; i < fieldTypeInfos.size(); i++) { - fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); - } - oi = ObjectInspectorFactory.getStandardStructObjectInspector( - fieldNames, fieldObjectInspectors - ); - break; - case LIST: - ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo( - ((ListTypeInfo) typeInfo).getListElementTypeInfo()); - oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector); - break; - case MAP: - ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo( - ((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); - ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo( - ((MapTypeInfo) typeInfo).getMapValueTypeInfo()); - oi = ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector, valueObjectInspector); - break; - default: - oi = null; - } - cachedObjectInspectors.put(typeInfo, oi); + LOG.debug("Got asked for OI for {}, [{}]", typeInfo.getCategory(), typeInfo.getTypeName()); + switch (typeInfo.getCategory()) { + case PRIMITIVE: + oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); + break; + case STRUCT: + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldNames = structTypeInfo.getAllStructFieldNames(); + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + List fieldObjectInspectors = + new ArrayList(fieldTypeInfos.size()); + for (int i = 0; i < fieldTypeInfos.size(); i++) { + fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); } - return oi; + oi = ObjectInspectorFactory.getStandardStructObjectInspector( + fieldNames, fieldObjectInspectors + ); + break; + case LIST: + ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo( + ((ListTypeInfo) typeInfo).getListElementTypeInfo()); + oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector); + break; + case MAP: + ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo( + ((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); + ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo( + ((MapTypeInfo) typeInfo).getMapValueTypeInfo()); + oi = 
ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector, valueObjectInspector); + break; + default: + oi = null; + } + cachedObjectInspectors.put(typeInfo, oi); } + return oi; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordSerDe.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordSerDe.java index f86da7f..96d7281 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordSerDe.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordSerDe.java @@ -54,265 +54,265 @@ */ public class HCatRecordSerDe implements SerDe { - private static final Logger LOG = LoggerFactory.getLogger(HCatRecordSerDe.class); + private static final Logger LOG = LoggerFactory.getLogger(HCatRecordSerDe.class); - public HCatRecordSerDe() throws SerDeException { - } + public HCatRecordSerDe() throws SerDeException { + } - private List columnNames; - private List columnTypes; - private StructTypeInfo rowTypeInfo; + private List columnNames; + private List columnTypes; + private StructTypeInfo rowTypeInfo; - private HCatRecordObjectInspector cachedObjectInspector; + private HCatRecordObjectInspector cachedObjectInspector; - @Override - public void initialize(Configuration conf, Properties tbl) - throws SerDeException { + @Override + public void initialize(Configuration conf, Properties tbl) + throws SerDeException { - LOG.debug("Initializing HCatRecordSerDe"); - LOG.debug("props to serde: {}", tbl.entrySet()); + LOG.debug("Initializing HCatRecordSerDe"); + LOG.debug("props to serde: {}", tbl.entrySet()); - // Get column names and types - String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); - String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); + // Get column names and types + String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); + String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); - // all table column names - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(",")); - } + // all table column names + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } - // all column types - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList(); - } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - } + // all column types + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } - LOG.debug("columns: {} {}", columnNameProperty, columnNames); - LOG.debug("types: {} {}", columnTypeProperty, columnTypes); - assert (columnNames.size() == columnTypes.size()); + LOG.debug("columns: {} {}", columnNameProperty, columnNames); + LOG.debug("types: {} {}", columnTypeProperty, columnTypes); + assert (columnNames.size() == columnTypes.size()); - rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); - cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); - } + rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + } - public void initialize(HCatSchema hsch) throws SerDeException { + public void 
initialize(HCatSchema hsch) throws SerDeException { - LOG.debug("Initializing HCatRecordSerDe through HCatSchema {}.", hsch); + LOG.debug("Initializing HCatRecordSerDe through HCatSchema {}.", hsch); - rowTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hsch.getSchemaAsTypeString()); - cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + rowTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hsch.getSchemaAsTypeString()); + cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); - } + } - /** - * The purpose of a deserialize method is to turn a data blob - * which is a writable representation of the data into an - * object that can then be parsed using the appropriate - * ObjectInspector. In this case, since HCatRecord is directly - * already the Writable object, there's no extra work to be done - * here. Most of the logic resides in the ObjectInspector to be - * able to return values from within the HCatRecord to hive when - * it wants it. - */ - @Override - public Object deserialize(Writable data) throws SerDeException { - if (!(data instanceof HCatRecord)) { - throw new SerDeException(getClass().getName() + ": expects HCatRecord!"); - } - - return (HCatRecord) data; + /** + * The purpose of a deserialize method is to turn a data blob + * which is a writable representation of the data into an + * object that can then be parsed using the appropriate + * ObjectInspector. In this case, since HCatRecord is directly + * already the Writable object, there's no extra work to be done + * here. Most of the logic resides in the ObjectInspector to be + * able to return values from within the HCatRecord to hive when + * it wants it. + */ + @Override + public Object deserialize(Writable data) throws SerDeException { + if (!(data instanceof HCatRecord)) { + throw new SerDeException(getClass().getName() + ": expects HCatRecord!"); } - /** - * The purpose of the serialize method is to turn an object-representation - * with a provided ObjectInspector into a Writable format, which - * the underlying layer can then use to write out. - * - * In this case, it means that Hive will call this method to convert - * an object with appropriate objectinspectors that it knows about, - * to write out a HCatRecord. - */ - @Override - public Writable serialize(Object obj, ObjectInspector objInspector) - throws SerDeException { - if (objInspector.getCategory() != Category.STRUCT) { - throw new SerDeException(getClass().toString() - + " can only serialize struct types, but we got: " - + objInspector.getTypeName()); - } - return new DefaultHCatRecord((List) serializeStruct(obj, (StructObjectInspector) objInspector)); + return (HCatRecord) data; + } + + /** + * The purpose of the serialize method is to turn an object-representation + * with a provided ObjectInspector into a Writable format, which + * the underlying layer can then use to write out. + * + * In this case, it means that Hive will call this method to convert + * an object with appropriate objectinspectors that it knows about, + * to write out a HCatRecord. 
+ */ + @Override + public Writable serialize(Object obj, ObjectInspector objInspector) + throws SerDeException { + if (objInspector.getCategory() != Category.STRUCT) { + throw new SerDeException(getClass().toString() + + " can only serialize struct types, but we got: " + + objInspector.getTypeName()); } + return new DefaultHCatRecord((List) serializeStruct(obj, (StructObjectInspector) objInspector)); + } - /** - * Return serialized HCatRecord from an underlying - * object-representation, and readable by an ObjectInspector - * @param obj : Underlying object-representation - * @param soi : StructObjectInspector - * @return HCatRecord - */ - private static List serializeStruct(Object obj, StructObjectInspector soi) - throws SerDeException { - - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(obj); - - if (list == null) { - return null; - } - - List l = new ArrayList(fields.size()); - - if (fields != null) { - for (int i = 0; i < fields.size(); i++) { - - // Get the field objectInspector and the field object. - ObjectInspector foi = fields.get(i).getFieldObjectInspector(); - Object f = list.get(i); - Object res = serializeField(f, foi); - l.add(i, res); - } - } - return l; - } + /** + * Return serialized HCatRecord from an underlying + * object-representation, and readable by an ObjectInspector + * @param obj : Underlying object-representation + * @param soi : StructObjectInspector + * @return HCatRecord + */ + private static List serializeStruct(Object obj, StructObjectInspector soi) + throws SerDeException { - /** - * Return underlying Java Object from an object-representation - * that is readable by a provided ObjectInspector. - */ - public static Object serializeField(Object field, ObjectInspector fieldObjectInspector) - throws SerDeException { - - Object res; - if (fieldObjectInspector.getCategory() == Category.PRIMITIVE) { - res = serializePrimitiveField(field, fieldObjectInspector); - } else if (fieldObjectInspector.getCategory() == Category.STRUCT) { - res = serializeStruct(field, (StructObjectInspector) fieldObjectInspector); - } else if (fieldObjectInspector.getCategory() == Category.LIST) { - res = serializeList(field, (ListObjectInspector) fieldObjectInspector); - } else if (fieldObjectInspector.getCategory() == Category.MAP) { - res = serializeMap(field, (MapObjectInspector) fieldObjectInspector); - } else { - throw new SerDeException(HCatRecordSerDe.class.toString() - + " does not know what to do with fields of unknown category: " - + fieldObjectInspector.getCategory() + " , type: " + fieldObjectInspector.getTypeName()); - } - return res; - } + List fields = soi.getAllStructFieldRefs(); + List list = soi.getStructFieldsDataAsList(obj); - /** - * Helper method to return underlying Java Map from - * an object-representation that is readable by a provided - * MapObjectInspector - */ - private static Map serializeMap(Object f, MapObjectInspector moi) throws SerDeException { - ObjectInspector koi = moi.getMapKeyObjectInspector(); - ObjectInspector voi = moi.getMapValueObjectInspector(); - Map m = new TreeMap(); - - Map readMap = moi.getMap(f); - if (readMap == null) { - return null; - } else { - for (Map.Entry entry : readMap.entrySet()) { - m.put(serializeField(entry.getKey(), koi), serializeField(entry.getValue(), voi)); - } - } - return m; + if (list == null) { + return null; } - private static List serializeList(Object f, ListObjectInspector loi) throws SerDeException { - List l = loi.getList(f); - if (l == null) { - return null; - } - - 
ObjectInspector eloi = loi.getListElementObjectInspector(); - if (eloi.getCategory() == Category.PRIMITIVE) { - List list = new ArrayList(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(((PrimitiveObjectInspector) eloi).getPrimitiveJavaObject(l.get(i))); - } - return list; - } else if (eloi.getCategory() == Category.STRUCT) { - List> list = new ArrayList>(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi)); - } - return list; - } else if (eloi.getCategory() == Category.LIST) { - List> list = new ArrayList>(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(serializeList(l.get(i), (ListObjectInspector) eloi)); - } - return list; - } else if (eloi.getCategory() == Category.MAP) { - List> list = new ArrayList>(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(serializeMap(l.get(i), (MapObjectInspector) eloi)); - } - return list; - } else { - throw new SerDeException(HCatRecordSerDe.class.toString() - + " does not know what to do with fields of unknown category: " - + eloi.getCategory() + " , type: " + eloi.getTypeName()); - } - } + List l = new ArrayList(fields.size()); - private static Object serializePrimitiveField(Object field, - ObjectInspector fieldObjectInspector) { - - if (field != null && HCatContext.INSTANCE.getConf().isPresent()) { - Configuration conf = HCatContext.INSTANCE.getConf().get(); - - if (field instanceof Boolean && - conf.getBoolean( - HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, - HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) { - return ((Boolean) field) ? 1 : 0; - } else if (field instanceof Short && - conf.getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { - return new Integer((Short) field); - } else if (field instanceof Byte && - conf.getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { - return new Integer((Byte) field); - } - } - - return ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field); - } + if (fields != null) { + for (int i = 0; i < fields.size(); i++) { - /** - * Return an object inspector that can read through the object - * that we return from deserialize(). To wit, that means we need - * to return an ObjectInspector that can read HCatRecord, given - * the type info for it during initialize(). This also means - * that this method cannot and should not be called before initialize() - */ - @Override - public ObjectInspector getObjectInspector() throws SerDeException { - return (ObjectInspector) cachedObjectInspector; + // Get the field objectInspector and the field object. + ObjectInspector foi = fields.get(i).getFieldObjectInspector(); + Object f = list.get(i); + Object res = serializeField(f, foi); + l.add(i, res); + } + } + return l; + } + + /** + * Return underlying Java Object from an object-representation + * that is readable by a provided ObjectInspector. 
+ */ + public static Object serializeField(Object field, ObjectInspector fieldObjectInspector) + throws SerDeException { + + Object res; + if (fieldObjectInspector.getCategory() == Category.PRIMITIVE) { + res = serializePrimitiveField(field, fieldObjectInspector); + } else if (fieldObjectInspector.getCategory() == Category.STRUCT) { + res = serializeStruct(field, (StructObjectInspector) fieldObjectInspector); + } else if (fieldObjectInspector.getCategory() == Category.LIST) { + res = serializeList(field, (ListObjectInspector) fieldObjectInspector); + } else if (fieldObjectInspector.getCategory() == Category.MAP) { + res = serializeMap(field, (MapObjectInspector) fieldObjectInspector); + } else { + throw new SerDeException(HCatRecordSerDe.class.toString() + + " does not know what to do with fields of unknown category: " + + fieldObjectInspector.getCategory() + " , type: " + fieldObjectInspector.getTypeName()); } + return res; + } + + /** + * Helper method to return underlying Java Map from + * an object-representation that is readable by a provided + * MapObjectInspector + */ + private static Map serializeMap(Object f, MapObjectInspector moi) throws SerDeException { + ObjectInspector koi = moi.getMapKeyObjectInspector(); + ObjectInspector voi = moi.getMapValueObjectInspector(); + Map m = new TreeMap(); + + Map readMap = moi.getMap(f); + if (readMap == null) { + return null; + } else { + for (Map.Entry entry : readMap.entrySet()) { + m.put(serializeField(entry.getKey(), koi), serializeField(entry.getValue(), voi)); + } + } + return m; + } - @Override - public Class getSerializedClass() { - return HCatRecord.class; + private static List serializeList(Object f, ListObjectInspector loi) throws SerDeException { + List l = loi.getList(f); + if (l == null) { + return null; } - @Override - public SerDeStats getSerDeStats() { - // no support for statistics yet - return null; + ObjectInspector eloi = loi.getListElementObjectInspector(); + if (eloi.getCategory() == Category.PRIMITIVE) { + List list = new ArrayList(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(((PrimitiveObjectInspector) eloi).getPrimitiveJavaObject(l.get(i))); + } + return list; + } else if (eloi.getCategory() == Category.STRUCT) { + List> list = new ArrayList>(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi)); + } + return list; + } else if (eloi.getCategory() == Category.LIST) { + List> list = new ArrayList>(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(serializeList(l.get(i), (ListObjectInspector) eloi)); + } + return list; + } else if (eloi.getCategory() == Category.MAP) { + List> list = new ArrayList>(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(serializeMap(l.get(i), (MapObjectInspector) eloi)); + } + return list; + } else { + throw new SerDeException(HCatRecordSerDe.class.toString() + + " does not know what to do with fields of unknown category: " + + eloi.getCategory() + " , type: " + eloi.getTypeName()); + } + } + + private static Object serializePrimitiveField(Object field, + ObjectInspector fieldObjectInspector) { + + if (field != null && HCatContext.INSTANCE.getConf().isPresent()) { + Configuration conf = HCatContext.INSTANCE.getConf().get(); + + if (field instanceof Boolean && + conf.getBoolean( + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) { + return ((Boolean) field) ? 
1 : 0; + } else if (field instanceof Short && + conf.getBoolean( + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { + return new Integer((Short) field); + } else if (field instanceof Byte && + conf.getBoolean( + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { + return new Integer((Byte) field); + } } + return ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field); + } + + /** + * Return an object inspector that can read through the object + * that we return from deserialize(). To wit, that means we need + * to return an ObjectInspector that can read HCatRecord, given + * the type info for it during initialize(). This also means + * that this method cannot and should not be called before initialize() + */ + @Override + public ObjectInspector getObjectInspector() throws SerDeException { + return (ObjectInspector) cachedObjectInspector; + } + + @Override + public Class getSerializedClass() { + return HCatRecord.class; + } + + @Override + public SerDeStats getSerDeStats() { + // no support for statistics yet + return null; + } + } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordable.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordable.java index e9f0a25..02ccc0e 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordable.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordable.java @@ -28,30 +28,30 @@ */ public interface HCatRecordable extends Writable { - /** - * Gets the field at the specified index. - * @param fieldNum the field number - * @return the object at the specified index - */ - Object get(int fieldNum); - - /** - * Gets all the fields of the hcat record. - * @return the list of fields - */ - List getAll(); - - /** - * Sets the field at the specified index. - * @param fieldNum the field number - * @param value the value to set - */ - void set(int fieldNum, Object value); - - /** - * Gets the size of the hcat record. - * @return the size - */ - int size(); + /** + * Gets the field at the specified index. + * @param fieldNum the field number + * @return the object at the specified index + */ + Object get(int fieldNum); + + /** + * Gets all the fields of the hcat record. + * @return the list of fields + */ + List getAll(); + + /** + * Sets the field at the specified index. + * @param fieldNum the field number + * @param value the value to set + */ + void set(int fieldNum, Object value); + + /** + * Gets the size of the hcat record. 
+ * @return the size + */ + int size(); } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/JsonSerDe.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/JsonSerDe.java index 66bde04..15f2595 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/JsonSerDe.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/JsonSerDe.java @@ -76,503 +76,503 @@ */ public class JsonSerDe implements SerDe { - private static final Logger LOG = LoggerFactory.getLogger(JsonSerDe.class); - private List columnNames; - private List columnTypes; + private static final Logger LOG = LoggerFactory.getLogger(JsonSerDe.class); + private List columnNames; + private List columnTypes; - private StructTypeInfo rowTypeInfo; - private HCatSchema schema; + private StructTypeInfo rowTypeInfo; + private HCatSchema schema; - private JsonFactory jsonFactory = null; + private JsonFactory jsonFactory = null; - private HCatRecordObjectInspector cachedObjectInspector; + private HCatRecordObjectInspector cachedObjectInspector; - @Override - public void initialize(Configuration conf, Properties tbl) - throws SerDeException { + @Override + public void initialize(Configuration conf, Properties tbl) + throws SerDeException { - LOG.debug("Initializing JsonSerDe"); - LOG.debug("props to serde: {}", tbl.entrySet()); + LOG.debug("Initializing JsonSerDe"); + LOG.debug("props to serde: {}", tbl.entrySet()); - // Get column names and types - String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); - String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); + // Get column names and types + String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); + String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); - // all table column names - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(",")); - } + // all table column names + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } - // all column types - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList(); - } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - } + // all column types + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } - LOG.debug("columns: {}, {}", columnNameProperty, columnNames); - LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes); + LOG.debug("columns: {}, {}", columnNameProperty, columnNames); + LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes); - assert (columnNames.size() == columnTypes.size()); + assert (columnNames.size() == columnTypes.size()); - rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); - cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); - try { - schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema(); - LOG.debug("schema : {}", schema); - LOG.debug("fields : {}", schema.getFieldNames()); - } catch (HCatException e) { - throw new SerDeException(e); - } + cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + try { + schema 
= HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema(); + LOG.debug("schema : {}", schema); + LOG.debug("fields : {}", schema.getFieldNames()); + } catch (HCatException e) { + throw new SerDeException(e); + } - jsonFactory = new JsonFactory(); + jsonFactory = new JsonFactory(); + } + + /** + * Takes JSON string in Text form, and has to return an object representation above + * it that's readable by the corresponding object inspector. + * + * For this implementation, since we're using the jackson parser, we can construct + * our own object implementation, and we use HCatRecord for it + */ + @Override + public Object deserialize(Writable blob) throws SerDeException { + + Text t = (Text) blob; + JsonParser p; + List r = new ArrayList(Collections.nCopies(columnNames.size(), null)); + try { + p = jsonFactory.createJsonParser(new ByteArrayInputStream((t.getBytes()))); + if (p.nextToken() != JsonToken.START_OBJECT) { + throw new IOException("Start token not found where expected"); + } + JsonToken token; + while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) { + // iterate through each token, and create appropriate object here. + populateRecord(r, token, p, schema); + } + } catch (JsonParseException e) { + LOG.warn("Error [{}] parsing json text [{}].", e, t); + LOG.debug(null, e); + throw new SerDeException(e); + } catch (IOException e) { + LOG.warn("Error [{}] parsing json text [{}].", e, t); + LOG.debug(null, e); + throw new SerDeException(e); } - /** - * Takes JSON string in Text form, and has to return an object representation above - * it that's readable by the corresponding object inspector. - * - * For this implementation, since we're using the jackson parser, we can construct - * our own object implementation, and we use HCatRecord for it - */ - @Override - public Object deserialize(Writable blob) throws SerDeException { - - Text t = (Text) blob; - JsonParser p; - List r = new ArrayList(Collections.nCopies(columnNames.size(), null)); - try { - p = jsonFactory.createJsonParser(new ByteArrayInputStream((t.getBytes()))); - if (p.nextToken() != JsonToken.START_OBJECT) { - throw new IOException("Start token not found where expected"); - } - JsonToken token; - while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) { - // iterate through each token, and create appropriate object here. 
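Putting the SerDe above to use: a hypothetical end-to-end round trip through initialize(), deserialize(), and the serialize() method shown further down in this file's hunk. The column layout, table properties, and JSON line are invented for illustration, and columns.types uses Hive's colon-separated form:

    // Hypothetical JsonSerDe round trip (illustration only; not part of this patch).
    import java.util.Properties;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde.serdeConstants;
    import org.apache.hadoop.hive.serde2.SerDeException;
    import org.apache.hadoop.io.Text;
    import org.apache.hcatalog.data.HCatRecord;
    import org.apache.hcatalog.data.JsonSerDe;

    public class JsonSerDeExample {
      public static void main(String[] args) throws SerDeException {
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string");

        JsonSerDe serde = new JsonSerDe();
        serde.initialize(new Configuration(), tbl);

        // deserialize: JSON text -> HCatRecord backed by a plain List of Java objects
        HCatRecord row = (HCatRecord) serde.deserialize(new Text("{\"id\":1,\"name\":\"alice\"}"));
        System.out.println(row.get(1)); // alice

        // serialize: the record, read through the serde's own inspector, back to JSON text
        Text json = (Text) serde.serialize(row, serde.getObjectInspector());
        System.out.println(json); // {"id":1,"name":"alice"}
      }
    }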
- populateRecord(r, token, p, schema); - } - } catch (JsonParseException e) { - LOG.warn("Error [{}] parsing json text [{}].", e, t); - LOG.debug(null, e); - throw new SerDeException(e); - } catch (IOException e) { - LOG.warn("Error [{}] parsing json text [{}].", e, t); - LOG.debug(null, e); - throw new SerDeException(e); - } + return new DefaultHCatRecord(r); + } - return new DefaultHCatRecord(r); + private void populateRecord(List r, JsonToken token, JsonParser p, HCatSchema s) throws IOException { + if (token != JsonToken.FIELD_NAME) { + throw new IOException("Field name expected"); } - - private void populateRecord(List r, JsonToken token, JsonParser p, HCatSchema s) throws IOException { - if (token != JsonToken.FIELD_NAME) { - throw new IOException("Field name expected"); - } - String fieldName = p.getText(); - int fpos; - try { - fpos = s.getPosition(fieldName); - } catch (NullPointerException npe) { - fpos = getPositionFromHiveInternalColumnName(fieldName); - LOG.debug("NPE finding position for field [{}] in schema [{}]", fieldName, s); - if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) { - LOG.error("Hive internal column name {} and position " - + "encoding {} for the column name are at odds", fieldName, fpos); - throw npe; - } - if (fpos == -1) { - return; // unknown field, we return. - } - } - HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos); - Object currField = extractCurrentField(p, null, hcatFieldSchema, false); - r.set(fpos, currField); + String fieldName = p.getText(); + int fpos; + try { + fpos = s.getPosition(fieldName); + } catch (NullPointerException npe) { + fpos = getPositionFromHiveInternalColumnName(fieldName); + LOG.debug("NPE finding position for field [{}] in schema [{}]", fieldName, s); + if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) { + LOG.error("Hive internal column name {} and position " + + "encoding {} for the column name are at odds", fieldName, fpos); + throw npe; + } + if (fpos == -1) { + return; // unknown field, we return. + } } + HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos); + Object currField = extractCurrentField(p, null, hcatFieldSchema, false); + r.set(fpos, currField); + } - public String getHiveInternalColumnName(int fpos) { - return HiveConf.getColumnInternalName(fpos); - } + public String getHiveInternalColumnName(int fpos) { + return HiveConf.getColumnInternalName(fpos); + } - public int getPositionFromHiveInternalColumnName(String internalName) { + public int getPositionFromHiveInternalColumnName(String internalName) { // return HiveConf.getPositionFromInternalName(fieldName); - // The above line should have been all the implementation that - // we need, but due to a bug in that impl which recognizes - // only single-digit columns, we need another impl here. - Pattern internalPattern = Pattern.compile("_col([0-9]+)"); - Matcher m = internalPattern.matcher(internalName); - if (!m.matches()) { - return -1; - } else { - return Integer.parseInt(m.group(1)); - } + // The above line should have been all the implementation that + // we need, but due to a bug in that impl which recognizes + // only single-digit columns, we need another impl here. 
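To make the workaround above concrete, a standalone sketch of the same _col<N> parsing with a couple of hypothetical inputs; as in the patch, names that do not match the pattern map to -1:

    // Standalone illustration of the _col<N> parsing used above (inputs are hypothetical).
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class InternalColumnNameExample {
      private static final Pattern INTERNAL = Pattern.compile("_col([0-9]+)");

      static int positionOf(String internalName) {
        Matcher m = INTERNAL.matcher(internalName);
        return m.matches() ? Integer.parseInt(m.group(1)) : -1;
      }

      public static void main(String[] args) {
        System.out.println(positionOf("_col7"));  // 7
        System.out.println(positionOf("_col12")); // 12 (the multi-digit case the comment above mentions)
        System.out.println(positionOf("name"));   // -1: not a Hive internal column name
      }
    }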
+ Pattern internalPattern = Pattern.compile("_col([0-9]+)"); + Matcher m = internalPattern.matcher(internalName); + if (!m.matches()) { + return -1; + } else { + return Integer.parseInt(m.group(1)); + } + } + + /** + * Utility method to extract current expected field from given JsonParser + * + * To get the field, we need either a type or a hcatFieldSchema(necessary for complex types) + * It is possible that one of them can be null, and so, if so, the other is instantiated + * from the other + * + * isTokenCurrent is a boolean variable also passed in, which determines + * if the JsonParser is already at the token we expect to read next, or + * needs advancing to the next before we read. + */ + private Object extractCurrentField(JsonParser p, Type t, + HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException, + HCatException { + Object val = null; + JsonToken valueToken; + if (isTokenCurrent) { + valueToken = p.getCurrentToken(); + } else { + valueToken = p.nextToken(); } - /** - * Utility method to extract current expected field from given JsonParser - * - * To get the field, we need either a type or a hcatFieldSchema(necessary for complex types) - * It is possible that one of them can be null, and so, if so, the other is instantiated - * from the other - * - * isTokenCurrent is a boolean variable also passed in, which determines - * if the JsonParser is already at the token we expect to read next, or - * needs advancing to the next before we read. - */ - private Object extractCurrentField(JsonParser p, Type t, - HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException, - HCatException { - Object val = null; - JsonToken valueToken; - if (isTokenCurrent) { - valueToken = p.getCurrentToken(); + if (hcatFieldSchema != null) { + t = hcatFieldSchema.getType(); + } + switch (t) { + case INT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue(); + break; + case TINYINT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue(); + break; + case SMALLINT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue(); + break; + case BIGINT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue(); + break; + case BOOLEAN: + String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText(); + if (bval != null) { + val = Boolean.valueOf(bval); + } else { + val = null; + } + break; + case FLOAT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue(); + break; + case DOUBLE: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue(); + break; + case STRING: + val = (valueToken == JsonToken.VALUE_NULL) ? 
null : p.getText(); + break; + case BINARY: + throw new IOException("JsonSerDe does not support BINARY type"); + case ARRAY: + if (valueToken == JsonToken.VALUE_NULL) { + val = null; + break; + } + if (valueToken != JsonToken.START_ARRAY) { + throw new IOException("Start of Array expected"); + } + List arr = new ArrayList(); + while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) { + arr.add(extractCurrentField(p, null, hcatFieldSchema.getArrayElementSchema().get(0), true)); + } + val = arr; + break; + case MAP: + if (valueToken == JsonToken.VALUE_NULL) { + val = null; + break; + } + if (valueToken != JsonToken.START_OBJECT) { + throw new IOException("Start of Object expected"); + } + Map map = new LinkedHashMap(); + Type keyType = hcatFieldSchema.getMapKeyType(); + HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0); + while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { + Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), keyType); + Object v; + if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT) { + v = extractCurrentField(p, null, valueSchema, false); } else { - valueToken = p.nextToken(); + v = extractCurrentField(p, null, valueSchema, true); } - if (hcatFieldSchema != null) { - t = hcatFieldSchema.getType(); - } - switch (t) { - case INT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue(); - break; - case TINYINT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue(); - break; - case SMALLINT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue(); - break; - case BIGINT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue(); - break; - case BOOLEAN: - String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText(); - if (bval != null) { - val = Boolean.valueOf(bval); - } else { - val = null; - } - break; - case FLOAT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue(); - break; - case DOUBLE: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue(); - break; - case STRING: - val = (valueToken == JsonToken.VALUE_NULL) ? 
null : p.getText(); - break; - case BINARY: - throw new IOException("JsonSerDe does not support BINARY type"); - case ARRAY: - if (valueToken == JsonToken.VALUE_NULL) { - val = null; - break; - } - if (valueToken != JsonToken.START_ARRAY) { - throw new IOException("Start of Array expected"); - } - List arr = new ArrayList(); - while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) { - arr.add(extractCurrentField(p, null, hcatFieldSchema.getArrayElementSchema().get(0), true)); - } - val = arr; - break; - case MAP: - if (valueToken == JsonToken.VALUE_NULL) { - val = null; - break; - } - if (valueToken != JsonToken.START_OBJECT) { - throw new IOException("Start of Object expected"); - } - Map map = new LinkedHashMap(); - Type keyType = hcatFieldSchema.getMapKeyType(); - HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0); - while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { - Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), keyType); - Object v; - if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT) { - v = extractCurrentField(p, null, valueSchema, false); - } else { - v = extractCurrentField(p, null, valueSchema, true); - } - - map.put(k, v); - } - val = map; - break; - case STRUCT: - if (valueToken == JsonToken.VALUE_NULL) { - val = null; - break; - } - if (valueToken != JsonToken.START_OBJECT) { - throw new IOException("Start of Object expected"); - } - HCatSchema subSchema = hcatFieldSchema.getStructSubSchema(); - int sz = subSchema.getFieldNames().size(); - - List struct = new ArrayList(Collections.nCopies(sz, null)); - while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { - populateRecord(struct, valueToken, p, subSchema); - } - val = struct; - break; - } - return val; + map.put(k, v); + } + val = map; + break; + case STRUCT: + if (valueToken == JsonToken.VALUE_NULL) { + val = null; + break; + } + if (valueToken != JsonToken.START_OBJECT) { + throw new IOException("Start of Object expected"); + } + HCatSchema subSchema = hcatFieldSchema.getStructSubSchema(); + int sz = subSchema.getFieldNames().size(); + + List struct = new ArrayList(Collections.nCopies(sz, null)); + while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { + populateRecord(struct, valueToken, p, subSchema); + } + val = struct; + break; } - - private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException { - switch (t) { - case INT: - return Integer.valueOf(s); - case TINYINT: - return Byte.valueOf(s); - case SMALLINT: - return Short.valueOf(s); - case BIGINT: - return Long.valueOf(s); - case BOOLEAN: - return (s.equalsIgnoreCase("true")); - case FLOAT: - return Float.valueOf(s); - case DOUBLE: - return Double.valueOf(s); - case STRING: - return s; - case BINARY: - throw new IOException("JsonSerDe does not support BINARY type"); - } - throw new IOException("Could not convert from string to map type " + t); + return val; + } + + private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException { + switch (t) { + case INT: + return Integer.valueOf(s); + case TINYINT: + return Byte.valueOf(s); + case SMALLINT: + return Short.valueOf(s); + case BIGINT: + return Long.valueOf(s); + case BOOLEAN: + return (s.equalsIgnoreCase("true")); + case FLOAT: + return Float.valueOf(s); + case DOUBLE: + return Double.valueOf(s); + case STRING: + return s; + case BINARY: + throw new IOException("JsonSerDe does not support BINARY type"); } - - /** - * Given an object and object inspector pair, traverse the object - 
* and generate a Text representation of the object. - */ - @Override - public Writable serialize(Object obj, ObjectInspector objInspector) - throws SerDeException { - StringBuilder sb = new StringBuilder(); - try { - - StructObjectInspector soi = (StructObjectInspector) objInspector; - List structFields = soi.getAllStructFieldRefs(); - assert (columnNames.size() == structFields.size()); - if (obj == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - for (int i = 0; i < structFields.size(); i++) { - if (i > 0) { - sb.append(SerDeUtils.COMMA); - } - sb.append(SerDeUtils.QUOTE); - sb.append(columnNames.get(i)); - sb.append(SerDeUtils.QUOTE); - sb.append(SerDeUtils.COLON); - buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)), - structFields.get(i).getFieldObjectInspector()); - } - sb.append(SerDeUtils.RBRACE); - } - - } catch (IOException e) { - LOG.warn("Error generating json text from object.", e); - throw new SerDeException(e); + throw new IOException("Could not convert from string to map type " + t); + } + + /** + * Given an object and object inspector pair, traverse the object + * and generate a Text representation of the object. + */ + @Override + public Writable serialize(Object obj, ObjectInspector objInspector) + throws SerDeException { + StringBuilder sb = new StringBuilder(); + try { + + StructObjectInspector soi = (StructObjectInspector) objInspector; + List structFields = soi.getAllStructFieldRefs(); + assert (columnNames.size() == structFields.size()); + if (obj == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + for (int i = 0; i < structFields.size(); i++) { + if (i > 0) { + sb.append(SerDeUtils.COMMA); + } + sb.append(SerDeUtils.QUOTE); + sb.append(columnNames.get(i)); + sb.append(SerDeUtils.QUOTE); + sb.append(SerDeUtils.COLON); + buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)), + structFields.get(i).getFieldObjectInspector()); } - return new Text(sb.toString()); - } + sb.append(SerDeUtils.RBRACE); + } - // TODO : code section copied over from SerDeUtils because of non-standard json production there - // should use quotes for all field names. We should fix this there, and then remove this copy. - // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES - // for details - trying to enable Jackson to ignore that doesn't seem to work(compilation failure - // when attempting to use that feature, so having to change the production itself. - // Also, throws IOException when Binary is detected. - private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException { - - switch (oi.getCategory()) { - case PRIMITIVE: { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - if (o == null) { - sb.append("null"); - } else { - switch (poi.getPrimitiveCategory()) { - case BOOLEAN: { - boolean b = ((BooleanObjectInspector) poi).get(o); - sb.append(b ? 
"true" : "false"); - break; - } - case BYTE: { - sb.append(((ByteObjectInspector) poi).get(o)); - break; - } - case SHORT: { - sb.append(((ShortObjectInspector) poi).get(o)); - break; - } - case INT: { - sb.append(((IntObjectInspector) poi).get(o)); - break; - } - case LONG: { - sb.append(((LongObjectInspector) poi).get(o)); - break; - } - case FLOAT: { - sb.append(((FloatObjectInspector) poi).get(o)); - break; - } - case DOUBLE: { - sb.append(((DoubleObjectInspector) poi).get(o)); - break; - } - case STRING: { - sb.append('"'); - sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi) - .getPrimitiveJavaObject(o))); - sb.append('"'); - break; - } - case TIMESTAMP: { - sb.append('"'); - sb.append(((TimestampObjectInspector) poi) - .getPrimitiveWritableObject(o)); - sb.append('"'); - break; - } - case BINARY: { - throw new IOException("JsonSerDe does not support BINARY type"); - } - default: - throw new RuntimeException("Unknown primitive type: " - + poi.getPrimitiveCategory()); - } - } - break; + } catch (IOException e) { + LOG.warn("Error generating json text from object.", e); + throw new SerDeException(e); + } + return new Text(sb.toString()); + } + + // TODO : code section copied over from SerDeUtils because of non-standard json production there + // should use quotes for all field names. We should fix this there, and then remove this copy. + // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES + // for details - trying to enable Jackson to ignore that doesn't seem to work(compilation failure + // when attempting to use that feature, so having to change the production itself. + // Also, throws IOException when Binary is detected. + private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException { + + switch (oi.getCategory()) { + case PRIMITIVE: { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + if (o == null) { + sb.append("null"); + } else { + switch (poi.getPrimitiveCategory()) { + case BOOLEAN: { + boolean b = ((BooleanObjectInspector) poi).get(o); + sb.append(b ? 
"true" : "false"); + break; + } + case BYTE: { + sb.append(((ByteObjectInspector) poi).get(o)); + break; + } + case SHORT: { + sb.append(((ShortObjectInspector) poi).get(o)); + break; + } + case INT: { + sb.append(((IntObjectInspector) poi).get(o)); + break; + } + case LONG: { + sb.append(((LongObjectInspector) poi).get(o)); + break; + } + case FLOAT: { + sb.append(((FloatObjectInspector) poi).get(o)); + break; } - case LIST: { - ListObjectInspector loi = (ListObjectInspector) oi; - ObjectInspector listElementObjectInspector = loi - .getListElementObjectInspector(); - List olist = loi.getList(o); - if (olist == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACKET); - for (int i = 0; i < olist.size(); i++) { - if (i > 0) { - sb.append(SerDeUtils.COMMA); - } - buildJSONString(sb, olist.get(i), listElementObjectInspector); - } - sb.append(SerDeUtils.RBRACKET); - } - break; + case DOUBLE: { + sb.append(((DoubleObjectInspector) poi).get(o)); + break; } - case MAP: { - MapObjectInspector moi = (MapObjectInspector) oi; - ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); - ObjectInspector mapValueObjectInspector = moi - .getMapValueObjectInspector(); - Map omap = moi.getMap(o); - if (omap == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - boolean first = true; - for (Object entry : omap.entrySet()) { - if (first) { - first = false; - } else { - sb.append(SerDeUtils.COMMA); - } - Map.Entry e = (Map.Entry) entry; - StringBuilder keyBuilder = new StringBuilder(); - buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector); - String keyString = keyBuilder.toString().trim(); - boolean doQuoting = (!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE); - if (doQuoting) { - sb.append(SerDeUtils.QUOTE); - } - sb.append(keyString); - if (doQuoting) { - sb.append(SerDeUtils.QUOTE); - } - sb.append(SerDeUtils.COLON); - buildJSONString(sb, e.getValue(), mapValueObjectInspector); - } - sb.append(SerDeUtils.RBRACE); - } - break; + case STRING: { + sb.append('"'); + sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi) + .getPrimitiveJavaObject(o))); + sb.append('"'); + break; } - case STRUCT: { - StructObjectInspector soi = (StructObjectInspector) oi; - List structFields = soi.getAllStructFieldRefs(); - if (o == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - for (int i = 0; i < structFields.size(); i++) { - if (i > 0) { - sb.append(SerDeUtils.COMMA); - } - sb.append(SerDeUtils.QUOTE); - sb.append(structFields.get(i).getFieldName()); - sb.append(SerDeUtils.QUOTE); - sb.append(SerDeUtils.COLON); - buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)), - structFields.get(i).getFieldObjectInspector()); - } - sb.append(SerDeUtils.RBRACE); - } - break; + case TIMESTAMP: { + sb.append('"'); + sb.append(((TimestampObjectInspector) poi) + .getPrimitiveWritableObject(o)); + sb.append('"'); + break; } - case UNION: { - UnionObjectInspector uoi = (UnionObjectInspector) oi; - if (o == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - sb.append(uoi.getTag(o)); - sb.append(SerDeUtils.COLON); - buildJSONString(sb, uoi.getField(o), - uoi.getObjectInspectors().get(uoi.getTag(o))); - sb.append(SerDeUtils.RBRACE); - } - break; + case BINARY: { + throw new IOException("JsonSerDe does not support BINARY type"); } default: - throw new RuntimeException("Unknown type in ObjectInspector!"); + throw new RuntimeException("Unknown primitive type: " + + poi.getPrimitiveCategory()); } 
+ } + break; } - - - /** - * Returns an object inspector for the specified schema that - * is capable of reading in the object representation of the JSON string - */ - @Override - public ObjectInspector getObjectInspector() throws SerDeException { - return cachedObjectInspector; + case LIST: { + ListObjectInspector loi = (ListObjectInspector) oi; + ObjectInspector listElementObjectInspector = loi + .getListElementObjectInspector(); + List olist = loi.getList(o); + if (olist == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACKET); + for (int i = 0; i < olist.size(); i++) { + if (i > 0) { + sb.append(SerDeUtils.COMMA); + } + buildJSONString(sb, olist.get(i), listElementObjectInspector); + } + sb.append(SerDeUtils.RBRACKET); + } + break; } - - @Override - public Class getSerializedClass() { - return Text.class; + case MAP: { + MapObjectInspector moi = (MapObjectInspector) oi; + ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); + ObjectInspector mapValueObjectInspector = moi + .getMapValueObjectInspector(); + Map omap = moi.getMap(o); + if (omap == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + boolean first = true; + for (Object entry : omap.entrySet()) { + if (first) { + first = false; + } else { + sb.append(SerDeUtils.COMMA); + } + Map.Entry e = (Map.Entry) entry; + StringBuilder keyBuilder = new StringBuilder(); + buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector); + String keyString = keyBuilder.toString().trim(); + boolean doQuoting = (!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE); + if (doQuoting) { + sb.append(SerDeUtils.QUOTE); + } + sb.append(keyString); + if (doQuoting) { + sb.append(SerDeUtils.QUOTE); + } + sb.append(SerDeUtils.COLON); + buildJSONString(sb, e.getValue(), mapValueObjectInspector); + } + sb.append(SerDeUtils.RBRACE); + } + break; } - - @Override - public SerDeStats getSerDeStats() { - // no support for statistics yet - return null; + case STRUCT: { + StructObjectInspector soi = (StructObjectInspector) oi; + List structFields = soi.getAllStructFieldRefs(); + if (o == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + for (int i = 0; i < structFields.size(); i++) { + if (i > 0) { + sb.append(SerDeUtils.COMMA); + } + sb.append(SerDeUtils.QUOTE); + sb.append(structFields.get(i).getFieldName()); + sb.append(SerDeUtils.QUOTE); + sb.append(SerDeUtils.COLON); + buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)), + structFields.get(i).getFieldObjectInspector()); + } + sb.append(SerDeUtils.RBRACE); + } + break; + } + case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector) oi; + if (o == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + sb.append(uoi.getTag(o)); + sb.append(SerDeUtils.COLON); + buildJSONString(sb, uoi.getField(o), + uoi.getObjectInspectors().get(uoi.getTag(o))); + sb.append(SerDeUtils.RBRACE); + } + break; + } + default: + throw new RuntimeException("Unknown type in ObjectInspector!"); } + } + + + /** + * Returns an object inspector for the specified schema that + * is capable of reading in the object representation of the JSON string + */ + @Override + public ObjectInspector getObjectInspector() throws SerDeException { + return cachedObjectInspector; + } + + @Override + public Class getSerializedClass() { + return Text.class; + } + + @Override + public SerDeStats getSerDeStats() { + // no support for statistics yet + return null; + } } diff --git 
a/hcatalog/core/src/main/java/org/apache/hcatalog/data/LazyHCatRecord.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/LazyHCatRecord.java index 421c149..f1eb349 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/LazyHCatRecord.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/LazyHCatRecord.java @@ -42,106 +42,106 @@ */ public class LazyHCatRecord extends HCatRecord { - public static final Logger LOG = LoggerFactory.getLogger(LazyHCatRecord.class.getName()); - - private Object wrappedObject; - private StructObjectInspector soi; - - @Override - public Object get(int fieldNum) { - try { - StructField fref = soi.getAllStructFieldRefs().get(fieldNum); - return HCatRecordSerDe.serializeField( - soi.getStructFieldData(wrappedObject, fref), - fref.getFieldObjectInspector()); - } catch (SerDeException e) { - throw new IllegalStateException("SerDe Exception deserializing",e); - } + public static final Logger LOG = LoggerFactory.getLogger(LazyHCatRecord.class.getName()); + + private Object wrappedObject; + private StructObjectInspector soi; + + @Override + public Object get(int fieldNum) { + try { + StructField fref = soi.getAllStructFieldRefs().get(fieldNum); + return HCatRecordSerDe.serializeField( + soi.getStructFieldData(wrappedObject, fref), + fref.getFieldObjectInspector()); + } catch (SerDeException e) { + throw new IllegalStateException("SerDe Exception deserializing",e); } + } - @Override - public List getAll() { - List r = new ArrayList(this.size()); - for (int i = 0; i < this.size(); i++){ - r.add(i, get(i)); - } - return r; + @Override + public List getAll() { + List r = new ArrayList(this.size()); + for (int i = 0; i < this.size(); i++){ + r.add(i, get(i)); } - - @Override - public void set(int fieldNum, Object value) { - throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); - } - - @Override - public int size() { - return soi.getAllStructFieldRefs().size(); - } - - @Override - public void readFields(DataInput in) throws IOException { - throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" - + " an object/object inspector as a HCatRecord " - + "- it does not need to be read from DataInput."); + return r; + } + + @Override + public void set(int fieldNum, Object value) { + throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); + } + + @Override + public int size() { + return soi.getAllStructFieldRefs().size(); + } + + @Override + public void readFields(DataInput in) throws IOException { + throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" + + " an object/object inspector as a HCatRecord " + + "- it does not need to be read from DataInput."); + } + + @Override + public void write(DataOutput out) throws IOException { + throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" + + " an object/object inspector as a HCatRecord " + + "- it does not need to be written to a DataOutput."); + } + + @Override + public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { + int idx = recordSchema.getPosition(fieldName); + return get(idx); + } + + @Override + public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { + throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); + } + + @Override + public void remove(int idx) throws HCatException { + throw new UnsupportedOperationException("not allowed to run remove() on LazyHCatRecord"); + } + + 
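For context, a minimal usage sketch of the LazyHCatRecord class touched in this hunk (a sketch only, not part of the patch; rowObject, rowInspector, and the helper name are illustrative assumptions standing in for whatever SerDe/ObjectInspector pair produced the row):

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.LazyHCatRecord;

public class LazyHCatRecordSketch {
  // Wraps an already-deserialized Hive row; fields are only materialized when get(i) is called.
  public static HCatRecord toWritable(Object rowObject, ObjectInspector rowInspector)
      throws Exception {
    LazyHCatRecord lazy = new LazyHCatRecord(rowObject, rowInspector);
    for (int i = 0; i < lazy.size(); i++) {
      System.out.println(i + "\t" + lazy.get(i));
    }
    // LazyHCatRecord cannot be written out directly (write() throws);
    // getWritable() copies it into a DefaultHCatRecord that can be serialized.
    return lazy.getWritable();
  }
}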
@Override + public void copy(HCatRecord r) throws HCatException { + throw new UnsupportedOperationException("not allowed to run copy() on LazyHCatRecord"); + } + + public LazyHCatRecord(Object wrappedObject, ObjectInspector oi) throws Exception { + if (oi.getCategory() != Category.STRUCT) { + throw new SerDeException(getClass().toString() + + " can only make a lazy hcat record from " + + "objects of struct types, but we got: " + oi.getTypeName()); } - @Override - public void write(DataOutput out) throws IOException { - throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" - + " an object/object inspector as a HCatRecord " - + "- it does not need to be written to a DataOutput."); - } - - @Override - public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { - int idx = recordSchema.getPosition(fieldName); - return get(idx); - } - - @Override - public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { - throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); - } - - @Override - public void remove(int idx) throws HCatException { - throw new UnsupportedOperationException("not allowed to run remove() on LazyHCatRecord"); - } - - @Override - public void copy(HCatRecord r) throws HCatException { - throw new UnsupportedOperationException("not allowed to run copy() on LazyHCatRecord"); - } - - public LazyHCatRecord(Object wrappedObject, ObjectInspector oi) throws Exception { - if (oi.getCategory() != Category.STRUCT) { - throw new SerDeException(getClass().toString() + - " can only make a lazy hcat record from " + - "objects of struct types, but we got: " + oi.getTypeName()); - } - - this.soi = (StructObjectInspector)oi; - this.wrappedObject = wrappedObject; - } - - @Override - public String toString(){ - StringBuilder sb = new StringBuilder(); - for(int i = 0; i< size() ; i++) { - sb.append(get(i)+"\t"); - } - return sb.toString(); - } + this.soi = (StructObjectInspector)oi; + this.wrappedObject = wrappedObject; + } - /** - * Convert this LazyHCatRecord to a DefaultHCatRecord. This is required - * before you can write out a record via write. - * @return an HCatRecord that can be serialized - * @throws HCatException - */ - public HCatRecord getWritable() throws HCatException { - DefaultHCatRecord d = new DefaultHCatRecord(); - d.copy(this); - return d; + @Override + public String toString(){ + StringBuilder sb = new StringBuilder(); + for(int i = 0; i< size() ; i++) { + sb.append(get(i)+"\t"); } + return sb.toString(); + } + + /** + * Convert this LazyHCatRecord to a DefaultHCatRecord. This is required + * before you can write out a record via write. + * @return an HCatRecord that can be serialized + * @throws HCatException + */ + public HCatRecord getWritable() throws HCatException { + DefaultHCatRecord d = new DefaultHCatRecord(); + d.copy(this); + return d; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/Pair.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/Pair.java index ba553c3..1171663 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/Pair.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/Pair.java @@ -26,65 +26,65 @@ */ public class Pair implements Serializable { - private static final long serialVersionUID = 1L; - public T first; - public U second; + private static final long serialVersionUID = 1L; + public T first; + public U second; - /** - * @param f First element in pair. 
- * @param s Second element in pair. - */ - public Pair(T f, U s) { - first = f; - second = s; - } + /** + * @param f First element in pair. + * @param s Second element in pair. + */ + public Pair(T f, U s) { + first = f; + second = s; + } - /* (non-Javadoc) - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "[" + first.toString() + "," + second.toString() + "]"; - } + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "[" + first.toString() + "," + second.toString() + "]"; + } - @Override - public int hashCode() { - return (((this.first == null ? 1 : this.first.hashCode()) * 17) - + (this.second == null ? 1 : this.second.hashCode()) * 19); - } + @Override + public int hashCode() { + return (((this.first == null ? 1 : this.first.hashCode()) * 17) + + (this.second == null ? 1 : this.second.hashCode()) * 19); + } - @Override - public boolean equals(Object other) { - if (other == null) { - return false; - } + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } - if (!(other instanceof Pair)) { - return false; - } + if (!(other instanceof Pair)) { + return false; + } - Pair otherPair = (Pair) other; + Pair otherPair = (Pair) other; - if (this.first == null) { - if (otherPair.first != null) { - return false; - } else { - return true; - } - } + if (this.first == null) { + if (otherPair.first != null) { + return false; + } else { + return true; + } + } - if (this.second == null) { - if (otherPair.second != null) { - return false; - } else { - return true; - } - } + if (this.second == null) { + if (otherPair.second != null) { + return false; + } else { + return true; + } + } - if (this.first.equals(otherPair.first) && this.second.equals(otherPair.second)) { - return true; - } else { - return false; - } + if (this.first.equals(otherPair.first) && this.second.equals(otherPair.second)) { + return true; + } else { + return false; } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/ReaderWriter.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/ReaderWriter.java index 5aba66f..ea0b5b8 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/ReaderWriter.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/ReaderWriter.java @@ -38,158 +38,158 @@ */ public abstract class ReaderWriter { - private static final String UTF8 = "UTF-8"; - - public static Object readDatum(DataInput in) throws IOException { - - byte type = in.readByte(); - switch (type) { - - case DataType.STRING: - byte[] buffer = new byte[in.readInt()]; - in.readFully(buffer); - return new String(buffer, UTF8); - - case DataType.INTEGER: - VIntWritable vint = new VIntWritable(); - vint.readFields(in); - return vint.get(); - - case DataType.LONG: - VLongWritable vlong = new VLongWritable(); - vlong.readFields(in); - return vlong.get(); - - case DataType.FLOAT: - return in.readFloat(); - - case DataType.DOUBLE: - return in.readDouble(); - - case DataType.BOOLEAN: - return in.readBoolean(); - - case DataType.BYTE: - return in.readByte(); - - case DataType.SHORT: - return in.readShort(); - - case DataType.NULL: - return null; - - case DataType.BINARY: - int len = in.readInt(); - byte[] ba = new byte[len]; - in.readFully(ba); - return ba; - - case DataType.MAP: - int size = in.readInt(); - Map m = new HashMap(size); - for (int i = 0; i < size; i++) { - m.put(readDatum(in), readDatum(in)); - } - return m; - - case DataType.LIST: - int sz = in.readInt(); - List 
list = new ArrayList(sz); - for (int i = 0; i < sz; i++) { - list.add(readDatum(in)); - } - return list; - - default: - throw new IOException("Unexpected data type " + type + - " found in stream."); - } + private static final String UTF8 = "UTF-8"; + + public static Object readDatum(DataInput in) throws IOException { + + byte type = in.readByte(); + switch (type) { + + case DataType.STRING: + byte[] buffer = new byte[in.readInt()]; + in.readFully(buffer); + return new String(buffer, UTF8); + + case DataType.INTEGER: + VIntWritable vint = new VIntWritable(); + vint.readFields(in); + return vint.get(); + + case DataType.LONG: + VLongWritable vlong = new VLongWritable(); + vlong.readFields(in); + return vlong.get(); + + case DataType.FLOAT: + return in.readFloat(); + + case DataType.DOUBLE: + return in.readDouble(); + + case DataType.BOOLEAN: + return in.readBoolean(); + + case DataType.BYTE: + return in.readByte(); + + case DataType.SHORT: + return in.readShort(); + + case DataType.NULL: + return null; + + case DataType.BINARY: + int len = in.readInt(); + byte[] ba = new byte[len]; + in.readFully(ba); + return ba; + + case DataType.MAP: + int size = in.readInt(); + Map m = new HashMap(size); + for (int i = 0; i < size; i++) { + m.put(readDatum(in), readDatum(in)); + } + return m; + + case DataType.LIST: + int sz = in.readInt(); + List list = new ArrayList(sz); + for (int i = 0; i < sz; i++) { + list.add(readDatum(in)); + } + return list; + + default: + throw new IOException("Unexpected data type " + type + + " found in stream."); } - - public static void writeDatum(DataOutput out, Object val) throws IOException { - // write the data type - byte type = DataType.findType(val); - switch (type) { - case DataType.LIST: - out.writeByte(DataType.LIST); - List list = (List) val; - int sz = list.size(); - out.writeInt(sz); - for (int i = 0; i < sz; i++) { - writeDatum(out, list.get(i)); - } - return; - - case DataType.MAP: - out.writeByte(DataType.MAP); - Map m = (Map) val; - out.writeInt(m.size()); - Iterator i = - m.entrySet().iterator(); - while (i.hasNext()) { - Entry entry = (Entry) i.next(); - writeDatum(out, entry.getKey()); - writeDatum(out, entry.getValue()); - } - return; - - case DataType.INTEGER: - out.writeByte(DataType.INTEGER); - new VIntWritable((Integer) val).write(out); - return; - - case DataType.LONG: - out.writeByte(DataType.LONG); - new VLongWritable((Long) val).write(out); - return; - - case DataType.FLOAT: - out.writeByte(DataType.FLOAT); - out.writeFloat((Float) val); - return; - - case DataType.DOUBLE: - out.writeByte(DataType.DOUBLE); - out.writeDouble((Double) val); - return; - - case DataType.BOOLEAN: - out.writeByte(DataType.BOOLEAN); - out.writeBoolean((Boolean) val); - return; - - case DataType.BYTE: - out.writeByte(DataType.BYTE); - out.writeByte((Byte) val); - return; - - case DataType.SHORT: - out.writeByte(DataType.SHORT); - out.writeShort((Short) val); - return; - - case DataType.STRING: - String s = (String) val; - byte[] utfBytes = s.getBytes(ReaderWriter.UTF8); - out.writeByte(DataType.STRING); - out.writeInt(utfBytes.length); - out.write(utfBytes); - return; - - case DataType.BINARY: - byte[] ba = (byte[]) val; - out.writeByte(DataType.BINARY); - out.writeInt(ba.length); - out.write(ba); - return; - - case DataType.NULL: - out.writeByte(DataType.NULL); - return; - - default: - throw new IOException("Unexpected data type " + type + - " found in stream."); - } + } + + public static void writeDatum(DataOutput out, Object val) throws IOException { + // write the 
data type + byte type = DataType.findType(val); + switch (type) { + case DataType.LIST: + out.writeByte(DataType.LIST); + List list = (List) val; + int sz = list.size(); + out.writeInt(sz); + for (int i = 0; i < sz; i++) { + writeDatum(out, list.get(i)); + } + return; + + case DataType.MAP: + out.writeByte(DataType.MAP); + Map m = (Map) val; + out.writeInt(m.size()); + Iterator i = + m.entrySet().iterator(); + while (i.hasNext()) { + Entry entry = (Entry) i.next(); + writeDatum(out, entry.getKey()); + writeDatum(out, entry.getValue()); + } + return; + + case DataType.INTEGER: + out.writeByte(DataType.INTEGER); + new VIntWritable((Integer) val).write(out); + return; + + case DataType.LONG: + out.writeByte(DataType.LONG); + new VLongWritable((Long) val).write(out); + return; + + case DataType.FLOAT: + out.writeByte(DataType.FLOAT); + out.writeFloat((Float) val); + return; + + case DataType.DOUBLE: + out.writeByte(DataType.DOUBLE); + out.writeDouble((Double) val); + return; + + case DataType.BOOLEAN: + out.writeByte(DataType.BOOLEAN); + out.writeBoolean((Boolean) val); + return; + + case DataType.BYTE: + out.writeByte(DataType.BYTE); + out.writeByte((Byte) val); + return; + + case DataType.SHORT: + out.writeByte(DataType.SHORT); + out.writeShort((Short) val); + return; + + case DataType.STRING: + String s = (String) val; + byte[] utfBytes = s.getBytes(ReaderWriter.UTF8); + out.writeByte(DataType.STRING); + out.writeInt(utfBytes.length); + out.write(utfBytes); + return; + + case DataType.BINARY: + byte[] ba = (byte[]) val; + out.writeByte(DataType.BINARY); + out.writeInt(ba.length); + out.write(ba); + return; + + case DataType.NULL: + out.writeByte(DataType.NULL); + return; + + default: + throw new IOException("Unexpected data type " + type + + " found in stream."); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatFieldSchema.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatFieldSchema.java index 94f1317..700689a 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatFieldSchema.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatFieldSchema.java @@ -28,258 +28,258 @@ */ public class HCatFieldSchema implements Serializable { - public enum Type { - INT, - TINYINT, - SMALLINT, - BIGINT, - BOOLEAN, - FLOAT, - DOUBLE, - STRING, - ARRAY, - MAP, - STRUCT, - BINARY, + public enum Type { + INT, + TINYINT, + SMALLINT, + BIGINT, + BOOLEAN, + FLOAT, + DOUBLE, + STRING, + ARRAY, + MAP, + STRUCT, + BINARY, + } + + public enum Category { + PRIMITIVE, + ARRAY, + MAP, + STRUCT; + + public static Category fromType(Type type) { + if (Type.ARRAY == type) { + return ARRAY; + } else if (Type.STRUCT == type) { + return STRUCT; + } else if (Type.MAP == type) { + return MAP; + } else { + return PRIMITIVE; + } } - - public enum Category { - PRIMITIVE, - ARRAY, - MAP, - STRUCT; - - public static Category fromType(Type type) { - if (Type.ARRAY == type) { - return ARRAY; - } else if (Type.STRUCT == type) { - return STRUCT; - } else if (Type.MAP == type) { - return MAP; - } else { - return PRIMITIVE; - } - } - } - - ; - - public boolean isComplex() { - return (category == Category.PRIMITIVE) ? false : true; - } - - /** - * - */ - private static final long serialVersionUID = 1L; - - String fieldName = null; - String comment = null; - Type type = null; - Category category = null; - - // Populated if column is struct, array or map types. - // If struct type, contains schema of the struct. 
- // If array type, contains schema of one of the elements. - // If map type, contains schema of the value element. - HCatSchema subSchema = null; - - // populated if column is Map type - Type mapKeyType = null; - - private String typeString = null; - - @SuppressWarnings("unused") - private HCatFieldSchema() { - // preventing empty ctor from being callable - } - - /** - * Returns type of the field - * @return type of the field - */ - public Type getType() { - return type; - } - - /** - * Returns category of the field - * @return category of the field - */ - public Category getCategory() { - return category; - } - - /** - * Returns name of the field - * @return name of the field - */ - public String getName() { - return fieldName; - } - - public String getComment() { - return comment; - } - - /** - * Constructor constructing a primitive datatype HCatFieldSchema - * @param fieldName Name of the primitive field - * @param type Type of the primitive field - * @throws HCatException if call made on non-primitive types - */ - public HCatFieldSchema(String fieldName, Type type, String comment) throws HCatException { - assertTypeInCategory(type, Category.PRIMITIVE, fieldName); - this.fieldName = fieldName; - this.type = type; - this.category = Category.PRIMITIVE; - this.comment = comment; - } - - /** - * Constructor for constructing a ARRAY type or STRUCT type HCatFieldSchema, passing type and subschema - * @param fieldName Name of the array or struct field - * @param type Type of the field - either Type.ARRAY or Type.STRUCT - * @param subSchema - subschema of the struct, or element schema of the elements in the array - * @throws HCatException if call made on Primitive or Map types - */ - public HCatFieldSchema(String fieldName, Type type, HCatSchema subSchema, String comment) throws HCatException { - assertTypeNotInCategory(type, Category.PRIMITIVE); - assertTypeNotInCategory(type, Category.MAP); - this.fieldName = fieldName; - this.type = type; - this.category = Category.fromType(type); - this.subSchema = subSchema; - if (type == Type.ARRAY) { - this.subSchema.get(0).setName(null); - } - this.comment = comment; - } - - private void setName(String name) { - this.fieldName = name; + } + + ; + + public boolean isComplex() { + return (category == Category.PRIMITIVE) ? false : true; + } + + /** + * + */ + private static final long serialVersionUID = 1L; + + String fieldName = null; + String comment = null; + Type type = null; + Category category = null; + + // Populated if column is struct, array or map types. + // If struct type, contains schema of the struct. + // If array type, contains schema of one of the elements. + // If map type, contains schema of the value element. 
+ HCatSchema subSchema = null; + + // populated if column is Map type + Type mapKeyType = null; + + private String typeString = null; + + @SuppressWarnings("unused") + private HCatFieldSchema() { + // preventing empty ctor from being callable + } + + /** + * Returns type of the field + * @return type of the field + */ + public Type getType() { + return type; + } + + /** + * Returns category of the field + * @return category of the field + */ + public Category getCategory() { + return category; + } + + /** + * Returns name of the field + * @return name of the field + */ + public String getName() { + return fieldName; + } + + public String getComment() { + return comment; + } + + /** + * Constructor constructing a primitive datatype HCatFieldSchema + * @param fieldName Name of the primitive field + * @param type Type of the primitive field + * @throws HCatException if call made on non-primitive types + */ + public HCatFieldSchema(String fieldName, Type type, String comment) throws HCatException { + assertTypeInCategory(type, Category.PRIMITIVE, fieldName); + this.fieldName = fieldName; + this.type = type; + this.category = Category.PRIMITIVE; + this.comment = comment; + } + + /** + * Constructor for constructing a ARRAY type or STRUCT type HCatFieldSchema, passing type and subschema + * @param fieldName Name of the array or struct field + * @param type Type of the field - either Type.ARRAY or Type.STRUCT + * @param subSchema - subschema of the struct, or element schema of the elements in the array + * @throws HCatException if call made on Primitive or Map types + */ + public HCatFieldSchema(String fieldName, Type type, HCatSchema subSchema, String comment) throws HCatException { + assertTypeNotInCategory(type, Category.PRIMITIVE); + assertTypeNotInCategory(type, Category.MAP); + this.fieldName = fieldName; + this.type = type; + this.category = Category.fromType(type); + this.subSchema = subSchema; + if (type == Type.ARRAY) { + this.subSchema.get(0).setName(null); } - - /** - * Constructor for constructing a MAP type HCatFieldSchema, passing type of key and value - * @param fieldName Name of the array or struct field - * @param type Type of the field - must be Type.MAP - * @param mapKeyType - key type of the Map - * @param mapValueSchema - subschema of the value of the Map - * @throws HCatException if call made on non-Map types - */ - public HCatFieldSchema(String fieldName, Type type, Type mapKeyType, HCatSchema mapValueSchema, String comment) throws HCatException { - assertTypeInCategory(type, Category.MAP, fieldName); - assertTypeInCategory(mapKeyType, Category.PRIMITIVE, fieldName); - this.fieldName = fieldName; - this.type = Type.MAP; - this.category = Category.MAP; - this.mapKeyType = mapKeyType; - this.subSchema = mapValueSchema; - this.subSchema.get(0).setName(null); - this.comment = comment; + this.comment = comment; + } + + private void setName(String name) { + this.fieldName = name; + } + + /** + * Constructor for constructing a MAP type HCatFieldSchema, passing type of key and value + * @param fieldName Name of the array or struct field + * @param type Type of the field - must be Type.MAP + * @param mapKeyType - key type of the Map + * @param mapValueSchema - subschema of the value of the Map + * @throws HCatException if call made on non-Map types + */ + public HCatFieldSchema(String fieldName, Type type, Type mapKeyType, HCatSchema mapValueSchema, String comment) throws HCatException { + assertTypeInCategory(type, Category.MAP, fieldName); + assertTypeInCategory(mapKeyType, 
Category.PRIMITIVE, fieldName); + this.fieldName = fieldName; + this.type = Type.MAP; + this.category = Category.MAP; + this.mapKeyType = mapKeyType; + this.subSchema = mapValueSchema; + this.subSchema.get(0).setName(null); + this.comment = comment; + } + + public HCatSchema getStructSubSchema() throws HCatException { + assertTypeInCategory(this.type, Category.STRUCT, this.fieldName); + return subSchema; + } + + public HCatSchema getArrayElementSchema() throws HCatException { + assertTypeInCategory(this.type, Category.ARRAY, this.fieldName); + return subSchema; + } + + public Type getMapKeyType() throws HCatException { + assertTypeInCategory(this.type, Category.MAP, this.fieldName); + return mapKeyType; + } + + public HCatSchema getMapValueSchema() throws HCatException { + assertTypeInCategory(this.type, Category.MAP, this.fieldName); + return subSchema; + } + + private static void assertTypeInCategory(Type type, Category category, String fieldName) throws HCatException { + Category typeCategory = Category.fromType(type); + if (typeCategory != category) { + throw new HCatException("Type category mismatch. Expected " + category + " but type " + type + " in category " + typeCategory + " (field " + fieldName + ")"); } + } - public HCatSchema getStructSubSchema() throws HCatException { - assertTypeInCategory(this.type, Category.STRUCT, this.fieldName); - return subSchema; + private static void assertTypeNotInCategory(Type type, Category category) throws HCatException { + Category typeCategory = Category.fromType(type); + if (typeCategory == category) { + throw new HCatException("Type category mismatch. Expected type " + type + " not in category " + category + " but was so."); } - - public HCatSchema getArrayElementSchema() throws HCatException { - assertTypeInCategory(this.type, Category.ARRAY, this.fieldName); - return subSchema; + } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("fieldName", fieldName) + .append("comment", comment) + .append("type", getTypeString()) + .append("category", category) + .toString(); + } + + public String getTypeString() { + if (typeString != null) { + return typeString; } - public Type getMapKeyType() throws HCatException { - assertTypeInCategory(this.type, Category.MAP, this.fieldName); - return mapKeyType; + StringBuilder sb = new StringBuilder(); + if (Category.PRIMITIVE == category) { + sb.append(type); + } else if (Category.STRUCT == category) { + sb.append("struct<"); + sb.append(subSchema.getSchemaAsTypeString()); + sb.append(">"); + } else if (Category.ARRAY == category) { + sb.append("array<"); + sb.append(subSchema.getSchemaAsTypeString()); + sb.append(">"); + } else if (Category.MAP == category) { + sb.append("map<"); + sb.append(mapKeyType); + sb.append(","); + sb.append(subSchema.getSchemaAsTypeString()); + sb.append(">"); } + return (typeString = sb.toString().toLowerCase()); + } - public HCatSchema getMapValueSchema() throws HCatException { - assertTypeInCategory(this.type, Category.MAP, this.fieldName); - return subSchema; + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; } - - private static void assertTypeInCategory(Type type, Category category, String fieldName) throws HCatException { - Category typeCategory = Category.fromType(type); - if (typeCategory != category) { - throw new HCatException("Type category mismatch. 
Expected " + category + " but type " + type + " in category " + typeCategory + " (field " + fieldName + ")"); - } + if (obj == null) { + return false; } - - private static void assertTypeNotInCategory(Type type, Category category) throws HCatException { - Category typeCategory = Category.fromType(type); - if (typeCategory == category) { - throw new HCatException("Type category mismatch. Expected type " + type + " not in category " + category + " but was so."); - } + if (!(obj instanceof HCatFieldSchema)) { + return false; } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("fieldName", fieldName) - .append("comment", comment) - .append("type", getTypeString()) - .append("category", category) - .toString(); + HCatFieldSchema other = (HCatFieldSchema) obj; + if (category != other.category) { + return false; } - - public String getTypeString() { - if (typeString != null) { - return typeString; - } - - StringBuilder sb = new StringBuilder(); - if (Category.PRIMITIVE == category) { - sb.append(type); - } else if (Category.STRUCT == category) { - sb.append("struct<"); - sb.append(subSchema.getSchemaAsTypeString()); - sb.append(">"); - } else if (Category.ARRAY == category) { - sb.append("array<"); - sb.append(subSchema.getSchemaAsTypeString()); - sb.append(">"); - } else if (Category.MAP == category) { - sb.append("map<"); - sb.append(mapKeyType); - sb.append(","); - sb.append(subSchema.getSchemaAsTypeString()); - sb.append(">"); - } - return (typeString = sb.toString().toLowerCase()); + if (fieldName == null) { + if (other.fieldName != null) { + return false; + } + } else if (!fieldName.equals(other.fieldName)) { + return false; } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HCatFieldSchema)) { - return false; - } - HCatFieldSchema other = (HCatFieldSchema) obj; - if (category != other.category) { - return false; - } - if (fieldName == null) { - if (other.fieldName != null) { - return false; - } - } else if (!fieldName.equals(other.fieldName)) { - return false; - } - if (this.getTypeString() == null) { - if (other.getTypeString() != null) { - return false; - } - } else if (!this.getTypeString().equals(other.getTypeString())) { - return false; - } - return true; + if (this.getTypeString() == null) { + if (other.getTypeString() != null) { + return false; + } + } else if (!this.getTypeString().equals(other.getTypeString())) { + return false; } + return true; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchema.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchema.java index c053fff..bbb2619 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchema.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchema.java @@ -33,152 +33,152 @@ */ public class HCatSchema implements Serializable { - private static final long serialVersionUID = 1L; - - private final List fieldSchemas; - private final Map fieldPositionMap; - private final List fieldNames; - - /** - * - * @param fieldSchemas is now owned by HCatSchema. Any subsequent modifications - * on fieldSchemas won't get reflected in HCatSchema. Each fieldSchema's name - * in the list must be unique, otherwise throws IllegalArgumentException. 
- */ - public HCatSchema(final List fieldSchemas) { - this.fieldSchemas = new ArrayList(fieldSchemas); - int idx = 0; - fieldPositionMap = new HashMap(); - fieldNames = new ArrayList(); - for (HCatFieldSchema field : fieldSchemas) { - if (field == null) - throw new IllegalArgumentException("Field cannot be null"); - - String fieldName = field.getName(); - if (fieldPositionMap.containsKey(fieldName)) - throw new IllegalArgumentException("Field named " + fieldName + - " already exists"); - fieldPositionMap.put(fieldName, idx); - fieldNames.add(fieldName); - idx++; - } + private static final long serialVersionUID = 1L; + + private final List fieldSchemas; + private final Map fieldPositionMap; + private final List fieldNames; + + /** + * + * @param fieldSchemas is now owned by HCatSchema. Any subsequent modifications + * on fieldSchemas won't get reflected in HCatSchema. Each fieldSchema's name + * in the list must be unique, otherwise throws IllegalArgumentException. + */ + public HCatSchema(final List fieldSchemas) { + this.fieldSchemas = new ArrayList(fieldSchemas); + int idx = 0; + fieldPositionMap = new HashMap(); + fieldNames = new ArrayList(); + for (HCatFieldSchema field : fieldSchemas) { + if (field == null) + throw new IllegalArgumentException("Field cannot be null"); + + String fieldName = field.getName(); + if (fieldPositionMap.containsKey(fieldName)) + throw new IllegalArgumentException("Field named " + fieldName + + " already exists"); + fieldPositionMap.put(fieldName, idx); + fieldNames.add(fieldName); + idx++; } - - public void append(final HCatFieldSchema hfs) throws HCatException { - if (hfs == null) - throw new HCatException("Attempt to append null HCatFieldSchema in HCatSchema."); - - String fieldName = hfs.getName(); - if (fieldPositionMap.containsKey(fieldName)) - throw new HCatException("Attempt to append HCatFieldSchema with already " + - "existing name: " + fieldName + "."); - - this.fieldSchemas.add(hfs); - this.fieldNames.add(fieldName); - this.fieldPositionMap.put(fieldName, this.size() - 1); - } - - /** - * Users are not allowed to modify the list directly, since HCatSchema - * maintains internal state. Use append/remove to modify the schema. - */ - public List getFields() { - return Collections.unmodifiableList(this.fieldSchemas); - } - - /** - * @param fieldName - * @return the index of field named fieldName in Schema. If field is not - * present, returns null. - */ - public Integer getPosition(String fieldName) { - return fieldPositionMap.get(fieldName); - } - - public HCatFieldSchema get(String fieldName) throws HCatException { - return get(getPosition(fieldName)); + } + + public void append(final HCatFieldSchema hfs) throws HCatException { + if (hfs == null) + throw new HCatException("Attempt to append null HCatFieldSchema in HCatSchema."); + + String fieldName = hfs.getName(); + if (fieldPositionMap.containsKey(fieldName)) + throw new HCatException("Attempt to append HCatFieldSchema with already " + + "existing name: " + fieldName + "."); + + this.fieldSchemas.add(hfs); + this.fieldNames.add(fieldName); + this.fieldPositionMap.put(fieldName, this.size() - 1); + } + + /** + * Users are not allowed to modify the list directly, since HCatSchema + * maintains internal state. Use append/remove to modify the schema. + */ + public List getFields() { + return Collections.unmodifiableList(this.fieldSchemas); + } + + /** + * @param fieldName + * @return the index of field named fieldName in Schema. If field is not + * present, returns null. 
+ */ + public Integer getPosition(String fieldName) { + return fieldPositionMap.get(fieldName); + } + + public HCatFieldSchema get(String fieldName) throws HCatException { + return get(getPosition(fieldName)); + } + + public List getFieldNames() { + return this.fieldNames; + } + + public HCatFieldSchema get(int position) { + return fieldSchemas.get(position); + } + + public int size() { + return fieldSchemas.size(); + } + + public void remove(final HCatFieldSchema hcatFieldSchema) throws HCatException { + + if (!fieldSchemas.contains(hcatFieldSchema)) { + throw new HCatException("Attempt to delete a non-existent column from HCat Schema: " + hcatFieldSchema); } - public List getFieldNames() { - return this.fieldNames; + fieldSchemas.remove(hcatFieldSchema); + fieldPositionMap.remove(hcatFieldSchema); + fieldNames.remove(hcatFieldSchema.getName()); + } + + @Override + public String toString() { + boolean first = true; + StringBuilder sb = new StringBuilder(); + for (HCatFieldSchema hfs : fieldSchemas) { + if (!first) { + sb.append(","); + } else { + first = false; + } + if (hfs.getName() != null) { + sb.append(hfs.getName()); + sb.append(":"); + } + sb.append(hfs.toString()); } - - public HCatFieldSchema get(int position) { - return fieldSchemas.get(position); + return sb.toString(); + } + + public String getSchemaAsTypeString() { + boolean first = true; + StringBuilder sb = new StringBuilder(); + for (HCatFieldSchema hfs : fieldSchemas) { + if (!first) { + sb.append(","); + } else { + first = false; + } + if (hfs.getName() != null) { + sb.append(hfs.getName()); + sb.append(":"); + } + sb.append(hfs.getTypeString()); } + return sb.toString(); + } - public int size() { - return fieldSchemas.size(); + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; } - - public void remove(final HCatFieldSchema hcatFieldSchema) throws HCatException { - - if (!fieldSchemas.contains(hcatFieldSchema)) { - throw new HCatException("Attempt to delete a non-existent column from HCat Schema: " + hcatFieldSchema); - } - - fieldSchemas.remove(hcatFieldSchema); - fieldPositionMap.remove(hcatFieldSchema); - fieldNames.remove(hcatFieldSchema.getName()); + if (obj == null) { + return false; } - - @Override - public String toString() { - boolean first = true; - StringBuilder sb = new StringBuilder(); - for (HCatFieldSchema hfs : fieldSchemas) { - if (!first) { - sb.append(","); - } else { - first = false; - } - if (hfs.getName() != null) { - sb.append(hfs.getName()); - sb.append(":"); - } - sb.append(hfs.toString()); - } - return sb.toString(); + if (!(obj instanceof HCatSchema)) { + return false; } - - public String getSchemaAsTypeString() { - boolean first = true; - StringBuilder sb = new StringBuilder(); - for (HCatFieldSchema hfs : fieldSchemas) { - if (!first) { - sb.append(","); - } else { - first = false; - } - if (hfs.getName() != null) { - sb.append(hfs.getName()); - sb.append(":"); - } - sb.append(hfs.getTypeString()); - } - return sb.toString(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HCatSchema)) { - return false; - } - HCatSchema other = (HCatSchema) obj; - if (!this.getFields().equals(other.getFields())) { - return false; - } - return true; + HCatSchema other = (HCatSchema) obj; + if (!this.getFields().equals(other.getFields())) { + return false; } + return true; + } - @Override - public int hashCode() { - return toString().hashCode(); - } + @Override + 
public int hashCode() { + return toString().hashCode(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java index 4aad89f..5c327aa 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java @@ -42,205 +42,205 @@ */ public class HCatSchemaUtils { - public static CollectionBuilder getStructSchemaBuilder() { - return new CollectionBuilder(); - } - - public static CollectionBuilder getListSchemaBuilder() { - return new CollectionBuilder(); - } - - public static MapBuilder getMapSchemaBuilder() { - return new MapBuilder(); - } + public static CollectionBuilder getStructSchemaBuilder() { + return new CollectionBuilder(); + } + public static CollectionBuilder getListSchemaBuilder() { + return new CollectionBuilder(); + } - public static abstract class HCatSchemaBuilder { - public abstract HCatSchema build() throws HCatException; - } + public static MapBuilder getMapSchemaBuilder() { + return new MapBuilder(); + } - public static class CollectionBuilder extends HCatSchemaBuilder { // for STRUCTS(multiple-add-calls) and LISTS(single-add-call) - List fieldSchemas = null; - CollectionBuilder() { - fieldSchemas = new ArrayList(); - } - - public CollectionBuilder addField(FieldSchema fieldSchema) throws HCatException { - return this.addField(getHCatFieldSchema(fieldSchema)); - } - - public CollectionBuilder addField(HCatFieldSchema fieldColumnSchema) { - fieldSchemas.add(fieldColumnSchema); - return this; - } - - @Override - public HCatSchema build() throws HCatException { - return new HCatSchema(fieldSchemas); - } - - } - - public static class MapBuilder extends HCatSchemaBuilder { - - Type keyType = null; - HCatSchema valueSchema = null; - - @Override - public HCatSchema build() throws HCatException { - List fslist = new ArrayList(); - fslist.add(new HCatFieldSchema(null, Type.MAP, keyType, valueSchema, null)); - return new HCatSchema(fslist); - } - - public MapBuilder withValueSchema(HCatSchema valueSchema) { - this.valueSchema = valueSchema; - return this; - } - - public MapBuilder withKeyType(Type keyType) { - this.keyType = keyType; - return this; - } - - } - - - /** - * Convert a HCatFieldSchema to a FieldSchema - * @param fs FieldSchema to convert - * @return HCatFieldSchema representation of FieldSchema - * @throws HCatException - */ - public static HCatFieldSchema getHCatFieldSchema(FieldSchema fs) throws HCatException { - String fieldName = fs.getName(); - TypeInfo baseTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()); - return getHCatFieldSchema(fieldName, baseTypeInfo); - } - - private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo) throws HCatException { - Category typeCategory = fieldTypeInfo.getCategory(); - HCatFieldSchema hCatFieldSchema; - if (Category.PRIMITIVE == typeCategory) { - hCatFieldSchema = new HCatFieldSchema(fieldName, getPrimitiveHType(fieldTypeInfo), null); - } else if (Category.STRUCT == typeCategory) { - HCatSchema subSchema = constructHCatSchema((StructTypeInfo) fieldTypeInfo); - hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRUCT, subSchema, null); - } else if (Category.LIST == typeCategory) { - HCatSchema subSchema = getHCatSchema(((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo()); - hCatFieldSchema = new HCatFieldSchema(fieldName, 
HCatFieldSchema.Type.ARRAY, subSchema, null); - } else if (Category.MAP == typeCategory) { - HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) fieldTypeInfo).getMapKeyTypeInfo()); - HCatSchema subSchema = getHCatSchema(((MapTypeInfo) fieldTypeInfo).getMapValueTypeInfo()); - hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.MAP, mapKeyType, subSchema, null); - } else { - throw new TypeNotPresentException(fieldTypeInfo.getTypeName(), null); - } - return hCatFieldSchema; - } - - private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) { - switch (((PrimitiveTypeInfo) basePrimitiveTypeInfo).getPrimitiveCategory()) { - case BOOLEAN: - return (HCatContext.INSTANCE.getConf().isPresent() && - HCatContext.INSTANCE.getConf().get().getBoolean( - HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, - HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) ? - Type.INT : Type.BOOLEAN; - case BYTE: - return (HCatContext.INSTANCE.getConf().isPresent() && - HCatContext.INSTANCE.getConf().get().getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) ? - Type.INT : Type.TINYINT; - case DOUBLE: - return Type.DOUBLE; - case FLOAT: - return Type.FLOAT; - case INT: - return Type.INT; - case LONG: - return Type.BIGINT; - case SHORT: - return (HCatContext.INSTANCE.getConf().isPresent() && - HCatContext.INSTANCE.getConf().get().getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) ? - Type.INT : Type.SMALLINT; - case STRING: - return Type.STRING; - case BINARY: - return Type.BINARY; - default: - throw new TypeNotPresentException(((PrimitiveTypeInfo) basePrimitiveTypeInfo).getTypeName(), null); - } - } - - public static HCatSchema getHCatSchema(Schema schema) throws HCatException { - return getHCatSchema(schema.getFieldSchemas()); - } - - public static HCatSchema getHCatSchema(List fslist) throws HCatException { - CollectionBuilder builder = getStructSchemaBuilder(); - for (FieldSchema fieldSchema : fslist) { - builder.addField(fieldSchema); - } - return builder.build(); - } - - private static HCatSchema constructHCatSchema(StructTypeInfo stypeInfo) throws HCatException { - CollectionBuilder builder = getStructSchemaBuilder(); - for (String fieldName : ((StructTypeInfo) stypeInfo).getAllStructFieldNames()) { - builder.addField(getHCatFieldSchema(fieldName, ((StructTypeInfo) stypeInfo).getStructFieldTypeInfo(fieldName))); - } - return builder.build(); - } - - public static HCatSchema getHCatSchema(TypeInfo typeInfo) throws HCatException { - Category typeCategory = typeInfo.getCategory(); - HCatSchema hCatSchema; - if (Category.PRIMITIVE == typeCategory) { - hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, getPrimitiveHType(typeInfo), null)).build(); - } else if (Category.STRUCT == typeCategory) { - HCatSchema subSchema = constructHCatSchema((StructTypeInfo) typeInfo); - hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, Type.STRUCT, subSchema, null)).build(); - } else if (Category.LIST == typeCategory) { - CollectionBuilder builder = getListSchemaBuilder(); - builder.addField(getHCatFieldSchema(null, ((ListTypeInfo) typeInfo).getListElementTypeInfo())); - hCatSchema = new HCatSchema(Arrays.asList(new HCatFieldSchema("", Type.ARRAY, builder.build(), ""))); - } else if (Category.MAP == typeCategory) { - HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) 
typeInfo).getMapKeyTypeInfo()); - HCatSchema subSchema = getHCatSchema(((MapTypeInfo) typeInfo).getMapValueTypeInfo()); - MapBuilder builder = getMapSchemaBuilder(); - hCatSchema = builder.withKeyType(mapKeyType).withValueSchema(subSchema).build(); - } else { - throw new TypeNotPresentException(typeInfo.getTypeName(), null); - } - return hCatSchema; - } - - public static HCatSchema getHCatSchemaFromTypeString(String typeString) throws HCatException { - return getHCatSchema(TypeInfoUtils.getTypeInfoFromTypeString(typeString)); - } - - public static HCatSchema getHCatSchema(String schemaString) throws HCatException { - if ((schemaString == null) || (schemaString.trim().isEmpty())) { - return new HCatSchema(new ArrayList()); // empty HSchema construct - } - HCatSchema outerSchema = getHCatSchemaFromTypeString("struct<" + schemaString + ">"); - return outerSchema.get(0).getStructSubSchema(); - } - - public static FieldSchema getFieldSchema(HCatFieldSchema hcatFieldSchema) { - return new FieldSchema(hcatFieldSchema.getName(), hcatFieldSchema.getTypeString(), hcatFieldSchema.getComment()); - } - - public static List getFieldSchemas(List hcatFieldSchemas) { - List lfs = new ArrayList(); - for (HCatFieldSchema hfs : hcatFieldSchemas) { - lfs.add(getFieldSchema(hfs)); - } - return lfs; - } + public static abstract class HCatSchemaBuilder { + public abstract HCatSchema build() throws HCatException; + } + + public static class CollectionBuilder extends HCatSchemaBuilder { // for STRUCTS(multiple-add-calls) and LISTS(single-add-call) + List fieldSchemas = null; + + CollectionBuilder() { + fieldSchemas = new ArrayList(); + } + + public CollectionBuilder addField(FieldSchema fieldSchema) throws HCatException { + return this.addField(getHCatFieldSchema(fieldSchema)); + } + + public CollectionBuilder addField(HCatFieldSchema fieldColumnSchema) { + fieldSchemas.add(fieldColumnSchema); + return this; + } + + @Override + public HCatSchema build() throws HCatException { + return new HCatSchema(fieldSchemas); + } + + } + + public static class MapBuilder extends HCatSchemaBuilder { + + Type keyType = null; + HCatSchema valueSchema = null; + + @Override + public HCatSchema build() throws HCatException { + List fslist = new ArrayList(); + fslist.add(new HCatFieldSchema(null, Type.MAP, keyType, valueSchema, null)); + return new HCatSchema(fslist); + } + + public MapBuilder withValueSchema(HCatSchema valueSchema) { + this.valueSchema = valueSchema; + return this; + } + + public MapBuilder withKeyType(Type keyType) { + this.keyType = keyType; + return this; + } + + } + + + /** + * Convert a HCatFieldSchema to a FieldSchema + * @param fs FieldSchema to convert + * @return HCatFieldSchema representation of FieldSchema + * @throws HCatException + */ + public static HCatFieldSchema getHCatFieldSchema(FieldSchema fs) throws HCatException { + String fieldName = fs.getName(); + TypeInfo baseTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()); + return getHCatFieldSchema(fieldName, baseTypeInfo); + } + + private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo) throws HCatException { + Category typeCategory = fieldTypeInfo.getCategory(); + HCatFieldSchema hCatFieldSchema; + if (Category.PRIMITIVE == typeCategory) { + hCatFieldSchema = new HCatFieldSchema(fieldName, getPrimitiveHType(fieldTypeInfo), null); + } else if (Category.STRUCT == typeCategory) { + HCatSchema subSchema = constructHCatSchema((StructTypeInfo) fieldTypeInfo); + hCatFieldSchema = new 
HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRUCT, subSchema, null); + } else if (Category.LIST == typeCategory) { + HCatSchema subSchema = getHCatSchema(((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo()); + hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.ARRAY, subSchema, null); + } else if (Category.MAP == typeCategory) { + HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) fieldTypeInfo).getMapKeyTypeInfo()); + HCatSchema subSchema = getHCatSchema(((MapTypeInfo) fieldTypeInfo).getMapValueTypeInfo()); + hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.MAP, mapKeyType, subSchema, null); + } else { + throw new TypeNotPresentException(fieldTypeInfo.getTypeName(), null); + } + return hCatFieldSchema; + } + + private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) { + switch (((PrimitiveTypeInfo) basePrimitiveTypeInfo).getPrimitiveCategory()) { + case BOOLEAN: + return (HCatContext.INSTANCE.getConf().isPresent() && + HCatContext.INSTANCE.getConf().get().getBoolean( + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) ? + Type.INT : Type.BOOLEAN; + case BYTE: + return (HCatContext.INSTANCE.getConf().isPresent() && + HCatContext.INSTANCE.getConf().get().getBoolean( + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) ? + Type.INT : Type.TINYINT; + case DOUBLE: + return Type.DOUBLE; + case FLOAT: + return Type.FLOAT; + case INT: + return Type.INT; + case LONG: + return Type.BIGINT; + case SHORT: + return (HCatContext.INSTANCE.getConf().isPresent() && + HCatContext.INSTANCE.getConf().get().getBoolean( + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) ? 
+ Type.INT : Type.SMALLINT; + case STRING: + return Type.STRING; + case BINARY: + return Type.BINARY; + default: + throw new TypeNotPresentException(((PrimitiveTypeInfo) basePrimitiveTypeInfo).getTypeName(), null); + } + } + + public static HCatSchema getHCatSchema(Schema schema) throws HCatException { + return getHCatSchema(schema.getFieldSchemas()); + } + + public static HCatSchema getHCatSchema(List fslist) throws HCatException { + CollectionBuilder builder = getStructSchemaBuilder(); + for (FieldSchema fieldSchema : fslist) { + builder.addField(fieldSchema); + } + return builder.build(); + } + + private static HCatSchema constructHCatSchema(StructTypeInfo stypeInfo) throws HCatException { + CollectionBuilder builder = getStructSchemaBuilder(); + for (String fieldName : ((StructTypeInfo) stypeInfo).getAllStructFieldNames()) { + builder.addField(getHCatFieldSchema(fieldName, ((StructTypeInfo) stypeInfo).getStructFieldTypeInfo(fieldName))); + } + return builder.build(); + } + + public static HCatSchema getHCatSchema(TypeInfo typeInfo) throws HCatException { + Category typeCategory = typeInfo.getCategory(); + HCatSchema hCatSchema; + if (Category.PRIMITIVE == typeCategory) { + hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, getPrimitiveHType(typeInfo), null)).build(); + } else if (Category.STRUCT == typeCategory) { + HCatSchema subSchema = constructHCatSchema((StructTypeInfo) typeInfo); + hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, Type.STRUCT, subSchema, null)).build(); + } else if (Category.LIST == typeCategory) { + CollectionBuilder builder = getListSchemaBuilder(); + builder.addField(getHCatFieldSchema(null, ((ListTypeInfo) typeInfo).getListElementTypeInfo())); + hCatSchema = new HCatSchema(Arrays.asList(new HCatFieldSchema("", Type.ARRAY, builder.build(), ""))); + } else if (Category.MAP == typeCategory) { + HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); + HCatSchema subSchema = getHCatSchema(((MapTypeInfo) typeInfo).getMapValueTypeInfo()); + MapBuilder builder = getMapSchemaBuilder(); + hCatSchema = builder.withKeyType(mapKeyType).withValueSchema(subSchema).build(); + } else { + throw new TypeNotPresentException(typeInfo.getTypeName(), null); + } + return hCatSchema; + } + + public static HCatSchema getHCatSchemaFromTypeString(String typeString) throws HCatException { + return getHCatSchema(TypeInfoUtils.getTypeInfoFromTypeString(typeString)); + } + + public static HCatSchema getHCatSchema(String schemaString) throws HCatException { + if ((schemaString == null) || (schemaString.trim().isEmpty())) { + return new HCatSchema(new ArrayList()); // empty HSchema construct + } + HCatSchema outerSchema = getHCatSchemaFromTypeString("struct<" + schemaString + ">"); + return outerSchema.get(0).getStructSubSchema(); + } + + public static FieldSchema getFieldSchema(HCatFieldSchema hcatFieldSchema) { + return new FieldSchema(hcatFieldSchema.getName(), hcatFieldSchema.getTypeString(), hcatFieldSchema.getComment()); + } + + public static List getFieldSchemas(List hcatFieldSchemas) { + List lfs = new ArrayList(); + for (HCatFieldSchema hfs : hcatFieldSchemas) { + lfs.add(getFieldSchema(hfs)); + } + return lfs; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/DataTransferFactory.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/DataTransferFactory.java index 8243083..26e3fa3 100644 --- 
a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/DataTransferFactory.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/DataTransferFactory.java @@ -36,102 +36,102 @@ public class DataTransferFactory { - /** - * This should be called once from master node to obtain an instance of - * {@link HCatReader}. - * - * @param re - * ReadEntity built using {@link ReadEntity.Builder} - * @param config - * any configuration which master node wants to pass to HCatalog - * @return {@link HCatReader} - */ - public static HCatReader getHCatReader(final ReadEntity re, - final Map config) { - // In future, this may examine ReadEntity and/or config to return - // appropriate HCatReader - return new HCatInputFormatReader(re, config); - } + /** + * This should be called once from master node to obtain an instance of + * {@link HCatReader}. + * + * @param re + * ReadEntity built using {@link ReadEntity.Builder} + * @param config + * any configuration which master node wants to pass to HCatalog + * @return {@link HCatReader} + */ + public static HCatReader getHCatReader(final ReadEntity re, + final Map config) { + // In future, this may examine ReadEntity and/or config to return + // appropriate HCatReader + return new HCatInputFormatReader(re, config); + } - /** - * This should only be called once from every slave node to obtain an instance - * of {@link HCatReader}. - * - * @param split - * input split obtained at master node - * @param config - * configuration obtained at master node - * @return {@link HCatReader} - */ - public static HCatReader getHCatReader(final InputSplit split, - final Configuration config) { - // In future, this may examine config to return appropriate HCatReader - return getHCatReader(split, config, DefaultStateProvider.get()); - } + /** + * This should only be called once from every slave node to obtain an instance + * of {@link HCatReader}. + * + * @param split + * input split obtained at master node + * @param config + * configuration obtained at master node + * @return {@link HCatReader} + */ + public static HCatReader getHCatReader(final InputSplit split, + final Configuration config) { + // In future, this may examine config to return appropriate HCatReader + return getHCatReader(split, config, DefaultStateProvider.get()); + } - /** - * This should only be called once from every slave node to obtain an instance - * of {@link HCatReader}. This should be called if an external system has some - * state to provide to HCatalog. - * - * @param split - * input split obtained at master node - * @param config - * configuration obtained at master node - * @param sp - * {@link StateProvider} - * @return {@link HCatReader} - */ - public static HCatReader getHCatReader(final InputSplit split, - final Configuration config, StateProvider sp) { - // In future, this may examine config to return appropriate HCatReader - return new HCatInputFormatReader(split, config, sp); - } + /** + * This should only be called once from every slave node to obtain an instance + * of {@link HCatReader}. This should be called if an external system has some + * state to provide to HCatalog. 
+ * + * @param split + * input split obtained at master node + * @param config + * configuration obtained at master node + * @param sp + * {@link StateProvider} + * @return {@link HCatReader} + */ + public static HCatReader getHCatReader(final InputSplit split, + final Configuration config, StateProvider sp) { + // In future, this may examine config to return appropriate HCatReader + return new HCatInputFormatReader(split, config, sp); + } - /** - * This should be called at master node to obtain an instance of - * {@link HCatWriter}. - * - * @param we - * WriteEntity built using {@link WriteEntity.Builder} - * @param config - * any configuration which master wants to pass to HCatalog - * @return {@link HCatWriter} - */ - public static HCatWriter getHCatWriter(final WriteEntity we, - final Map config) { - // In future, this may examine WriteEntity and/or config to return - // appropriate HCatWriter - return new HCatOutputFormatWriter(we, config); - } + /** + * This should be called at master node to obtain an instance of + * {@link HCatWriter}. + * + * @param we + * WriteEntity built using {@link WriteEntity.Builder} + * @param config + * any configuration which master wants to pass to HCatalog + * @return {@link HCatWriter} + */ + public static HCatWriter getHCatWriter(final WriteEntity we, + final Map config) { + // In future, this may examine WriteEntity and/or config to return + // appropriate HCatWriter + return new HCatOutputFormatWriter(we, config); + } - /** - * This should be called at slave nodes to obtain an instance of - * {@link HCatWriter}. - * - * @param cntxt - * {@link WriterContext} obtained at master node - * @return {@link HCatWriter} - */ - public static HCatWriter getHCatWriter(final WriterContext cntxt) { - // In future, this may examine context to return appropriate HCatWriter - return getHCatWriter(cntxt, DefaultStateProvider.get()); - } + /** + * This should be called at slave nodes to obtain an instance of + * {@link HCatWriter}. + * + * @param cntxt + * {@link WriterContext} obtained at master node + * @return {@link HCatWriter} + */ + public static HCatWriter getHCatWriter(final WriterContext cntxt) { + // In future, this may examine context to return appropriate HCatWriter + return getHCatWriter(cntxt, DefaultStateProvider.get()); + } - /** - * This should be called at slave nodes to obtain an instance of - * {@link HCatWriter}. If an external system has some mechanism for providing - * state to HCatalog, this constructor can be used. - * - * @param cntxt - * {@link WriterContext} obtained at master node - * @param sp - * {@link StateProvider} - * @return {@link HCatWriter} - */ - public static HCatWriter getHCatWriter(final WriterContext cntxt, - final StateProvider sp) { - // In future, this may examine context to return appropriate HCatWriter - return new HCatOutputFormatWriter(cntxt.getConf(), sp); - } + /** + * This should be called at slave nodes to obtain an instance of + * {@link HCatWriter}. If an external system has some mechanism for providing + * state to HCatalog, this constructor can be used. 
+ * + * @param cntxt + * {@link WriterContext} obtained at master node + * @param sp + * {@link StateProvider} + * @return {@link HCatWriter} + */ + public static HCatWriter getHCatWriter(final WriterContext cntxt, + final StateProvider sp) { + // In future, this may examine context to return appropriate HCatWriter + return new HCatOutputFormatWriter(cntxt.getConf(), sp); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/EntityBase.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/EntityBase.java index 7814b88..b82a8ec 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/EntityBase.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/EntityBase.java @@ -31,31 +31,31 @@ abstract class EntityBase { - String region; - String tableName; - String dbName; - Map partitionKVs; + String region; + String tableName; + String dbName; + Map partitionKVs; - /** - * Common methods for {@link ReadEntity} and {@link WriteEntity} - */ + /** + * Common methods for {@link ReadEntity} and {@link WriteEntity} + */ - abstract static class Entity extends EntityBase { + abstract static class Entity extends EntityBase { - public String getRegion() { - return region; - } + public String getRegion() { + return region; + } - public String getTableName() { - return tableName; - } + public String getTableName() { + return tableName; + } - public String getDbName() { - return dbName; - } + public String getDbName() { + return dbName; + } - public Map getPartitionKVs() { - return partitionKVs; - } + public Map getPartitionKVs() { + return partitionKVs; } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatReader.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatReader.java index 54550ec..5eebf16 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatReader.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatReader.java @@ -36,68 +36,68 @@ public abstract class HCatReader { - /** - * This should be called at master node to obtain {@link ReaderContext} which - * then should be serialized and sent to slave nodes. - * - * @return {@link ReaderContext} - * @throws HCatException - */ - public abstract ReaderContext prepareRead() throws HCatException; + /** + * This should be called at master node to obtain {@link ReaderContext} which + * then should be serialized and sent to slave nodes. + * + * @return {@link ReaderContext} + * @throws HCatException + */ + public abstract ReaderContext prepareRead() throws HCatException; - /** - * This should be called at slave nodes to read {@link HCatRecord}s - * - * @return {@link Iterator} of {@link HCatRecord} - * @throws HCatException - */ - public abstract Iterator read() throws HCatException; + /** + * This should be called at slave nodes to read {@link HCatRecord}s + * + * @return {@link Iterator} of {@link HCatRecord} + * @throws HCatException + */ + public abstract Iterator read() throws HCatException; - /** - * This constructor will be invoked by {@link DataTransferFactory} at master - * node. Don't use this constructor. Instead, use {@link DataTransferFactory} - * - * @param re - * @param config - */ - protected HCatReader(final ReadEntity re, final Map config) { - this(config); - this.re = re; - } + /** + * This constructor will be invoked by {@link DataTransferFactory} at master + * node. Don't use this constructor. 
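To ground the reader-side contract documented above, here is a minimal sketch of the master/slave read flow built from DataTransferFactory, ReadEntity.Builder, ReaderContext and HCatReader as they appear in this patch. The database and table names, the empty config map, and the local loop standing in for shipping the ReaderContext and splits to slave nodes are illustrative assumptions only.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hcatalog.data.transfer.HCatReader;
import org.apache.hcatalog.data.transfer.ReadEntity;
import org.apache.hcatalog.data.transfer.ReaderContext;

public class ReadFlowSketch {
  public static void main(String[] args) throws HCatException {
    Map<String, String> config = new HashMap<String, String>(); // optional config passed to HCatalog

    // Master node: describe what to read and prepare the read.
    ReadEntity entity = new ReadEntity.Builder()
        .withDatabase("default")   // assumed sample database
        .withTable("my_table")     // assumed sample table
        .build();
    HCatReader masterReader = DataTransferFactory.getHCatReader(entity, config);
    ReaderContext context = masterReader.prepareRead();

    // Slave nodes: in a real deployment the ReaderContext (Externalizable) and one
    // InputSplit each would be serialized and shipped; here we just loop locally.
    for (InputSplit split : context.getSplits()) {
      HCatReader slaveReader = DataTransferFactory.getHCatReader(split, context.getConf());
      Iterator<HCatRecord> records = slaveReader.read();
      while (records.hasNext()) {
        HCatRecord record = records.next();
        System.out.println(record); // process the record
      }
    }
  }
}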
Instead, use {@link DataTransferFactory} + * + * @param re + * @param config + */ + protected HCatReader(final ReadEntity re, final Map config) { + this(config); + this.re = re; + } - /** - * This constructor will be invoked by {@link DataTransferFactory} at slave - * nodes. Don't use this constructor. Instead, use {@link DataTransferFactory} - * - * @param config - * @param sp - */ + /** + * This constructor will be invoked by {@link DataTransferFactory} at slave + * nodes. Don't use this constructor. Instead, use {@link DataTransferFactory} + * + * @param config + * @param sp + */ - protected HCatReader(final Configuration config, StateProvider sp) { - this.conf = config; - this.sp = sp; - } + protected HCatReader(final Configuration config, StateProvider sp) { + this.conf = config; + this.sp = sp; + } - protected ReadEntity re; // This will be null at slaves. - protected Configuration conf; - protected ReaderContext info; - protected StateProvider sp; // This will be null at master. + protected ReadEntity re; // This will be null at slaves. + protected Configuration conf; + protected ReaderContext info; + protected StateProvider sp; // This will be null at master. - private HCatReader(final Map config) { - Configuration conf = new Configuration(); - if (null != config) { - for (Entry kv : config.entrySet()) { - conf.set(kv.getKey(), kv.getValue()); - } - } - this.conf = conf; + private HCatReader(final Map config) { + Configuration conf = new Configuration(); + if (null != config) { + for (Entry kv : config.entrySet()) { + conf.set(kv.getKey(), kv.getValue()); + } } + this.conf = conf; + } - public Configuration getConf() { - if (null == conf) { - throw new IllegalStateException( - "HCatReader is not constructed correctly."); - } - return conf; + public Configuration getConf() { + if (null == conf) { + throw new IllegalStateException( + "HCatReader is not constructed correctly."); } + return conf; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatWriter.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatWriter.java index c4ea928..7e11fd1 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatWriter.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatWriter.java @@ -37,79 +37,79 @@ public abstract class HCatWriter { - protected Configuration conf; - protected WriteEntity we; // This will be null at slave nodes. - protected WriterContext info; - protected StateProvider sp; + protected Configuration conf; + protected WriteEntity we; // This will be null at slave nodes. + protected WriterContext info; + protected StateProvider sp; - /** - * External system should invoke this method exactly once from a master node. - * - * @return {@link WriterContext} This should be serialized and sent to slave - * nodes to construct HCatWriter there. - * @throws HCatException - */ - public abstract WriterContext prepareWrite() throws HCatException; + /** + * External system should invoke this method exactly once from a master node. + * + * @return {@link WriterContext} This should be serialized and sent to slave + * nodes to construct HCatWriter there. + * @throws HCatException + */ + public abstract WriterContext prepareWrite() throws HCatException; - /** - * This method should be used at slave needs to perform writes. - * - * @param recordItr - * {@link Iterator} records to be written into HCatalog. 
- * @throws {@link HCatException} - */ - public abstract void write(final Iterator recordItr) - throws HCatException; + /** + * This method should be used at slave needs to perform writes. + * + * @param recordItr + * {@link Iterator} records to be written into HCatalog. + * @throws {@link HCatException} + */ + public abstract void write(final Iterator recordItr) + throws HCatException; - /** - * This method should be called at master node. Primary purpose of this is to - * do metadata commit. - * - * @throws {@link HCatException} - */ - public abstract void commit(final WriterContext context) throws HCatException; + /** + * This method should be called at master node. Primary purpose of this is to + * do metadata commit. + * + * @throws {@link HCatException} + */ + public abstract void commit(final WriterContext context) throws HCatException; - /** - * This method should be called at master node. Primary purpose of this is to - * do cleanups in case of failures. - * - * @throws {@link HCatException} * - */ - public abstract void abort(final WriterContext context) throws HCatException; + /** + * This method should be called at master node. Primary purpose of this is to + * do cleanups in case of failures. + * + * @throws {@link HCatException} * + */ + public abstract void abort(final WriterContext context) throws HCatException; - /** - * This constructor will be used at master node - * - * @param we - * WriteEntity defines where in storage records should be written to. - * @param config - * Any configuration which external system wants to communicate to - * HCatalog for performing writes. - */ - protected HCatWriter(final WriteEntity we, final Map config) { - this(config); - this.we = we; - } - - /** - * This constructor will be used at slave nodes. - * - * @param config - */ - protected HCatWriter(final Configuration config, final StateProvider sp) { - this.conf = config; - this.sp = sp; - } + /** + * This constructor will be used at master node + * + * @param we + * WriteEntity defines where in storage records should be written to. + * @param config + * Any configuration which external system wants to communicate to + * HCatalog for performing writes. + */ + protected HCatWriter(final WriteEntity we, final Map config) { + this(config); + this.we = we; + } - private HCatWriter(final Map config) { - Configuration conf = new Configuration(); - if (config != null) { - // user is providing config, so it could be null. - for (Entry kv : config.entrySet()) { - conf.set(kv.getKey(), kv.getValue()); - } - } + /** + * This constructor will be used at slave nodes. + * + * @param config + */ + protected HCatWriter(final Configuration config, final StateProvider sp) { + this.conf = config; + this.sp = sp; + } - this.conf = conf; + private HCatWriter(final Map config) { + Configuration conf = new Configuration(); + if (config != null) { + // user is providing config, so it could be null. 
+ for (Entry kv : config.entrySet()) { + conf.set(kv.getKey(), kv.getValue()); + } } + + this.conf = conf; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReadEntity.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReadEntity.java index b72cf73..a28022f 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReadEntity.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReadEntity.java @@ -26,67 +26,67 @@ */ public class ReadEntity extends EntityBase.Entity { + private String filterString; + + /** + * Don't instantiate {@link ReadEntity} directly. Use, + * {@link ReadEntity.Builder} instead. + * + */ + private ReadEntity() { + // Not allowed + } + + private ReadEntity(Builder builder) { + + this.region = builder.region; + this.dbName = builder.dbName; + this.tableName = builder.tableName; + this.partitionKVs = builder.partitionKVs; + this.filterString = builder.filterString; + } + + public String getFilterString() { + return this.filterString; + } + + /** + * This class should be used to build {@link ReadEntity}. It follows builder + * pattern, letting you build your {@link ReadEntity} with whatever level of + * detail you want. + * + */ + public static class Builder extends EntityBase { + private String filterString; - /** - * Don't instantiate {@link ReadEntity} directly. Use, - * {@link ReadEntity.Builder} instead. - * - */ - private ReadEntity() { - // Not allowed + public Builder withRegion(final String region) { + this.region = region; + return this; + } + + public Builder withDatabase(final String dbName) { + this.dbName = dbName; + return this; } - private ReadEntity(Builder builder) { + public Builder withTable(final String tblName) { + this.tableName = tblName; + return this; + } - this.region = builder.region; - this.dbName = builder.dbName; - this.tableName = builder.tableName; - this.partitionKVs = builder.partitionKVs; - this.filterString = builder.filterString; + public Builder withPartition(final Map partKVs) { + this.partitionKVs = partKVs; + return this; } - public String getFilterString() { - return this.filterString; + public Builder withFilter(String filterString) { + this.filterString = filterString; + return this; } - /** - * This class should be used to build {@link ReadEntity}. It follows builder - * pattern, letting you build your {@link ReadEntity} with whatever level of - * detail you want. 
- * - */ - public static class Builder extends EntityBase { - - private String filterString; - - public Builder withRegion(final String region) { - this.region = region; - return this; - } - - public Builder withDatabase(final String dbName) { - this.dbName = dbName; - return this; - } - - public Builder withTable(final String tblName) { - this.tableName = tblName; - return this; - } - - public Builder withPartition(final Map partKVs) { - this.partitionKVs = partKVs; - return this; - } - - public Builder withFilter(String filterString) { - this.filterString = filterString; - return this; - } - - public ReadEntity build() { - return new ReadEntity(this); - } + public ReadEntity build() { + return new ReadEntity(this); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReaderContext.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReaderContext.java index 08be77f..249b097 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReaderContext.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReaderContext.java @@ -40,51 +40,51 @@ */ public class ReaderContext implements Externalizable, Configurable { - private static final long serialVersionUID = -2656468331739574367L; - private List splits; - private Configuration conf; + private static final long serialVersionUID = -2656468331739574367L; + private List splits; + private Configuration conf; - public ReaderContext() { - this.splits = new ArrayList(); - this.conf = new Configuration(); - } + public ReaderContext() { + this.splits = new ArrayList(); + this.conf = new Configuration(); + } - public void setInputSplits(final List splits) { - this.splits = splits; - } + public void setInputSplits(final List splits) { + this.splits = splits; + } - public List getSplits() { - return splits; - } + public List getSplits() { + return splits; + } - @Override - public Configuration getConf() { - return conf; - } + @Override + public Configuration getConf() { + return conf; + } - @Override - public void setConf(final Configuration config) { - conf = config; - } + @Override + public void setConf(final Configuration config) { + conf = config; + } - @Override - public void writeExternal(ObjectOutput out) throws IOException { - conf.write(out); - out.writeInt(splits.size()); - for (InputSplit split : splits) { - ((HCatSplit) split).write(out); - } + @Override + public void writeExternal(ObjectOutput out) throws IOException { + conf.write(out); + out.writeInt(splits.size()); + for (InputSplit split : splits) { + ((HCatSplit) split).write(out); } + } - @Override - public void readExternal(ObjectInput in) throws IOException, - ClassNotFoundException { - conf.readFields(in); - int numOfSplits = in.readInt(); - for (int i = 0; i < numOfSplits; i++) { - HCatSplit split = new HCatSplit(); - split.readFields(in); - splits.add(split); - } + @Override + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + conf.readFields(in); + int numOfSplits = in.readInt(); + for (int i = 0; i < numOfSplits; i++) { + HCatSplit split = new HCatSplit(); + split.readFields(in); + splits.add(split); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriteEntity.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriteEntity.java index 9940e96..8a82e9d 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriteEntity.java +++ 
b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriteEntity.java @@ -26,53 +26,53 @@ */ public class WriteEntity extends EntityBase.Entity { - /** - * Don't instantiate {@link WriteEntity} directly. Use, {@link Builder} to - * build {@link WriteEntity}. - */ + /** + * Don't instantiate {@link WriteEntity} directly. Use, {@link Builder} to + * build {@link WriteEntity}. + */ - private WriteEntity() { - // Not allowed. - } - - private WriteEntity(Builder builder) { - this.region = builder.region; - this.dbName = builder.dbName; - this.tableName = builder.tableName; - this.partitionKVs = builder.partitionKVs; - } + private WriteEntity() { + // Not allowed. + } - /** - * This class should be used to build {@link WriteEntity}. It follows builder - * pattern, letting you build your {@link WriteEntity} with whatever level of - * detail you want. - * - */ - public static class Builder extends EntityBase { + private WriteEntity(Builder builder) { + this.region = builder.region; + this.dbName = builder.dbName; + this.tableName = builder.tableName; + this.partitionKVs = builder.partitionKVs; + } - public Builder withRegion(final String region) { - this.region = region; - return this; - } + /** + * This class should be used to build {@link WriteEntity}. It follows builder + * pattern, letting you build your {@link WriteEntity} with whatever level of + * detail you want. + * + */ + public static class Builder extends EntityBase { - public Builder withDatabase(final String dbName) { - this.dbName = dbName; - return this; - } + public Builder withRegion(final String region) { + this.region = region; + return this; + } - public Builder withTable(final String tblName) { - this.tableName = tblName; - return this; - } + public Builder withDatabase(final String dbName) { + this.dbName = dbName; + return this; + } - public Builder withPartition(final Map partKVs) { - this.partitionKVs = partKVs; - return this; - } + public Builder withTable(final String tblName) { + this.tableName = tblName; + return this; + } - public WriteEntity build() { - return new WriteEntity(this); - } + public Builder withPartition(final Map partKVs) { + this.partitionKVs = partKVs; + return this; + } + public WriteEntity build() { + return new WriteEntity(this); } + + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriterContext.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriterContext.java index 9cd9a6d..48c3b1a 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriterContext.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriterContext.java @@ -36,31 +36,31 @@ */ public class WriterContext implements Externalizable, Configurable { - private static final long serialVersionUID = -5899374262971611840L; - private Configuration conf; + private static final long serialVersionUID = -5899374262971611840L; + private Configuration conf; - public WriterContext() { - conf = new Configuration(); - } + public WriterContext() { + conf = new Configuration(); + } - @Override - public Configuration getConf() { - return conf; - } + @Override + public Configuration getConf() { + return conf; + } - @Override - public void setConf(final Configuration config) { - this.conf = config; - } + @Override + public void setConf(final Configuration config) { + this.conf = config; + } - @Override - public void writeExternal(ObjectOutput out) throws IOException { - conf.write(out); - } + @Override + public void writeExternal(ObjectOutput out) 
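Mirroring the read path, this is a minimal sketch of the master/slave write flow suggested by WriteEntity.Builder, DataTransferFactory, WriterContext and HCatWriter above. The names, the empty config map, and the caller-supplied record iterator are assumptions for illustration; how the WriterContext is serialized and shipped to slaves is left to the external system.

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hcatalog.data.transfer.HCatWriter;
import org.apache.hcatalog.data.transfer.WriteEntity;
import org.apache.hcatalog.data.transfer.WriterContext;

public class WriteFlowSketch {

  // records would normally be produced by the external system on each slave node.
  public static void run(Iterator<HCatRecord> records) throws HCatException {
    Map<String, String> config = new HashMap<String, String>();

    // Master node: describe the target table and obtain a WriterContext.
    WriteEntity entity = new WriteEntity.Builder()
        .withDatabase("default")   // assumed sample database
        .withTable("my_table")     // assumed sample table
        .build();
    HCatWriter masterWriter = DataTransferFactory.getHCatWriter(entity, config);
    WriterContext context = masterWriter.prepareWrite();

    // Slave node: reconstruct a writer from the shipped context and write records.
    HCatWriter slaveWriter = DataTransferFactory.getHCatWriter(context);
    try {
      slaveWriter.write(records);
      // Master node: commit the metadata once all slaves have finished.
      masterWriter.commit(context);
    } catch (HCatException e) {
      // Master node: clean up on failure.
      masterWriter.abort(context);
      throw e;
    }
  }
}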
throws IOException { + conf.write(out); + } - @Override - public void readExternal(ObjectInput in) throws IOException, - ClassNotFoundException { - conf.readFields(in); - } + @Override + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + conf.readFields(in); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatInputFormatReader.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatInputFormatReader.java index 01b2f7d..0f160c1 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatInputFormatReader.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatInputFormatReader.java @@ -46,92 +46,92 @@ */ public class HCatInputFormatReader extends HCatReader { - private InputSplit split; - - public HCatInputFormatReader(InputSplit split, Configuration config, - StateProvider sp) { - super(config, sp); - this.split = split; + private InputSplit split; + + public HCatInputFormatReader(InputSplit split, Configuration config, + StateProvider sp) { + super(config, sp); + this.split = split; + } + + public HCatInputFormatReader(ReadEntity info, Map config) { + super(info, config); + } + + @Override + public ReaderContext prepareRead() throws HCatException { + try { + Job job = new Job(conf); + HCatInputFormat hcif = HCatInputFormat.setInput( + job, re.getDbName(), re.getTableName()).setFilter(re.getFilterString()); + ReaderContext cntxt = new ReaderContext(); + cntxt.setInputSplits(hcif.getSplits( + ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null))); + cntxt.setConf(job.getConfiguration()); + return cntxt; + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } + } + + @Override + public Iterator read() throws HCatException { + + HCatInputFormat inpFmt = new HCatInputFormat(); + RecordReader rr; + try { + TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID()); + rr = inpFmt.createRecordReader(split, cntxt); + rr.initialize(split, cntxt); + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); } + return new HCatRecordItr(rr); + } + + private static class HCatRecordItr implements Iterator { + + private RecordReader curRecReader; - public HCatInputFormatReader(ReadEntity info, Map config) { - super(info, config); + HCatRecordItr(RecordReader rr) { + curRecReader = rr; } @Override - public ReaderContext prepareRead() throws HCatException { - try { - Job job = new Job(conf); - HCatInputFormat hcif = HCatInputFormat.setInput( - job, re.getDbName(), re.getTableName()).setFilter(re.getFilterString()); - ReaderContext cntxt = new ReaderContext(); - cntxt.setInputSplits(hcif.getSplits( - ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null))); - cntxt.setConf(job.getConfiguration()); - return cntxt; - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + public boolean hasNext() { + try { + boolean retVal = curRecReader.nextKeyValue(); + if (retVal) { + return true; } + // if its false, we need to close recordReader. 
+ curRecReader.close(); + return false; + } catch (IOException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } @Override - public Iterator read() throws HCatException { - - HCatInputFormat inpFmt = new HCatInputFormat(); - RecordReader rr; - try { - TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID()); - rr = inpFmt.createRecordReader(split, cntxt); - rr.initialize(split, cntxt); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } - return new HCatRecordItr(rr); + public HCatRecord next() { + try { + return curRecReader.getCurrentValue(); + } catch (IOException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } - private static class HCatRecordItr implements Iterator { - - private RecordReader curRecReader; - - HCatRecordItr(RecordReader rr) { - curRecReader = rr; - } - - @Override - public boolean hasNext() { - try { - boolean retVal = curRecReader.nextKeyValue(); - if (retVal) { - return true; - } - // if its false, we need to close recordReader. - curRecReader.close(); - return false; - } catch (IOException e) { - throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - @Override - public HCatRecord next() { - try { - return curRecReader.getCurrentValue(); - } catch (IOException e) { - throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Not allowed"); - } + @Override + public void remove() { + throw new UnsupportedOperationException("Not allowed"); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java index d420416..f0a4857 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java @@ -48,114 +48,114 @@ */ public class HCatOutputFormatWriter extends HCatWriter { - public HCatOutputFormatWriter(WriteEntity we, Map config) { - super(we, config); - } + public HCatOutputFormatWriter(WriteEntity we, Map config) { + super(we, config); + } - public HCatOutputFormatWriter(Configuration config, StateProvider sp) { - super(config, sp); - } + public HCatOutputFormatWriter(Configuration config, StateProvider sp) { + super(config, sp); + } - @Override - public WriterContext prepareWrite() throws HCatException { - OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(), - we.getTableName(), we.getPartitionKVs()); - Job job; - try { - job = new Job(conf); - HCatOutputFormat.setOutput(job, jobInfo); - HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job)); - HCatOutputFormat outFormat = new HCatOutputFormat(); - outFormat.checkOutputSpecs(job); - outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())).setupJob(job); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch 
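One usage note on the iterator returned by read() above: the underlying RecordReader is closed only when hasNext() returns false, and remove() is unsupported, so callers are expected to drain the iterator; abandoning it part-way through leaves the RecordReader open, as there is no explicit close on the iterator itself.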
(InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } - WriterContext cntxt = new WriterContext(); - cntxt.setConf(job.getConfiguration()); - return cntxt; + @Override + public WriterContext prepareWrite() throws HCatException { + OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(), + we.getTableName(), we.getPartitionKVs()); + Job job; + try { + job = new Job(conf); + HCatOutputFormat.setOutput(job, jobInfo); + HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job)); + HCatOutputFormat outFormat = new HCatOutputFormat(); + outFormat.checkOutputSpecs(job); + outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())).setupJob(job); + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); } + WriterContext cntxt = new WriterContext(); + cntxt.setConf(job.getConfiguration()); + return cntxt; + } - @Override - public void write(Iterator recordItr) throws HCatException { + @Override + public void write(Iterator recordItr) throws HCatException { - int id = sp.getId(); - setVarsInConf(id); - HCatOutputFormat outFormat = new HCatOutputFormat(); - TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id)); - OutputCommitter committer = null; - RecordWriter, HCatRecord> writer; + int id = sp.getId(); + setVarsInConf(id); + HCatOutputFormat outFormat = new HCatOutputFormat(); + TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id)); + OutputCommitter committer = null; + RecordWriter, HCatRecord> writer; + try { + committer = outFormat.getOutputCommitter(cntxt); + committer.setupTask(cntxt); + writer = outFormat.getRecordWriter(cntxt); + while (recordItr.hasNext()) { + HCatRecord rec = recordItr.next(); + writer.write(null, rec); + } + writer.close(cntxt); + if (committer.needsTaskCommit(cntxt)) { + committer.commitTask(cntxt); + } + } catch (IOException e) { + if (null != committer) { try { - committer = outFormat.getOutputCommitter(cntxt); - committer.setupTask(cntxt); - writer = outFormat.getRecordWriter(cntxt); - while (recordItr.hasNext()) { - HCatRecord rec = recordItr.next(); - writer.write(null, rec); - } - writer.close(cntxt); - if (committer.needsTaskCommit(cntxt)) { - committer.commitTask(cntxt); - } - } catch (IOException e) { - if (null != committer) { - try { - committer.abortTask(cntxt); - } catch (IOException e1) { - throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); - } - } - throw new HCatException("Failed while writing", e); - } catch (InterruptedException e) { - if (null != committer) { - try { - committer.abortTask(cntxt); - } catch (IOException e1) { - throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); - } - } - throw new HCatException("Failed while writing", e); + committer.abortTask(cntxt); + } catch (IOException e1) { + throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); } - } - - @Override - public void commit(WriterContext context) throws HCatException { + } + throw new HCatException("Failed while writing", e); + } catch (InterruptedException e) { + if (null != 
committer) { try { - new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) - .commitJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConf(), null)); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + committer.abortTask(cntxt); + } catch (IOException e1) { + throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); } + } + throw new HCatException("Failed while writing", e); } + } - @Override - public void abort(WriterContext context) throws HCatException { - try { - new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) - .abortJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConf(), null), State.FAILED); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } + @Override + public void commit(WriterContext context) throws HCatException { + try { + new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) + .commitJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConf(), null)); + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); } + } - private void setVarsInConf(int id) { - - // Following two config keys are required by FileOutputFormat to work - // correctly. - // In usual case of Hadoop, JobTracker will set these before launching - // tasks. - // Since there is no jobtracker here, we set it ourself. - conf.setInt("mapred.task.partition", id); - conf.set("mapred.task.id", "attempt__0000_r_000000_" + id); + @Override + public void abort(WriterContext context) throws HCatException { + try { + new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) + .abortJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConf(), null), State.FAILED); + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); } + } + + private void setVarsInConf(int id) { + + // Following two config keys are required by FileOutputFormat to work + // correctly. + // In usual case of Hadoop, JobTracker will set these before launching + // tasks. + // Since there is no jobtracker here, we set it ourself. 
+ conf.setInt("mapred.task.partition", id); + conf.set("mapred.task.id", "attempt__0000_r_000000_" + id); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/DefaultStateProvider.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/DefaultStateProvider.java index 80bf6c0..be25f8b 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/DefaultStateProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/DefaultStateProvider.java @@ -27,25 +27,25 @@ */ public class DefaultStateProvider implements StateProvider { - /** - * Default implementation. Here, ids are generated randomly. - */ - @Override - public int getId() { + /** + * Default implementation. Here, ids are generated randomly. + */ + @Override + public int getId() { - NumberFormat numberFormat = NumberFormat.getInstance(); - numberFormat.setMinimumIntegerDigits(5); - numberFormat.setGroupingUsed(false); - return Integer - .parseInt(numberFormat.format(Math.abs(new Random().nextInt()))); - } + NumberFormat numberFormat = NumberFormat.getInstance(); + numberFormat.setMinimumIntegerDigits(5); + numberFormat.setGroupingUsed(false); + return Integer + .parseInt(numberFormat.format(Math.abs(new Random().nextInt()))); + } - private static StateProvider sp; + private static StateProvider sp; - public static synchronized StateProvider get() { - if (null == sp) { - sp = new DefaultStateProvider(); - } - return sp; + public static synchronized StateProvider get() { + if (null == sp) { + sp = new DefaultStateProvider(); } + return sp; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/StateProvider.java b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/StateProvider.java index 2aec2ab..da8ab62 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/StateProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/StateProvider.java @@ -27,10 +27,10 @@ */ public interface StateProvider { - /** - * This method should return id assigned to slave node. - * - * @return id - */ - public int getId(); + /** + * This method should return id assigned to slave node. 
+ * + * @return id + */ + public int getId(); } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/har/HarOutputCommitterPostProcessor.java b/hcatalog/core/src/main/java/org/apache/hcatalog/har/HarOutputCommitterPostProcessor.java index 10ad2da..7ee62be 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/har/HarOutputCommitterPostProcessor.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/har/HarOutputCommitterPostProcessor.java @@ -37,92 +37,92 @@ */ public class HarOutputCommitterPostProcessor { - boolean isEnabled = false; + boolean isEnabled = false; - public boolean isEnabled() { - return isEnabled; - } + public boolean isEnabled() { + return isEnabled; + } - public void setEnabled(boolean enabled) { - this.isEnabled = enabled; - } + public void setEnabled(boolean enabled) { + this.isEnabled = enabled; + } - public void exec(JobContext context, Partition partition, Path partPath) throws IOException { + public void exec(JobContext context, Partition partition, Path partPath) throws IOException { // LOG.info("Archiving partition ["+partPath.toString()+"]"); - makeHar(context, partPath.toUri().toString(), harFile(partPath)); - partition.getParameters().put(hive_metastoreConstants.IS_ARCHIVED, "true"); - } + makeHar(context, partPath.toUri().toString(), harFile(partPath)); + partition.getParameters().put(hive_metastoreConstants.IS_ARCHIVED, "true"); + } - public String harFile(Path ptnPath) throws IOException { - String harFile = ptnPath.toString().replaceFirst("/+$", "") + ".har"; + public String harFile(Path ptnPath) throws IOException { + String harFile = ptnPath.toString().replaceFirst("/+$", "") + ".har"; // LOG.info("har file : " + harFile); - return harFile; - } + return harFile; + } - public String getParentFSPath(Path ptnPath) throws IOException { - return ptnPath.toUri().getPath().replaceFirst("/+$", ""); - } + public String getParentFSPath(Path ptnPath) throws IOException { + return ptnPath.toUri().getPath().replaceFirst("/+$", ""); + } - public String getProcessedLocation(Path ptnPath) throws IOException { - String harLocn = ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "") + ".har" + Path.SEPARATOR; + public String getProcessedLocation(Path ptnPath) throws IOException { + String harLocn = ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "") + ".har" + Path.SEPARATOR; // LOG.info("har location : " + harLocn); - return harLocn; - } + return harLocn; + } - /** - * Creates a har file from the contents of a given directory, using that as root. - * @param dir Directory to archive - * @param harFile The HAR file to create - */ - public static void makeHar(JobContext context, String dir, String harFile) throws IOException { + /** + * Creates a har file from the contents of a given directory, using that as root. 
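Because DefaultStateProvider above hands out random ids, an external system that already knows each slave's id can supply its own StateProvider through the DataTransferFactory overloads that accept one. A minimal sketch follows; the id source (a task or partition number known to the caller) is an assumption.

import org.apache.hcatalog.data.transfer.state.StateProvider;

public class FixedIdStateProvider implements StateProvider {

  private final int id;

  public FixedIdStateProvider(int id) {
    this.id = id; // e.g. the slave's task/partition number tracked by the external framework
  }

  @Override
  public int getId() {
    return id;
  }
}

// At a slave node, instead of relying on DefaultStateProvider.get():
//   HCatReader reader = DataTransferFactory.getHCatReader(split, conf, new FixedIdStateProvider(taskId));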
+ * @param dir Directory to archive + * @param harFile The HAR file to create + */ + public static void makeHar(JobContext context, String dir, String harFile) throws IOException { // Configuration conf = context.getConfiguration(); // Credentials creds = context.getCredentials(); // HCatUtil.logAllTokens(LOG,context); - int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR); - Path archivePath = new Path(harFile.substring(0, lastSep)); - final String[] args = { - "-archiveName", - harFile.substring(lastSep + 1, harFile.length()), - "-p", - dir, - "*", - archivePath.toString() - }; + int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR); + Path archivePath = new Path(harFile.substring(0, lastSep)); + final String[] args = { + "-archiveName", + harFile.substring(lastSep + 1, harFile.length()), + "-p", + dir, + "*", + archivePath.toString() + }; // for (String arg : args){ // LOG.info("Args to har : "+ arg); // } - try { - Configuration newConf = new Configuration(); - FileSystem fs = archivePath.getFileSystem(newConf); + try { + Configuration newConf = new Configuration(); + FileSystem fs = archivePath.getFileSystem(newConf); - String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION); - if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) { - newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting); + String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION); + if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) { + newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting); // LOG.info("System.getenv(\"HADOOP_TOKEN_FILE_LOCATION\") =["+ System.getenv("HADOOP_TOKEN_FILE_LOCATION")+"]"); - } + } // for (FileStatus ds : fs.globStatus(new Path(dir, "*"))){ // LOG.info("src : "+ds.getPath().toUri().toString()); // } - final HadoopArchives har = new HadoopArchives(newConf); - int rc = ToolRunner.run(har, args); - if (rc != 0) { - throw new Exception("Har returned error code " + rc); - } + final HadoopArchives har = new HadoopArchives(newConf); + int rc = ToolRunner.run(har, args); + if (rc != 0) { + throw new Exception("Har returned error code " + rc); + } // for (FileStatus hs : fs.globStatus(new Path(harFile, "*"))){ // LOG.info("dest : "+hs.getPath().toUri().toString()); // } // doHarCheck(fs,harFile); // LOG.info("Nuking " + dir); - fs.delete(new Path(dir), true); - } catch (Exception e) { - throw new HCatException("Error creating Har [" + harFile + "] from [" + dir + "]", e); - } + fs.delete(new Path(dir), true); + } catch (Exception e) { + throw new HCatException("Error creating Har [" + harFile + "] from [" + dir + "]", e); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java index 89f94ed..60ce7db 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java @@ -39,71 +39,71 @@ */ class DefaultOutputCommitterContainer extends OutputCommitterContainer { - private static final Logger LOG = LoggerFactory.getLogger(DefaultOutputCommitterContainer.class); + private static final Logger LOG = 
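For orientation, a hedged sketch of how the archiving helper above could be driven. In this patch it is exercised from the output committer, so the JobContext, Partition and partition path below are stand-ins supplied by the caller, not values taken from this change.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hcatalog.har.HarOutputCommitterPostProcessor;

public class HarSketch {
  public static void archive(JobContext context, Partition partition) throws java.io.IOException {
    HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor();
    harProcessor.setEnabled(true);

    Path partPath = new Path("/warehouse/my_table/ds=2013-01-01"); // assumed partition location
    if (harProcessor.isEnabled()) {
      // Archives the directory into ds=2013-01-01.har and flags the partition as archived.
      harProcessor.exec(context, partition, partPath);
      // har://-prefixed location that readers of the archived partition would use.
      String harLocation = harProcessor.getProcessedLocation(partPath);
      System.out.println(harLocation);
    }
  }
}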
LoggerFactory.getLogger(DefaultOutputCommitterContainer.class); - /** - * @param context current JobContext - * @param baseCommitter OutputCommitter to contain - * @throws IOException - */ - public DefaultOutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { - super(context, baseCommitter); - } + /** + * @param context current JobContext + * @param baseCommitter OutputCommitter to contain + * @throws IOException + */ + public DefaultOutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { + super(context, baseCommitter); + } - @Override - public void abortTask(TaskAttemptContext context) throws IOException { - getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + @Override + public void abortTask(TaskAttemptContext context) throws IOException { + getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); + } - @Override - public void commitTask(TaskAttemptContext context) throws IOException { - getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + @Override + public void commitTask(TaskAttemptContext context) throws IOException { + getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); + } - @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); - } + @Override + public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { + return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); + } - @Override - public void setupJob(JobContext context) throws IOException { - getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); - } + @Override + public void setupJob(JobContext context) throws IOException { + getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); + } - @Override - public void setupTask(TaskAttemptContext context) throws IOException { - getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + @Override + public void setupTask(TaskAttemptContext context) throws IOException { + getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); + } - @Override - public void abortJob(JobContext jobContext, State state) throws IOException { - getBaseOutputCommitter().abortJob(HCatMapRedUtil.createJobContext(jobContext), state); - cleanupJob(jobContext); - } + @Override + public void abortJob(JobContext jobContext, State state) throws IOException { + getBaseOutputCommitter().abortJob(HCatMapRedUtil.createJobContext(jobContext), state); + cleanupJob(jobContext); + } - @Override - public void commitJob(JobContext jobContext) throws IOException { - getBaseOutputCommitter().commitJob(HCatMapRedUtil.createJobContext(jobContext)); - cleanupJob(jobContext); - } + @Override + public void commitJob(JobContext jobContext) throws IOException { + getBaseOutputCommitter().commitJob(HCatMapRedUtil.createJobContext(jobContext)); + cleanupJob(jobContext); + } - @Override - public void cleanupJob(JobContext context) throws IOException { - getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context)); + @Override + public void cleanupJob(JobContext context) throws IOException { + 
getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context)); - //Cancel HCat and JobTracker tokens - HiveMetaStoreClient client = null; - try { - HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); - client = HCatUtil.getHiveClient(hiveConf); - String tokenStrForm = client.getTokenStrForm(); - if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - client.cancelDelegationToken(tokenStrForm); - } - } catch (Exception e) { - LOG.warn("Failed to cancel delegation token", e); - } finally { - HCatUtil.closeHiveClientQuietly(client); - } + //Cancel HCat and JobTracker tokens + HiveMetaStoreClient client = null; + try { + HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); + client = HCatUtil.getHiveClient(hiveConf); + String tokenStrForm = client.getTokenStrForm(); + if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + client.cancelDelegationToken(tokenStrForm); + } + } catch (Exception e) { + LOG.warn("Failed to cancel delegation token", e); + } finally { + HCatUtil.closeHiveClientQuietly(client); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java index df0c141..dbfbc0c 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java @@ -41,62 +41,62 @@ */ class DefaultOutputFormatContainer extends OutputFormatContainer { - private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(); + private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(); - static { - NUMBER_FORMAT.setMinimumIntegerDigits(5); - NUMBER_FORMAT.setGroupingUsed(false); - } + static { + NUMBER_FORMAT.setMinimumIntegerDigits(5); + NUMBER_FORMAT.setGroupingUsed(false); + } - public DefaultOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, Writable> of) { - super(of); - } + public DefaultOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, Writable> of) { + super(of); + } - static synchronized String getOutputName(int partition) { - return "part-" + NUMBER_FORMAT.format(partition); - } + static synchronized String getOutputName(int partition) { + return "part-" + NUMBER_FORMAT.format(partition); + } - /** - * Get the record writer for the job. Uses the storagehandler's OutputFormat - * to get the record writer. - * @param context the information about the current task. - * @return a RecordWriter to write the output for the job. - * @throws IOException - */ - @Override - public RecordWriter, HCatRecord> - getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - String name = getOutputName(context.getTaskAttemptID().getTaskID().getId()); - return new DefaultRecordWriterContainer(context, - getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name, InternalUtil.createReporter(context))); - } + /** + * Get the record writer for the job. Uses the storagehandler's OutputFormat + * to get the record writer. + * @param context the information about the current task. + * @return a RecordWriter to write the output for the job. 
+ * @throws IOException + */ + @Override + public RecordWriter, HCatRecord> + getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { + String name = getOutputName(context.getTaskAttemptID().getTaskID().getId()); + return new DefaultRecordWriterContainer(context, + getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name, InternalUtil.createReporter(context))); + } - /** - * Get the output committer for this output format. This is responsible - * for ensuring the output is committed correctly. - * @param context the task context - * @return an output committer - * @throws IOException - * @throws InterruptedException - */ - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) - throws IOException, InterruptedException { - return new DefaultOutputCommitterContainer(context, new JobConf(context.getConfiguration()).getOutputCommitter()); - } + /** + * Get the output committer for this output format. This is responsible + * for ensuring the output is committed correctly. + * @param context the task context + * @return an output committer + * @throws IOException + * @throws InterruptedException + */ + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) + throws IOException, InterruptedException { + return new DefaultOutputCommitterContainer(context, new JobConf(context.getConfiguration()).getOutputCommitter()); + } - /** - * Check for validity of the output-specification for the job. - * @param context information about the job - * @throws IOException when output should not be attempted - */ - @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - org.apache.hadoop.mapred.OutputFormat, ? super Writable> outputFormat = getBaseOutputFormat(); - JobConf jobConf = new JobConf(context.getConfiguration()); - outputFormat.checkOutputSpecs(null, jobConf); - HCatUtil.copyConf(jobConf, context.getConfiguration()); - } + /** + * Check for validity of the output-specification for the job. + * @param context information about the job + * @throws IOException when output should not be attempted + */ + @Override + public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { + org.apache.hadoop.mapred.OutputFormat, ? 
super Writable> outputFormat = getBaseOutputFormat(); + JobConf jobConf = new JobConf(context.getConfiguration()); + outputFormat.checkOutputSpecs(null, jobConf); + HCatUtil.copyConf(jobConf, context.getConfiguration()); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java index a6c0eda..a312527 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java @@ -38,46 +38,46 @@ */ class DefaultRecordWriterContainer extends RecordWriterContainer { - private final HCatStorageHandler storageHandler; - private final SerDe serDe; - private final OutputJobInfo jobInfo; - private final ObjectInspector hcatRecordOI; + private final HCatStorageHandler storageHandler; + private final SerDe serDe; + private final OutputJobInfo jobInfo; + private final ObjectInspector hcatRecordOI; - /** - * @param context current JobContext - * @param baseRecordWriter RecordWriter to contain - * @throws IOException - * @throws InterruptedException - */ - public DefaultRecordWriterContainer(TaskAttemptContext context, - org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter) throws IOException, InterruptedException { - super(context, baseRecordWriter); - jobInfo = HCatOutputFormat.getJobInfo(context); - storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - HCatOutputFormat.configureOutputStorageHandler(context); - serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); - hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); - try { - InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); - } catch (SerDeException e) { - throw new IOException("Failed to initialize SerDe", e); - } + /** + * @param context current JobContext + * @param baseRecordWriter RecordWriter to contain + * @throws IOException + * @throws InterruptedException + */ + public DefaultRecordWriterContainer(TaskAttemptContext context, + org.apache.hadoop.mapred.RecordWriter, ? 
super Writable> baseRecordWriter) throws IOException, InterruptedException { + super(context, baseRecordWriter); + jobInfo = HCatOutputFormat.getJobInfo(context); + storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + HCatOutputFormat.configureOutputStorageHandler(context); + serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); + hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); + try { + InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); + } catch (SerDeException e) { + throw new IOException("Failed to initialize SerDe", e); } + } - @Override - public void close(TaskAttemptContext context) throws IOException, - InterruptedException { - getBaseRecordWriter().close(InternalUtil.createReporter(context)); - } + @Override + public void close(TaskAttemptContext context) throws IOException, + InterruptedException { + getBaseRecordWriter().close(InternalUtil.createReporter(context)); + } - @Override - public void write(WritableComparable key, HCatRecord value) throws IOException, - InterruptedException { - try { - getBaseRecordWriter().write(null, serDe.serialize(value.getAll(), hcatRecordOI)); - } catch (SerDeException e) { - throw new IOException("Failed to serialize object", e); - } + @Override + public void write(WritableComparable key, HCatRecord value) throws IOException, + InterruptedException { + try { + getBaseRecordWriter().write(null, serDe.serialize(value.getAll(), hcatRecordOI)); + } catch (SerDeException e) { + throw new IOException("Failed to serialize object", e); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java index acce51c..b041a14 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java @@ -68,680 +68,680 @@ */ class FileOutputCommitterContainer extends OutputCommitterContainer { - private static final String TEMP_DIR_NAME = "_temporary"; - private static final String LOGS_DIR_NAME = "_logs"; - - private static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class); - private final boolean dynamicPartitioningUsed; - private boolean partitionsDiscovered; - - private Map> partitionsDiscoveredByPath; - private Map contextDiscoveredByPath; - private final HCatStorageHandler cachedStorageHandler; - - HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor(); - - private String ptnRootLocation = null; - - private OutputJobInfo jobInfo = null; - - /** - * @param context current JobContext - * @param baseCommitter OutputCommitter to contain - * @throws IOException - */ - public FileOutputCommitterContainer(JobContext context, - org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { - super(context, baseCommitter); - jobInfo = HCatOutputFormat.getJobInfo(context); - dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); - - this.partitionsDiscovered = !dynamicPartitioningUsed; - cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + private static final String TEMP_DIR_NAME = "_temporary"; + private static final String LOGS_DIR_NAME = "_logs"; + + private static final Logger 
LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class); + private final boolean dynamicPartitioningUsed; + private boolean partitionsDiscovered; + + private Map> partitionsDiscoveredByPath; + private Map contextDiscoveredByPath; + private final HCatStorageHandler cachedStorageHandler; + + HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor(); + + private String ptnRootLocation = null; + + private OutputJobInfo jobInfo = null; + + /** + * @param context current JobContext + * @param baseCommitter OutputCommitter to contain + * @throws IOException + */ + public FileOutputCommitterContainer(JobContext context, + org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { + super(context, baseCommitter); + jobInfo = HCatOutputFormat.getJobInfo(context); + dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); + + this.partitionsDiscovered = !dynamicPartitioningUsed; + cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + } + + @Override + public void abortTask(TaskAttemptContext context) throws IOException { + if (!dynamicPartitioningUsed) { + getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); } - - @Override - public void abortTask(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + } + + @Override + public void commitTask(TaskAttemptContext context) throws IOException { + if (!dynamicPartitioningUsed) { + //See HCATALOG-499 + FileOutputFormatContainer.setWorkOutputPath(context); + getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); } - - @Override - public void commitTask(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - //See HCATALOG-499 - FileOutputFormatContainer.setWorkOutputPath(context); - getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + } + + @Override + public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { + if (!dynamicPartitioningUsed) { + return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); + } else { + // called explicitly through FileRecordWriterContainer.close() if dynamic - return false by default + return false; } + } - @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); - } else { - // called explicitly through FileRecordWriterContainer.close() if dynamic - return false by default - return false; - } + @Override + public void setupJob(JobContext context) throws IOException { + if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { + getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); } + // in dynamic usecase, called through FileRecordWriterContainer + } - @Override - public void setupJob(JobContext context) throws IOException { - if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { - getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); - } - // in dynamic usecase, called through FileRecordWriterContainer + @Override + public void setupTask(TaskAttemptContext context) throws IOException { + if (!dynamicPartitioningUsed) { + 
getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); } - - @Override - public void setupTask(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); + } + + @Override + public void abortJob(JobContext jobContext, State state) throws IOException { + try { + if (dynamicPartitioningUsed) { + discoverPartitions(jobContext); + } + org.apache.hadoop.mapred.JobContext mapRedJobContext = HCatMapRedUtil + .createJobContext(jobContext); + if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { + getBaseOutputCommitter().abortJob(mapRedJobContext, state); + } else if (dynamicPartitioningUsed) { + for (JobContext currContext : contextDiscoveredByPath.values()) { + try { + new JobConf(currContext.getConfiguration()) + .getOutputCommitter().abortJob(currContext, + state); + } catch (Exception e) { + throw new IOException(e); + } } + } + Path src; + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); + if (dynamicPartitioningUsed) { + src = new Path(getPartitionRootLocation(jobInfo.getLocation(), jobInfo.getTableInfo().getTable() + .getPartitionKeysSize())); + } else { + src = new Path(jobInfo.getLocation()); + } + FileSystem fs = src.getFileSystem(jobContext.getConfiguration()); + LOG.info("Job failed. Cleaning up temporary directory [{}].", src); + fs.delete(src, true); + } finally { + cancelDelegationTokens(jobContext); } - - @Override - public void abortJob(JobContext jobContext, State state) throws IOException { - try { - if (dynamicPartitioningUsed) { - discoverPartitions(jobContext); - } - org.apache.hadoop.mapred.JobContext mapRedJobContext = HCatMapRedUtil - .createJobContext(jobContext); - if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { - getBaseOutputCommitter().abortJob(mapRedJobContext, state); - } else if (dynamicPartitioningUsed) { - for (JobContext currContext : contextDiscoveredByPath.values()) { - try { - new JobConf(currContext.getConfiguration()) - .getOutputCommitter().abortJob(currContext, - state); - } catch (Exception e) { - throw new IOException(e); - } - } - } - Path src; - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); - if (dynamicPartitioningUsed) { - src = new Path(getPartitionRootLocation(jobInfo.getLocation(), jobInfo.getTableInfo().getTable() - .getPartitionKeysSize())); - } else { - src = new Path(jobInfo.getLocation()); - } - FileSystem fs = src.getFileSystem(jobContext.getConfiguration()); - LOG.info("Job failed. 
Cleaning up temporary directory [{}].", src); - fs.delete(src, true); - } finally { - cancelDelegationTokens(jobContext); + } + + public static final String SUCCEEDED_FILE_NAME = "_SUCCESS"; + static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = + "mapreduce.fileoutputcommitter.marksuccessfuljobs"; + + private static boolean getOutputDirMarking(Configuration conf) { + return conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, + false); + } + + @Override + public void commitJob(JobContext jobContext) throws IOException { + try { + if (dynamicPartitioningUsed) { + discoverPartitions(jobContext); + // Commit each partition so it gets moved out of the job work + // dir + for (JobContext context : contextDiscoveredByPath.values()) { + new JobConf(context.getConfiguration()) + .getOutputCommitter().commitJob(context); } - } - - public static final String SUCCEEDED_FILE_NAME = "_SUCCESS"; - static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = - "mapreduce.fileoutputcommitter.marksuccessfuljobs"; - - private static boolean getOutputDirMarking(Configuration conf) { - return conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, - false); - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - try { - if (dynamicPartitioningUsed) { - discoverPartitions(jobContext); - // Commit each partition so it gets moved out of the job work - // dir - for (JobContext context : contextDiscoveredByPath.values()) { - new JobConf(context.getConfiguration()) - .getOutputCommitter().commitJob(context); - } - } - if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { - getBaseOutputCommitter().commitJob( - HCatMapRedUtil.createJobContext(jobContext)); - } - registerPartitions(jobContext); - // create _SUCCESS FILE if so requested. - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); - if (getOutputDirMarking(jobContext.getConfiguration())) { - Path outputPath = new Path(jobInfo.getLocation()); - FileSystem fileSys = outputPath.getFileSystem(jobContext - .getConfiguration()); - // create a file in the folder to mark it - if (fileSys.exists(outputPath)) { - Path filePath = new Path(outputPath, - SUCCEEDED_FILE_NAME); - if (!fileSys.exists(filePath)) { // may have been - // created by - // baseCommitter.commitJob() - fileSys.create(filePath).close(); - } - } - } - } finally { - cancelDelegationTokens(jobContext); + } + if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { + getBaseOutputCommitter().commitJob( + HCatMapRedUtil.createJobContext(jobContext)); + } + registerPartitions(jobContext); + // create _SUCCESS FILE if so requested. 
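commitJob above only writes the _SUCCESS marker when mapreduce.fileoutputcommitter.marksuccessfuljobs is set and the output directory exists, and it tolerates a marker already created by the base committer. The following is a minimal sketch of that guard against the Hadoop FileSystem API; the helper class and method names are illustrative.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SuccessMarker {
  static final String SUCCEEDED_FILE_NAME = "_SUCCESS";
  static final String MARKER_CONF = "mapreduce.fileoutputcommitter.marksuccessfuljobs";

  // Create an empty _SUCCESS file under outputPath, but only if the job asked
  // for it and the output directory actually exists.
  static void maybeCreateSuccessFile(Configuration conf, Path outputPath) throws IOException {
    if (!conf.getBoolean(MARKER_CONF, false)) {
      return;                                // marker not requested
    }
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
      Path marker = new Path(outputPath, SUCCEEDED_FILE_NAME);
      if (!fs.exists(marker)) {              // may already exist from the base committer
        fs.create(marker).close();           // empty file; its presence is the signal
      }
    }
  }
}

The empty file acts purely as a completion signal for downstream consumers, which is consistent with it being created only after registerPartitions has succeeded above.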
+ OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); + if (getOutputDirMarking(jobContext.getConfiguration())) { + Path outputPath = new Path(jobInfo.getLocation()); + FileSystem fileSys = outputPath.getFileSystem(jobContext + .getConfiguration()); + // create a file in the folder to mark it + if (fileSys.exists(outputPath)) { + Path filePath = new Path(outputPath, + SUCCEEDED_FILE_NAME); + if (!fileSys.exists(filePath)) { // may have been + // created by + // baseCommitter.commitJob() + fileSys.create(filePath).close(); + } } + } + } finally { + cancelDelegationTokens(jobContext); } - - @Override - public void cleanupJob(JobContext context) throws IOException { - throw new IOException("The method cleanupJob is deprecated and should not be called."); - } - - private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) { - if (ptnRootLocation == null) { - // we only need to calculate it once, it'll be the same for other partitions in this job. - Path ptnRoot = new Path(ptnLocn); - for (int i = 0; i < numPtnKeys; i++) { + } + + @Override + public void cleanupJob(JobContext context) throws IOException { + throw new IOException("The method cleanupJob is deprecated and should not be called."); + } + + private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) { + if (ptnRootLocation == null) { + // we only need to calculate it once, it'll be the same for other partitions in this job. + Path ptnRoot = new Path(ptnLocn); + for (int i = 0; i < numPtnKeys; i++) { // LOG.info("Getting parent of "+ptnRoot.getName()); - ptnRoot = ptnRoot.getParent(); - } - ptnRootLocation = ptnRoot.toString(); - } + ptnRoot = ptnRoot.getParent(); + } + ptnRootLocation = ptnRoot.toString(); + } // LOG.info("Returning final parent : "+ptnRootLocation); - return ptnRootLocation; + return ptnRootLocation; + } + + /** + * Generate partition metadata object to be used to add to metadata. + * @param context The job context. + * @param jobInfo The OutputJobInfo. + * @param partLocnRoot The table-equivalent location root of the partition + * (temporary dir if dynamic partition, table dir if static) + * @param partKVs The keyvalue pairs that form the partition + * @param outputSchema The output schema for the partition + * @param params The parameters to store inside the partition + * @param table The Table metadata object under which this Partition will reside + * @param fs FileSystem object to operate on the underlying filesystem + * @param grpName Group name that owns the table dir + * @param perms FsPermission that's the default permission of the table dir. + * @return Constructed Partition metadata object + * @throws java.io.IOException + */ + + private Partition constructPartition( + JobContext context, OutputJobInfo jobInfo, + String partLocnRoot, Map partKVs, + HCatSchema outputSchema, Map params, + Table table, FileSystem fs, + String grpName, FsPermission perms) throws IOException { + + Partition partition = new Partition(); + partition.setDbName(table.getDbName()); + partition.setTableName(table.getTableName()); + partition.setSd(new StorageDescriptor(table.getTTable().getSd())); + + List fields = new ArrayList(); + for (HCatFieldSchema fieldSchema : outputSchema.getFields()) { + fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema)); } - /** - * Generate partition metadata object to be used to add to metadata. - * @param context The job context. - * @param jobInfo The OutputJobInfo. 
- * @param partLocnRoot The table-equivalent location root of the partition - * (temporary dir if dynamic partition, table dir if static) - * @param partKVs The keyvalue pairs that form the partition - * @param outputSchema The output schema for the partition - * @param params The parameters to store inside the partition - * @param table The Table metadata object under which this Partition will reside - * @param fs FileSystem object to operate on the underlying filesystem - * @param grpName Group name that owns the table dir - * @param perms FsPermission that's the default permission of the table dir. - * @return Constructed Partition metadata object - * @throws java.io.IOException - */ - - private Partition constructPartition( - JobContext context, OutputJobInfo jobInfo, - String partLocnRoot, Map partKVs, - HCatSchema outputSchema, Map params, - Table table, FileSystem fs, - String grpName, FsPermission perms) throws IOException { - - Partition partition = new Partition(); - partition.setDbName(table.getDbName()); - partition.setTableName(table.getTableName()); - partition.setSd(new StorageDescriptor(table.getTTable().getSd())); - - List fields = new ArrayList(); - for (HCatFieldSchema fieldSchema : outputSchema.getFields()) { - fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema)); - } - - partition.getSd().setCols(fields); + partition.getSd().setCols(fields); - partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs)); + partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs)); - partition.setParameters(params); - - // Sets permissions and group name on partition dirs and files. - - Path partPath; - if (Boolean.valueOf((String)table.getProperty("EXTERNAL")) - && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { - // honor external table that specifies the location - partPath = new Path(jobInfo.getLocation()); - } else { - partPath = new Path(partLocnRoot); - int i = 0; - for (FieldSchema partKey : table.getPartitionKeys()) { - if (i++ != 0) { - applyGroupAndPerms(fs, partPath, perms, grpName, false); - } - partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); - } - } + partition.setParameters(params); - // Apply the group and permissions to the leaf partition and files. - // Need not bother in case of HDFS as permission is taken care of by setting UMask - if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) { - applyGroupAndPerms(fs, partPath, perms, grpName, true); - } + // Sets permissions and group name on partition dirs and files. 
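getPartitionRootLocation above recovers the table-level root of a partition location by stripping one path component per partition key, and caches the result because it is identical for every partition in the job. A compact, standalone sketch of that parent walk (helper name illustrative):

import org.apache.hadoop.fs.Path;

public class PartitionRoot {
  // Strip numPartKeys trailing "key=value" components from a partition location,
  // e.g. .../employee/emp_country=IN/emp_state=KA with 2 keys -> .../employee
  static String partitionRootLocation(String partitionLocation, int numPartKeys) {
    Path root = new Path(partitionLocation);
    for (int i = 0; i < numPartKeys; i++) {
      root = root.getParent();
    }
    return root.toString();
  }

  public static void main(String[] args) {
    System.out.println(partitionRootLocation(
        "/warehouse/employee/emp_country=IN/emp_state=KA", 2)); // /warehouse/employee
  }
}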
- // Set the location in the StorageDescriptor - if (dynamicPartitioningUsed) { - String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs); - if (harProcessor.isEnabled()) { - harProcessor.exec(context, partition, partPath); - partition.getSd().setLocation( - harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination))); - } else { - partition.getSd().setLocation(dynamicPartitionDestination); - } - } else { - partition.getSd().setLocation(partPath.toString()); + Path partPath; + if (Boolean.valueOf((String)table.getProperty("EXTERNAL")) + && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { + // honor external table that specifies the location + partPath = new Path(jobInfo.getLocation()); + } else { + partPath = new Path(partLocnRoot); + int i = 0; + for (FieldSchema partKey : table.getPartitionKeys()) { + if (i++ != 0) { + applyGroupAndPerms(fs, partPath, perms, grpName, false); } - return partition; + partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); + } } - private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission, - String group, boolean recursive) - throws IOException { - fs.setPermission(dir, permission); - if (recursive) { - for (FileStatus fileStatus : fs.listStatus(dir)) { - if (fileStatus.isDir()) { - applyGroupAndPerms(fs, fileStatus.getPath(), permission, group, true); - } else { - fs.setPermission(fileStatus.getPath(), permission); - } - } - } + // Apply the group and permissions to the leaf partition and files. + // Need not bother in case of HDFS as permission is taken care of by setting UMask + if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) { + applyGroupAndPerms(fs, partPath, perms, grpName, true); } - private String getFinalDynamicPartitionDestination(Table table, Map partKVs) { - // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA -> - // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA - Path partPath = new Path(table.getTTable().getSd().getLocation()); - for (FieldSchema partKey : table.getPartitionKeys()) { - partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); - } - return partPath.toString(); + // Set the location in the StorageDescriptor + if (dynamicPartitioningUsed) { + String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs); + if (harProcessor.isEnabled()) { + harProcessor.exec(context, partition, partPath); + partition.getSd().setLocation( + harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination))); + } else { + partition.getSd().setLocation(dynamicPartitionDestination); + } + } else { + partition.getSd().setLocation(partPath.toString()); } - - private Map getStorerParameterMap(StorerInfo storer) { - Map params = new HashMap(); - - //Copy table level hcat.* keys to the partition - for (Entry entry : storer.getProperties().entrySet()) { - params.put(entry.getKey().toString(), entry.getValue().toString()); + return partition; + } + + private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission, + String group, boolean recursive) + throws IOException { + fs.setPermission(dir, permission); + if (recursive) { + for (FileStatus fileStatus : fs.listStatus(dir)) { + if (fileStatus.isDir()) { + applyGroupAndPerms(fs, fileStatus.getPath(), permission, group, true); + } else { + fs.setPermission(fileStatus.getPath(), permission); } - return params; + } 
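applyGroupAndPerms above propagates the table directory's permission bits down a partition tree, recursing into subdirectories only for the leaf partition. A sketch of the same traversal with the FileSystem API; as in the method above, only the permission bits are set (the group name is accepted there but not applied), so this sketch drops the group parameter. FileStatus.isDir() is the older call used throughout this code base; newer Hadoop releases spell it isDirectory().

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class RecursiveChmod {
  // Apply one FsPermission to dir and, when recursive, to everything below it.
  static void applyPerms(FileSystem fs, Path dir, FsPermission perms, boolean recursive)
      throws IOException {
    fs.setPermission(dir, perms);
    if (!recursive) {
      return;
    }
    for (FileStatus status : fs.listStatus(dir)) {
      if (status.isDir()) {
        applyPerms(fs, status.getPath(), perms, true);  // recurse into subdirectories
      } else {
        fs.setPermission(status.getPath(), perms);      // plain file: set the bits directly
      }
    }
  }
}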
} + } + + private String getFinalDynamicPartitionDestination(Table table, Map partKVs) { + // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA -> + // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA + Path partPath = new Path(table.getTTable().getSd().getLocation()); + for (FieldSchema partKey : table.getPartitionKeys()) { + partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); + } + return partPath.toString(); + } - private Path constructPartialPartPath(Path partialPath, String partKey, Map partKVs) { + private Map getStorerParameterMap(StorerInfo storer) { + Map params = new HashMap(); - StringBuilder sb = new StringBuilder(FileUtils.escapePathName(partKey)); - sb.append("="); - sb.append(FileUtils.escapePathName(partKVs.get(partKey))); - return new Path(partialPath, sb.toString()); + //Copy table level hcat.* keys to the partition + for (Entry entry : storer.getProperties().entrySet()) { + params.put(entry.getKey().toString(), entry.getValue().toString()); } - - /** - * Update table schema, adding new columns as added for the partition. - * @param client the client - * @param table the table - * @param partitionSchema the schema of the partition - * @throws java.io.IOException Signals that an I/O exception has occurred. - * @throws org.apache.hadoop.hive.metastore.api.InvalidOperationException the invalid operation exception - * @throws org.apache.hadoop.hive.metastore.api.MetaException the meta exception - * @throws org.apache.thrift.TException the t exception - */ - private void updateTableSchema(HiveMetaStoreClient client, Table table, - HCatSchema partitionSchema) throws IOException, InvalidOperationException, MetaException, TException { - - - List newColumns = HCatUtil.validatePartitionSchema(table, partitionSchema); - - if (newColumns.size() != 0) { - List tableColumns = new ArrayList(table.getTTable().getSd().getCols()); - tableColumns.addAll(newColumns); - - //Update table schema to add the newly added columns - table.getTTable().getSd().setCols(tableColumns); - client.alter_table(table.getDbName(), table.getTableName(), table.getTTable()); - } + return params; + } + + private Path constructPartialPartPath(Path partialPath, String partKey, Map partKVs) { + + StringBuilder sb = new StringBuilder(FileUtils.escapePathName(partKey)); + sb.append("="); + sb.append(FileUtils.escapePathName(partKVs.get(partKey))); + return new Path(partialPath, sb.toString()); + } + + /** + * Update table schema, adding new columns as added for the partition. + * @param client the client + * @param table the table + * @param partitionSchema the schema of the partition + * @throws java.io.IOException Signals that an I/O exception has occurred. 
+ * @throws org.apache.hadoop.hive.metastore.api.InvalidOperationException the invalid operation exception + * @throws org.apache.hadoop.hive.metastore.api.MetaException the meta exception + * @throws org.apache.thrift.TException the t exception + */ + private void updateTableSchema(HiveMetaStoreClient client, Table table, + HCatSchema partitionSchema) throws IOException, InvalidOperationException, MetaException, TException { + + + List newColumns = HCatUtil.validatePartitionSchema(table, partitionSchema); + + if (newColumns.size() != 0) { + List tableColumns = new ArrayList(table.getTTable().getSd().getCols()); + tableColumns.addAll(newColumns); + + //Update table schema to add the newly added columns + table.getTTable().getSd().setCols(tableColumns); + client.alter_table(table.getDbName(), table.getTableName(), table.getTTable()); } - - /** - * Move all of the files from the temp directory to the final location - * @param fs the output file system - * @param file the file to move - * @param srcDir the source directory - * @param destDir the target directory - * @param dryRun - a flag that simply tests if this move would succeed or not based - * on whether other files exist where we're trying to copy - * @throws java.io.IOException - */ - private void moveTaskOutputs(FileSystem fs, - Path file, - Path srcDir, - Path destDir, final boolean dryRun) throws IOException { - - if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME) || file.getName().equals(SUCCEEDED_FILE_NAME)) { - return; + } + + /** + * Move all of the files from the temp directory to the final location + * @param fs the output file system + * @param file the file to move + * @param srcDir the source directory + * @param destDir the target directory + * @param dryRun - a flag that simply tests if this move would succeed or not based + * on whether other files exist where we're trying to copy + * @throws java.io.IOException + */ + private void moveTaskOutputs(FileSystem fs, + Path file, + Path srcDir, + Path destDir, final boolean dryRun) throws IOException { + + if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME) || file.getName().equals(SUCCEEDED_FILE_NAME)) { + return; + } + final Path finalOutputPath = getFinalPath(file, srcDir, destDir); + if (fs.isFile(file)) { + if (dryRun){ + if(LOG.isDebugEnabled()) { + LOG.debug("Testing if moving file: [" + file + "] to [" + + finalOutputPath + "] would cause a problem"); + } + if (fs.exists(finalOutputPath)) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath + + ", duplicate publish not possible."); + } + } else { + if(LOG.isDebugEnabled()) { + LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]"); } - final Path finalOutputPath = getFinalPath(file, srcDir, destDir); - if (fs.isFile(file)) { - if (dryRun){ - if(LOG.isDebugEnabled()) { - LOG.debug("Testing if moving file: [" + file + "] to [" - + finalOutputPath + "] would cause a problem"); - } - if (fs.exists(finalOutputPath)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath - + ", duplicate publish not possible."); - } - } else { - if(LOG.isDebugEnabled()) { - LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]"); - } - // Make sure the parent directory exists. 
It is not an error - // to recreate an existing directory - fs.mkdirs(finalOutputPath.getParent()); - if (!fs.rename(file, finalOutputPath)) { - if (!fs.delete(finalOutputPath, true)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to delete existing path " + finalOutputPath); - } - if (!fs.rename(file, finalOutputPath)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to move output to " + finalOutputPath); - } - } + // Make sure the parent directory exists. It is not an error + // to recreate an existing directory + fs.mkdirs(finalOutputPath.getParent()); + if (!fs.rename(file, finalOutputPath)) { + if (!fs.delete(finalOutputPath, true)) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to delete existing path " + finalOutputPath); + } + if (!fs.rename(file, finalOutputPath)) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to move output to " + finalOutputPath); + } + } + } + } else if(fs.getFileStatus(file).isDir()) { + FileStatus[] children = fs.listStatus(file); + FileStatus firstChild = null; + if (children != null) { + int index=0; + while (index < children.length) { + if (!children[index].getPath().getName().equals(TEMP_DIR_NAME) && !children[index].getPath().getName().equals(LOGS_DIR_NAME) && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) { + firstChild = children[index]; + break; + } + index++; + } + } + if(firstChild!=null && firstChild.isDir()) { + // If the first child is directory, then rest would be directory too according to HCatalog dir structure + // recurse in that case + for (FileStatus child : children) { + moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); + } + } else { + + if (!dryRun) { + if (dynamicPartitioningUsed) { + // Optimization: if the first child is file, we have reached the leaf directory, move the parent directory itself + // instead of moving each file under the directory. See HCATALOG-538 + + final Path parentDir = finalOutputPath.getParent(); + // Create the directory + Path placeholder = new Path(parentDir, "_placeholder"); + if (fs.mkdirs(parentDir)) { + // It is weired but we need a placeholder, + // otherwise rename cannot move file to the right place + fs.create(placeholder).close(); } - } else if(fs.getFileStatus(file).isDir()) { - FileStatus[] children = fs.listStatus(file); - FileStatus firstChild = null; - if (children != null) { - int index=0; - while (index < children.length) { - if (!children[index].getPath().getName().equals(TEMP_DIR_NAME) && !children[index].getPath().getName().equals(LOGS_DIR_NAME) && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) { - firstChild = children[index]; - break; - } - index++; - } + if (LOG.isDebugEnabled()) { + LOG.debug("Moving directory: " + file + " to " + parentDir); } - if(firstChild!=null && firstChild.isDir()) { - // If the first child is directory, then rest would be directory too according to HCatalog dir structure - // recurse in that case - for (FileStatus child : children) { - moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); - } - } else { - - if (!dryRun) { - if (dynamicPartitioningUsed) { - // Optimization: if the first child is file, we have reached the leaf directory, move the parent directory itself - // instead of moving each file under the directory. 
See HCATALOG-538 - - final Path parentDir = finalOutputPath.getParent(); - // Create the directory - Path placeholder = new Path(parentDir, "_placeholder"); - if (fs.mkdirs(parentDir)) { - // It is weired but we need a placeholder, - // otherwise rename cannot move file to the right place - fs.create(placeholder).close(); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Moving directory: " + file + " to " + parentDir); - } - if (!fs.rename(file, parentDir)) { - final String msg = "Failed to move file: " + file + " to " + parentDir; - LOG.error(msg); - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); - } - fs.delete(placeholder, false); - } else { - // In case of no partition we have to move each file - for (FileStatus child : children) { - moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); - } - } - } else { - if(fs.exists(finalOutputPath)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath - + ", duplicate publish not possible."); - } - } + if (!fs.rename(file, parentDir)) { + final String msg = "Failed to move file: " + file + " to " + parentDir; + LOG.error(msg); + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); } + fs.delete(placeholder, false); + } else { + // In case of no partition we have to move each file + for (FileStatus child : children) { + moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); + } + } } else { - // Should never happen - final String msg = "Unknown file type being asked to be moved, erroring out"; - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); + if(fs.exists(finalOutputPath)) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath + + ", duplicate publish not possible."); + } } + } + } else { + // Should never happen + final String msg = "Unknown file type being asked to be moved, erroring out"; + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); } - - /** - * Find the final name of a given output file, given the output directory - * and the work directory. - * @param file the file to move - * @param src the source directory - * @param dest the target directory - * @return the final path for the specific output file - * @throws java.io.IOException - */ - private Path getFinalPath(Path file, Path src, - Path dest) throws IOException { - URI taskOutputUri = file.toUri(); - URI relativePath = src.toUri().relativize(taskOutputUri); - if (taskOutputUri == relativePath) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Can not get the relative path: base = " + - src + " child = " + file); + } + + /** + * Find the final name of a given output file, given the output directory + * and the work directory. 
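The per-file branch of moveTaskOutputs above first ensures the destination's parent exists (mkdirs on an existing directory is not an error), then renames, and on a failed rename deletes whatever is sitting at the destination and retries once. A minimal sketch of that move-with-fallback step, with an illustrative helper name; the original raises HCatException ERROR_MOVE_FAILED where this throws plain IOException.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MoveWithFallback {
  // Move one task-output file into place; if a stale file already occupies the
  // destination, remove it and retry the rename once.
  static void moveFile(FileSystem fs, Path file, Path finalOutputPath) throws IOException {
    fs.mkdirs(finalOutputPath.getParent());
    if (!fs.rename(file, finalOutputPath)) {
      if (!fs.delete(finalOutputPath, true)) {
        throw new IOException("Failed to delete existing path " + finalOutputPath);
      }
      if (!fs.rename(file, finalOutputPath)) {
        throw new IOException("Failed to move output to " + finalOutputPath);
      }
    }
  }
}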
+ * @param file the file to move + * @param src the source directory + * @param dest the target directory + * @return the final path for the specific output file + * @throws java.io.IOException + */ + private Path getFinalPath(Path file, Path src, + Path dest) throws IOException { + URI taskOutputUri = file.toUri(); + URI relativePath = src.toUri().relativize(taskOutputUri); + if (taskOutputUri == relativePath) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Can not get the relative path: base = " + + src + " child = " + file); + } + if (relativePath.getPath().length() > 0) { + return new Path(dest, relativePath.getPath()); + } else { + return dest; + } + } + + /** + * Run to discover dynamic partitions available + */ + private void discoverPartitions(JobContext context) throws IOException { + if (!partitionsDiscovered) { + // LOG.info("discover ptns called"); + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); + + harProcessor.setEnabled(jobInfo.getHarRequested()); + + List dynamicPartCols = jobInfo.getPosOfDynPartCols(); + int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); + + Path loadPath = new Path(jobInfo.getLocation()); + FileSystem fs = loadPath.getFileSystem(context.getConfiguration()); + + // construct a path pattern (e.g., /*/*) to find all dynamically generated paths + String dynPathSpec = loadPath.toUri().getPath(); + dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*"); + + // LOG.info("Searching for "+dynPathSpec); + Path pathPattern = new Path(dynPathSpec); + FileStatus[] status = fs.globStatus(pathPattern); + + partitionsDiscoveredByPath = new LinkedHashMap>(); + contextDiscoveredByPath = new LinkedHashMap(); + + + if (status.length == 0) { + // LOG.warn("No partition found genereated by dynamic partitioning in [" + // +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize() + // +"], dynSpec["+dynPathSpec+"]"); + } else { + if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) { + this.partitionsDiscovered = true; + throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, + "Number of dynamic partitions being created " + + "exceeds configured max allowable partitions[" + + maxDynamicPartitions + + "], increase parameter [" + + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname + + "] if needed."); } - if (relativePath.getPath().length() > 0) { - return new Path(dest, relativePath.getPath()); - } else { - return dest; + + for (FileStatus st : status) { + LinkedHashMap fullPartSpec = new LinkedHashMap(); + Warehouse.makeSpecFromName(fullPartSpec, st.getPath()); + partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec); + JobConf jobConf = (JobConf)context.getConfiguration(); + JobContext currContext = HCatMapRedUtil.createJobContext( + jobConf, + context.getJobID(), + InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf, + ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))); + HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec); + contextDiscoveredByPath.put(st.getPath().toString(), currContext); } - } + } - /** - * Run to discover dynamic partitions available - */ - private void discoverPartitions(JobContext context) throws IOException { - if (!partitionsDiscovered) { - // LOG.info("discover ptns called"); - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); - - harProcessor.setEnabled(jobInfo.getHarRequested()); - - List dynamicPartCols = jobInfo.getPosOfDynPartCols(); - int 
maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); - - Path loadPath = new Path(jobInfo.getLocation()); - FileSystem fs = loadPath.getFileSystem(context.getConfiguration()); - - // construct a path pattern (e.g., /*/*) to find all dynamically generated paths - String dynPathSpec = loadPath.toUri().getPath(); - dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*"); - - // LOG.info("Searching for "+dynPathSpec); - Path pathPattern = new Path(dynPathSpec); - FileStatus[] status = fs.globStatus(pathPattern); - - partitionsDiscoveredByPath = new LinkedHashMap>(); - contextDiscoveredByPath = new LinkedHashMap(); - - - if (status.length == 0) { - // LOG.warn("No partition found genereated by dynamic partitioning in [" - // +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize() - // +"], dynSpec["+dynPathSpec+"]"); - } else { - if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) { - this.partitionsDiscovered = true; - throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, - "Number of dynamic partitions being created " - + "exceeds configured max allowable partitions[" - + maxDynamicPartitions - + "], increase parameter [" - + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname - + "] if needed."); - } - - for (FileStatus st : status) { - LinkedHashMap fullPartSpec = new LinkedHashMap(); - Warehouse.makeSpecFromName(fullPartSpec, st.getPath()); - partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec); - JobConf jobConf = (JobConf)context.getConfiguration(); - JobContext currContext = HCatMapRedUtil.createJobContext( - jobConf, - context.getJobID(), - InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf, - ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))); - HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec); - contextDiscoveredByPath.put(st.getPath().toString(), currContext); - } - } + // for (Entry> spec : partitionsDiscoveredByPath.entrySet()){ + // LOG.info("Partition "+ spec.getKey()); + // for (Entry e : spec.getValue().entrySet()){ + // LOG.info(e.getKey() + "=>" +e.getValue()); + // } + // } - // for (Entry> spec : partitionsDiscoveredByPath.entrySet()){ - // LOG.info("Partition "+ spec.getKey()); - // for (Entry e : spec.getValue().entrySet()){ - // LOG.info(e.getKey() + "=>" +e.getValue()); - // } - // } + this.partitionsDiscovered = true; + } + } - this.partitionsDiscovered = true; - } + private void registerPartitions(JobContext context) throws IOException{ + if (dynamicPartitioningUsed){ + discoverPartitions(context); + } + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); + Configuration conf = context.getConfiguration(); + Table table = new Table(jobInfo.getTableInfo().getTable()); + Path tblPath = new Path(table.getTTable().getSd().getLocation()); + FileSystem fs = tblPath.getFileSystem(conf); + + if( table.getPartitionKeys().size() == 0 ) { + //Move data from temp directory the actual table directory + //No metastore operation required. 
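discoverPartitions above turns the job's scratch location into a glob (substituting * for the __HIVE_DEFAULT_PARTITION__ placeholder), lists the matching partition directories, and fails fast once their count exceeds the configured maximum. A stripped-down sketch of that discovery step follows; the real method additionally derives a partition spec per path with Warehouse.makeSpecFromName and builds a per-partition JobContext, both omitted here, and the helper names are illustrative.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DynamicPartitionDiscovery {
  // List candidate partition directories under loadLocation and enforce a cap.
  static FileStatus[] discover(Configuration conf, String loadLocation, int maxDynamicPartitions)
      throws IOException {
    Path loadPath = new Path(loadLocation);
    FileSystem fs = loadPath.getFileSystem(conf);

    // Substitute "*" so paths written for the default partition also match the glob.
    String dynPathSpec = loadPath.toUri().getPath()
        .replaceAll("__HIVE_DEFAULT_PARTITION__", "*");

    FileStatus[] status = fs.globStatus(new Path(dynPathSpec));
    if (status == null || status.length == 0) {
      return new FileStatus[0];                 // nothing was written for this job
    }
    if (maxDynamicPartitions != -1 && status.length > maxDynamicPartitions) {
      throw new IOException("Number of dynamic partitions created (" + status.length
          + ") exceeds the configured maximum of " + maxDynamicPartitions);
    }
    return status;
  }
}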
+ Path src = new Path(jobInfo.getLocation()); + moveTaskOutputs(fs, src, src, tblPath, false); + fs.delete(src, true); + return; } - private void registerPartitions(JobContext context) throws IOException{ - if (dynamicPartitioningUsed){ - discoverPartitions(context); + HiveMetaStoreClient client = null; + HCatTableInfo tableInfo = jobInfo.getTableInfo(); + List partitionsAdded = new ArrayList(); + try { + HiveConf hiveConf = HCatUtil.getHiveConf(conf); + client = HCatUtil.getHiveClient(hiveConf); + StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),table.getParameters()); + + FileStatus tblStat = fs.getFileStatus(tblPath); + String grpName = tblStat.getGroup(); + FsPermission perms = tblStat.getPermission(); + + List partitionsToAdd = new ArrayList(); + if (!dynamicPartitioningUsed){ + partitionsToAdd.add( + constructPartition( + context,jobInfo, + tblPath.toString(), jobInfo.getPartitionValues() + ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) + ,table, fs + ,grpName,perms)); + }else{ + for (Entry> entry : partitionsDiscoveredByPath.entrySet()){ + partitionsToAdd.add( + constructPartition( + context,jobInfo, + getPartitionRootLocation(entry.getKey(),entry.getValue().size()), entry.getValue() + ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) + ,table, fs + ,grpName,perms)); } - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); - Configuration conf = context.getConfiguration(); - Table table = new Table(jobInfo.getTableInfo().getTable()); - Path tblPath = new Path(table.getTTable().getSd().getLocation()); - FileSystem fs = tblPath.getFileSystem(conf); - - if( table.getPartitionKeys().size() == 0 ) { - //Move data from temp directory the actual table directory - //No metastore operation required. 
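The unpartitioned branch above simply relocates everything from the scratch directory into the table directory with moveTaskOutputs, and that relocation hinges on getFinalPath: re-rooting a file from under the source directory to under the destination via URI relativization. A small self-contained sketch of that mapping (helper and example paths are illustrative; the original throws HCatException where this throws IOException):

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.fs.Path;

public class FinalPathMapping {
  // Re-root `file` from under `src` to under `dest`, e.g.
  //   src  = /warehouse/tbl/_SCRATCH, file = /warehouse/tbl/_SCRATCH/c=IN/part-00000
  //   dest = /warehouse/tbl       ->  /warehouse/tbl/c=IN/part-00000
  static Path getFinalPath(Path file, Path src, Path dest) throws IOException {
    URI taskOutputUri = file.toUri();
    URI relative = src.toUri().relativize(taskOutputUri);
    if (relative == taskOutputUri) {
      // relativize() hands back its argument unchanged when file is not under src
      throw new IOException("Cannot relativize " + file + " against " + src);
    }
    return relative.getPath().length() > 0 ? new Path(dest, relative.getPath()) : dest;
  }

  public static void main(String[] args) throws IOException {
    System.out.println(getFinalPath(
        new Path("/warehouse/tbl/_SCRATCH/c=IN/part-00000"),
        new Path("/warehouse/tbl/_SCRATCH"),
        new Path("/warehouse/tbl")));           // /warehouse/tbl/c=IN/part-00000
  }
}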
- Path src = new Path(jobInfo.getLocation()); - moveTaskOutputs(fs, src, src, tblPath, false); - fs.delete(src, true); - return; - } - - HiveMetaStoreClient client = null; - HCatTableInfo tableInfo = jobInfo.getTableInfo(); - List partitionsAdded = new ArrayList(); + } + + ArrayList> ptnInfos = new ArrayList>(); + for(Partition ptn : partitionsToAdd){ + ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn)); + } + + //Publish the new partition(s) + if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())){ + + Path src = new Path(ptnRootLocation); + // check here for each dir we're copying out, to see if it + // already exists, error out if so + moveTaskOutputs(fs, src, src, tblPath, true); + moveTaskOutputs(fs, src, src, tblPath, false); + fs.delete(src, true); try { - HiveConf hiveConf = HCatUtil.getHiveConf(conf); - client = HCatUtil.getHiveClient(hiveConf); - StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),table.getParameters()); - - FileStatus tblStat = fs.getFileStatus(tblPath); - String grpName = tblStat.getGroup(); - FsPermission perms = tblStat.getPermission(); - - List partitionsToAdd = new ArrayList(); - if (!dynamicPartitioningUsed){ - partitionsToAdd.add( - constructPartition( - context,jobInfo, - tblPath.toString(), jobInfo.getPartitionValues() - ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) - ,table, fs - ,grpName,perms)); - }else{ - for (Entry> entry : partitionsDiscoveredByPath.entrySet()){ - partitionsToAdd.add( - constructPartition( - context,jobInfo, - getPartitionRootLocation(entry.getKey(),entry.getValue().size()), entry.getValue() - ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) - ,table, fs - ,grpName,perms)); - } + updateTableSchema(client, table, jobInfo.getOutputSchema()); + LOG.info("HAR is being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos); + client.add_partitions(partitionsToAdd); + partitionsAdded = partitionsToAdd; + } catch (Exception e){ + // There was an error adding partitions : rollback fs copy and rethrow + for (Partition p : partitionsToAdd){ + Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation()))); + if (fs.exists(ptnPath)){ + fs.delete(ptnPath,true); } - - ArrayList> ptnInfos = new ArrayList>(); - for(Partition ptn : partitionsToAdd){ - ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn)); - } - - //Publish the new partition(s) - if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())){ - - Path src = new Path(ptnRootLocation); - // check here for each dir we're copying out, to see if it - // already exists, error out if so - moveTaskOutputs(fs, src, src, tblPath, true); - moveTaskOutputs(fs, src, src, tblPath, false); - fs.delete(src, true); - try { - updateTableSchema(client, table, jobInfo.getOutputSchema()); - LOG.info("HAR is being used. 
The table {} has new partitions {}.", table.getTableName(), ptnInfos); - client.add_partitions(partitionsToAdd); - partitionsAdded = partitionsToAdd; - } catch (Exception e){ - // There was an error adding partitions : rollback fs copy and rethrow - for (Partition p : partitionsToAdd){ - Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation()))); - if (fs.exists(ptnPath)){ - fs.delete(ptnPath,true); - } - } - throw e; - } - - }else{ - // no harProcessor, regular operation - updateTableSchema(client, table, jobInfo.getOutputSchema()); - LOG.info("HAR not is not being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos); - if (dynamicPartitioningUsed && (partitionsToAdd.size()>0)){ - Path src = new Path(ptnRootLocation); - moveTaskOutputs(fs, src, src, tblPath, true); - moveTaskOutputs(fs, src, src, tblPath, false); - fs.delete(src, true); - } - client.add_partitions(partitionsToAdd); - partitionsAdded = partitionsToAdd; - } - } catch (Exception e) { - if (partitionsAdded.size() > 0) { - try { - // baseCommitter.cleanupJob failed, try to clean up the - // metastore - for (Partition p : partitionsAdded) { - client.dropPartition(tableInfo.getDatabaseName(), - tableInfo.getTableName(), p.getValues()); - } - } catch (Exception te) { - // Keep cause as the original exception - throw new HCatException( - ErrorType.ERROR_PUBLISHING_PARTITION, e); - } - } - if (e instanceof HCatException) { - throw (HCatException) e; - } else { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } - } finally { - HCatUtil.closeHiveClientQuietly(client); + } + throw e; } - } - private void cancelDelegationTokens(JobContext context) throws IOException{ - LOG.info("Cancelling deletgation token for the job."); - HiveMetaStoreClient client = null; + }else{ + // no harProcessor, regular operation + updateTableSchema(client, table, jobInfo.getOutputSchema()); + LOG.info("HAR not is not being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos); + if (dynamicPartitioningUsed && (partitionsToAdd.size()>0)){ + Path src = new Path(ptnRootLocation); + moveTaskOutputs(fs, src, src, tblPath, true); + moveTaskOutputs(fs, src, src, tblPath, false); + fs.delete(src, true); + } + client.add_partitions(partitionsToAdd); + partitionsAdded = partitionsToAdd; + } + } catch (Exception e) { + if (partitionsAdded.size() > 0) { try { - HiveConf hiveConf = HCatUtil - .getHiveConf(context.getConfiguration()); - client = HCatUtil.getHiveClient(hiveConf); - // cancel the deleg. tokens that were acquired for this job now that - // we are done - we should cancel if the tokens were acquired by - // HCatOutputFormat and not if they were supplied by Oozie. 
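The token cleanup here deliberately cancels the metastore delegation token only when HCAT_KEY_TOKEN_SIGNATURE is present in the configuration, that is, when HCatOutputFormat acquired the token itself rather than receiving it from an external workflow engine such as Oozie. The sketch below isolates that guard; the imports assume the org.apache.hcatalog.common package used elsewhere in this patch, and the silent catch stands in for the warning-level logging done above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatUtil;

public class TokenCleanup {
  // Cancel the delegation token only if this job acquired it itself; cancelling a
  // token supplied by the caller (no HCAT_KEY_TOKEN_SIGNATURE set) would break them.
  static void cancelTokenIfOwned(Configuration conf) {
    HiveMetaStoreClient client = null;
    try {
      HiveConf hiveConf = HCatUtil.getHiveConf(conf);
      client = HCatUtil.getHiveClient(hiveConf);
      String tokenStrForm = client.getTokenStrForm();
      if (tokenStrForm != null
          && conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) {
        client.cancelDelegationToken(tokenStrForm);
      }
    } catch (Exception e) {
      // Best effort: a failed cancellation should not fail the job (logged as a warning above).
    } finally {
      HCatUtil.closeHiveClientQuietly(client);
    }
  }
}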
- // In the latter case the HCAT_KEY_TOKEN_SIGNATURE property in - // the conf will not be set - String tokenStrForm = client.getTokenStrForm(); - if (tokenStrForm != null - && context.getConfiguration().get( - HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - client.cancelDelegationToken(tokenStrForm); - } - } catch (MetaException e) { - LOG.warn("MetaException while cancelling delegation token.", e); - } catch (TException e) { - LOG.warn("TException while cancelling delegation token.", e); - } finally { - HCatUtil.closeHiveClientQuietly(client); + // baseCommitter.cleanupJob failed, try to clean up the + // metastore + for (Partition p : partitionsAdded) { + client.dropPartition(tableInfo.getDatabaseName(), + tableInfo.getTableName(), p.getValues()); + } + } catch (Exception te) { + // Keep cause as the original exception + throw new HCatException( + ErrorType.ERROR_PUBLISHING_PARTITION, e); } + } + if (e instanceof HCatException) { + throw (HCatException) e; + } else { + throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); + } + } finally { + HCatUtil.closeHiveClientQuietly(client); + } + } + + private void cancelDelegationTokens(JobContext context) throws IOException{ + LOG.info("Cancelling deletgation token for the job."); + HiveMetaStoreClient client = null; + try { + HiveConf hiveConf = HCatUtil + .getHiveConf(context.getConfiguration()); + client = HCatUtil.getHiveClient(hiveConf); + // cancel the deleg. tokens that were acquired for this job now that + // we are done - we should cancel if the tokens were acquired by + // HCatOutputFormat and not if they were supplied by Oozie. + // In the latter case the HCAT_KEY_TOKEN_SIGNATURE property in + // the conf will not be set + String tokenStrForm = client.getTokenStrForm(); + if (tokenStrForm != null + && context.getConfiguration().get( + HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + client.cancelDelegationToken(tokenStrForm); + } + } catch (MetaException e) { + LOG.warn("MetaException while cancelling delegation token.", e); + } catch (TException e) { + LOG.warn("TException while cancelling delegation token.", e); + } finally { + HCatUtil.closeHiveClientQuietly(client); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java index bd1d98d..9821100 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java @@ -60,194 +60,194 @@ */ class FileOutputFormatContainer extends OutputFormatContainer { - private static final PathFilter hiddenFileFilter = new PathFilter() { - public boolean accept(Path p) { - String name = p.getName(); - return !name.startsWith("_") && !name.startsWith("."); - } - }; - - /** - * @param of base OutputFormat to contain - */ - public FileOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { - super(of); + private static final PathFilter hiddenFileFilter = new PathFilter() { + public boolean accept(Path p) { + String name = p.getName(); + return !name.startsWith("_") && !name.startsWith("."); } - - @Override - public RecordWriter, HCatRecord> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - //this needs to be manually set, under normal circumstances MR Task does this - setWorkOutputPath(context); - - //Configure the output key and value classes. 
- // This is required for writing null as key for file based tables. - context.getConfiguration().set("mapred.output.key.class", - NullWritable.class.getName()); - String jobInfoString = context.getConfiguration().get( - HCatConstants.HCAT_KEY_OUTPUT_INFO); - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil - .deserialize(jobInfoString); - StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( - context.getConfiguration(), storeInfo); - Class serde = storageHandler.getSerDeClass(); - SerDe sd = (SerDe) ReflectionUtils.newInstance(serde, - context.getConfiguration()); - context.getConfiguration().set("mapred.output.value.class", - sd.getSerializedClass().getName()); - - RecordWriter, HCatRecord> rw; - if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){ - // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null. - // (That's because records can't be written until the values of the dynamic partitions are deduced. - // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.) - rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter)null,context); - } else { - Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); - Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part")); - - rw = new FileRecordWriterContainer( - getBaseOutputFormat().getRecordWriter( - parentDir.getFileSystem(context.getConfiguration()), - new JobConf(context.getConfiguration()), - childPath.toString(), - InternalUtil.createReporter(context)), - context); - } - return rw; + }; + + /** + * @param of base OutputFormat to contain + */ + public FileOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { + super(of); + } + + @Override + public RecordWriter, HCatRecord> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { + //this needs to be manually set, under normal circumstances MR Task does this + setWorkOutputPath(context); + + //Configure the output key and value classes. + // This is required for writing null as key for file based tables. + context.getConfiguration().set("mapred.output.key.class", + NullWritable.class.getName()); + String jobInfoString = context.getConfiguration().get( + HCatConstants.HCAT_KEY_OUTPUT_INFO); + OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil + .deserialize(jobInfoString); + StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( + context.getConfiguration(), storeInfo); + Class serde = storageHandler.getSerDeClass(); + SerDe sd = (SerDe) ReflectionUtils.newInstance(serde, + context.getConfiguration()); + context.getConfiguration().set("mapred.output.value.class", + sd.getSerializedClass().getName()); + + RecordWriter, HCatRecord> rw; + if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){ + // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null. + // (That's because records can't be written until the values of the dynamic partitions are deduced. + // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.) 
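getRecordWriter above recovers the OutputJobInfo by reading the HCAT_KEY_OUTPUT_INFO property and passing it through HCatUtil.deserialize, so the job descriptor is round-tripped through the Configuration as a string. The sketch below shows the general pattern with plain Java serialization and Base64 encoding; the helper names and the encoding are illustrative only and are not HCatUtil's actual implementation.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Base64;

import org.apache.hadoop.conf.Configuration;

public class ConfSerialization {
  // Publish a Serializable job-level descriptor as a String property in the job conf.
  static void store(Configuration conf, String key, Serializable value) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(bytes);
    out.writeObject(value);
    out.close();
    conf.set(key, Base64.getEncoder().encodeToString(bytes.toByteArray()));
  }

  // Read it back on the task side, where only the Configuration travels with the task.
  @SuppressWarnings("unchecked")
  static <T extends Serializable> T load(Configuration conf, String key)
      throws IOException, ClassNotFoundException {
    String encoded = conf.get(key);
    if (encoded == null) {
      return null;                              // nothing was published under this key
    }
    ObjectInputStream in = new ObjectInputStream(
        new ByteArrayInputStream(Base64.getDecoder().decode(encoded)));
    try {
      return (T) in.readObject();
    } finally {
      in.close();
    }
  }
}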
+ rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter)null,context); + } else { + Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); + Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part")); + + rw = new FileRecordWriterContainer( + getBaseOutputFormat().getRecordWriter( + parentDir.getFileSystem(context.getConfiguration()), + new JobConf(context.getConfiguration()), + childPath.toString(), + InternalUtil.createReporter(context)), + context); + } + return rw; + } + + @Override + public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); + HiveMetaStoreClient client = null; + try { + HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); + client = HCatUtil.getHiveClient(hiveConf); + handleDuplicatePublish(context, + jobInfo, + client, + new Table(jobInfo.getTableInfo().getTable())); + } catch (MetaException e) { + throw new IOException(e); + } catch (TException e) { + throw new IOException(e); + } finally { + HCatUtil.closeHiveClientQuietly(client); } - @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); - HiveMetaStoreClient client = null; - try { - HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); - client = HCatUtil.getHiveClient(hiveConf); - handleDuplicatePublish(context, - jobInfo, - client, - new Table(jobInfo.getTableInfo().getTable())); - } catch (MetaException e) { - throw new IOException(e); - } catch (TException e) { - throw new IOException(e); - } finally { - HCatUtil.closeHiveClientQuietly(client); + if (!jobInfo.isDynamicPartitioningUsed()) { + JobConf jobConf = new JobConf(context.getConfiguration()); + getBaseOutputFormat().checkOutputSpecs(null, jobConf); + //checkoutputspecs might've set some properties we need to have context reflect that + HCatUtil.copyConf(jobConf, context.getConfiguration()); + } + } + + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { + //this needs to be manually set, under normal circumstances MR Task does this + setWorkOutputPath(context); + return new FileOutputCommitterContainer(context, + HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed() ? + null : + new JobConf(context.getConfiguration()).getOutputCommitter()); + } + + /** + * Handles duplicate publish of partition. Fails if partition already exists. + * For non partitioned tables, fails if files are present in table directory. 
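The branch in getRecordWriter() above is easy to miss: with dynamic partitioning the container is deliberately built around a null base writer (real writers are created per partition once the partition values are known), while the static case opens one writer under mapred.work.output.dir with a unique "part" name. A hedged, self-contained sketch of that selection, using illustrative stand-in types rather than the mapred interfaces:

import java.io.IOException;

// Simplified sketch of the writer-selection branch in getRecordWriter().
public class WriterSelectionSketch {
    interface Writer { void write(String record) throws IOException; }

    static Writer openBaseWriter(String workDir, String uniquePart) {
        final String path = workDir + "/" + uniquePart;
        return record -> System.out.println("writing to " + path + ": " + record);
    }

    // Dynamic partitioning: return null; records cannot be written until the dynamic
    // partition values are known, so a writer with the right path is created lazily in write().
    static Writer selectWriter(boolean dynamicPartitioningUsed, String workOutputDir) {
        return dynamicPartitioningUsed ? null : openBaseWriter(workOutputDir, "part-00000");
    }

    public static void main(String[] args) throws IOException {
        Writer w = selectWriter(false, "/tmp/job/_temporary/attempt_0");
        w.write("row-1");                                            // static case writes immediately
        System.out.println(selectWriter(true, "/tmp/job") == null);  // dynamic case defers writer creation
    }
}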
+ * For dynamic partitioned publish, does nothing - check would need to be done at recordwriter time + * @param context the job + * @param outputInfo the output info + * @param client the metastore client + * @param table the table being written to + * @throws IOException + * @throws org.apache.hadoop.hive.metastore.api.MetaException + * @throws org.apache.thrift.TException + */ + private static void handleDuplicatePublish(JobContext context, OutputJobInfo outputInfo, + HiveMetaStoreClient client, Table table) throws IOException, MetaException, TException, NoSuchObjectException { + + /* + * For fully specified ptn, follow strict checks for existence of partitions in metadata + * For unpartitioned tables, follow filechecks + * For partially specified tables: + * This would then need filechecks at the start of a ptn write, + * Doing metadata checks can get potentially very expensive (fat conf) if + * there are a large number of partitions that match the partial specifications + */ + + if (table.getPartitionKeys().size() > 0) { + if (!outputInfo.isDynamicPartitioningUsed()) { + List partitionValues = getPartitionValueList( + table, outputInfo.getPartitionValues()); + // fully-specified partition + List currentParts = client.listPartitionNames(outputInfo.getDatabaseName(), + outputInfo.getTableName(), partitionValues, (short) 1); + + if (currentParts.size() > 0) { + throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION); } + } + } else { + List partitionValues = getPartitionValueList( + table, outputInfo.getPartitionValues()); + // non-partitioned table - if (!jobInfo.isDynamicPartitioningUsed()) { - JobConf jobConf = new JobConf(context.getConfiguration()); - getBaseOutputFormat().checkOutputSpecs(null, jobConf); - //checkoutputspecs might've set some properties we need to have context reflect that - HCatUtil.copyConf(jobConf, context.getConfiguration()); - } - } + Path tablePath = new Path(table.getTTable().getSd().getLocation()); + FileSystem fs = tablePath.getFileSystem(context.getConfiguration()); - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { - //this needs to be manually set, under normal circumstances MR Task does this - setWorkOutputPath(context); - return new FileOutputCommitterContainer(context, - HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed() ? - null : - new JobConf(context.getConfiguration()).getOutputCommitter()); - } + if (fs.exists(tablePath)) { + FileStatus[] status = fs.globStatus(new Path(tablePath, "*"), hiddenFileFilter); - /** - * Handles duplicate publish of partition. Fails if partition already exists. - * For non partitioned tables, fails if files are present in table directory. 
- * For dynamic partitioned publish, does nothing - check would need to be done at recordwriter time - * @param context the job - * @param outputInfo the output info - * @param client the metastore client - * @param table the table being written to - * @throws IOException - * @throws org.apache.hadoop.hive.metastore.api.MetaException - * @throws org.apache.thrift.TException - */ - private static void handleDuplicatePublish(JobContext context, OutputJobInfo outputInfo, - HiveMetaStoreClient client, Table table) throws IOException, MetaException, TException, NoSuchObjectException { - - /* - * For fully specified ptn, follow strict checks for existence of partitions in metadata - * For unpartitioned tables, follow filechecks - * For partially specified tables: - * This would then need filechecks at the start of a ptn write, - * Doing metadata checks can get potentially very expensive (fat conf) if - * there are a large number of partitions that match the partial specifications - */ - - if (table.getPartitionKeys().size() > 0) { - if (!outputInfo.isDynamicPartitioningUsed()) { - List partitionValues = getPartitionValueList( - table, outputInfo.getPartitionValues()); - // fully-specified partition - List currentParts = client.listPartitionNames(outputInfo.getDatabaseName(), - outputInfo.getTableName(), partitionValues, (short) 1); - - if (currentParts.size() > 0) { - throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION); - } - } - } else { - List partitionValues = getPartitionValueList( - table, outputInfo.getPartitionValues()); - // non-partitioned table - - Path tablePath = new Path(table.getTTable().getSd().getLocation()); - FileSystem fs = tablePath.getFileSystem(context.getConfiguration()); - - if (fs.exists(tablePath)) { - FileStatus[] status = fs.globStatus(new Path(tablePath, "*"), hiddenFileFilter); - - if (status.length > 0) { - throw new HCatException(ErrorType.ERROR_NON_EMPTY_TABLE, - table.getDbName() + "." + table.getTableName()); - } - } + if (status.length > 0) { + throw new HCatException(ErrorType.ERROR_NON_EMPTY_TABLE, + table.getDbName() + "." + table.getTableName()); } + } + } + } + + /** + * Convert the partition value map to a value list in the partition key order. + * @param table the table being written to + * @param valueMap the partition value map + * @return the partition value list + * @throws java.io.IOException + */ + static List getPartitionValueList(Table table, Map valueMap) throws IOException { + + if (valueMap.size() != table.getPartitionKeys().size()) { + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, + "Table " + + table.getTableName() + " has " + + table.getPartitionKeys().size() + " partition keys, got " + + valueMap.size()); } - /** - * Convert the partition value map to a value list in the partition key order. 
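The decision tree in handleDuplicatePublish() is worth spelling out once without the metastore and FileSystem plumbing: a fully specified static partition fails if it already exists, an unpartitioned table fails if any non-hidden file is already present, and dynamic-partition writes are not checked here at all. A standalone sketch under those assumptions, with the client and filesystem collapsed into parameters:

import java.util.Collections;
import java.util.List;

// Sketch of the duplicate-publish rules; names and the exception type are illustrative.
public class DuplicatePublishSketch {
    static void check(boolean tableIsPartitioned,
                      boolean dynamicPartitioningUsed,
                      List<String> matchingPartitionNames,   // listPartitionNames(...) result for a static spec
                      boolean tableDirHasVisibleFiles) {     // any non-hidden file under the table dir?
        if (tableIsPartitioned) {
            // Static, fully specified partition: fail if it already exists.
            // Dynamic partitioning: nothing to check here; it is deferred to write time.
            if (!dynamicPartitioningUsed && !matchingPartitionNames.isEmpty()) {
                throw new IllegalStateException("ERROR_DUPLICATE_PARTITION");
            }
        } else if (tableDirHasVisibleFiles) {
            // Unpartitioned table: fail if files are already present.
            throw new IllegalStateException("ERROR_NON_EMPTY_TABLE");
        }
    }

    public static void main(String[] args) {
        // Unpartitioned table with an empty directory: passes the check.
        check(false, false, Collections.<String>emptyList(), false);
        System.out.println("ok");
    }
}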
- * @param table the table being written to - * @param valueMap the partition value map - * @return the partition value list - * @throws java.io.IOException - */ - static List getPartitionValueList(Table table, Map valueMap) throws IOException { - - if (valueMap.size() != table.getPartitionKeys().size()) { - throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, - "Table " - + table.getTableName() + " has " + - table.getPartitionKeys().size() + " partition keys, got " + - valueMap.size()); - } - - List values = new ArrayList(); - - for (FieldSchema schema : table.getPartitionKeys()) { - String value = valueMap.get(schema.getName().toLowerCase()); + List values = new ArrayList(); - if (value == null) { - throw new HCatException(ErrorType.ERROR_MISSING_PARTITION_KEY, - "Key " + schema.getName() + " of table " + table.getTableName()); - } + for (FieldSchema schema : table.getPartitionKeys()) { + String value = valueMap.get(schema.getName().toLowerCase()); - values.add(value); - } + if (value == null) { + throw new HCatException(ErrorType.ERROR_MISSING_PARTITION_KEY, + "Key " + schema.getName() + " of table " + table.getTableName()); + } - return values; + values.add(value); } - static void setWorkOutputPath(TaskAttemptContext context) throws IOException { - String outputPath = context.getConfiguration().get("mapred.output.dir"); - //we need to do this to get the task path and set it for mapred implementation - //since it can't be done automatically because of mapreduce->mapred abstraction - if (outputPath != null) - context.getConfiguration().set("mapred.work.output.dir", - new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString()); - } + return values; + } + + static void setWorkOutputPath(TaskAttemptContext context) throws IOException { + String outputPath = context.getConfiguration().get("mapred.output.dir"); + //we need to do this to get the task path and set it for mapred implementation + //since it can't be done automatically because of mapreduce->mapred abstraction + if (outputPath != null) + context.getConfiguration().set("mapred.work.output.dir", + new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString()); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java index 0333230..efdb595 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java @@ -53,216 +53,216 @@ */ class FileRecordWriterContainer extends RecordWriterContainer { - private final HCatStorageHandler storageHandler; - private final SerDe serDe; - private final ObjectInspector objectInspector; - - private boolean dynamicPartitioningUsed = false; - - private final Map, ? 
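getPartitionValueList() is a small but important ordering step: the user-supplied partition spec (a map) is rewritten as a list in the table's partition-key order, with errors for size mismatches or missing keys. A plain-collections sketch of the same logic, with strings standing in for the metastore FieldSchema objects:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Reorder a partition-value map into the table's partition-key order.
public class PartitionValueOrderSketch {
    static List<String> toOrderedValues(List<String> partitionKeys, Map<String, String> valueMap) {
        if (valueMap.size() != partitionKeys.size()) {
            throw new IllegalArgumentException("Table has " + partitionKeys.size()
                + " partition keys, got " + valueMap.size());
        }
        List<String> values = new ArrayList<>();
        for (String key : partitionKeys) {
            String value = valueMap.get(key.toLowerCase());   // looked up lower-cased, as in the patch
            if (value == null) {
                throw new IllegalArgumentException("Missing value for partition key " + key);
            }
            values.add(value);
        }
        return values;
    }

    public static void main(String[] args) {
        Map<String, String> spec = new LinkedHashMap<>();
        spec.put("country", "us");
        spec.put("dt", "2013-01-01");
        // Prints [2013-01-01, us]: values follow the table's key order, not the map's insertion order.
        System.out.println(toOrderedValues(Arrays.asList("dt", "country"), spec));
    }
}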
super Writable>> baseDynamicWriters; - private final Map baseDynamicSerDe; - private final Map baseDynamicCommitters; - private final Map dynamicContexts; - private final Map dynamicObjectInspectors; - private Map dynamicOutputJobInfo; - - - private final List partColsToDel; - private final List dynamicPartCols; - private int maxDynamicPartitions; - - private OutputJobInfo jobInfo; - private TaskAttemptContext context; - - /** - * @param baseWriter RecordWriter to contain - * @param context current TaskAttemptContext - * @throws IOException - * @throws InterruptedException - */ - public FileRecordWriterContainer(org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseWriter, - TaskAttemptContext context) throws IOException, InterruptedException { - super(context, baseWriter); - this.context = context; - jobInfo = HCatOutputFormat.getJobInfo(context); - - storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); - objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); - try { - InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); - } catch (SerDeException e) { - throw new IOException("Failed to inialize SerDe", e); - } - - // If partition columns occur in data, we want to remove them. - partColsToDel = jobInfo.getPosOfPartCols(); - dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); - dynamicPartCols = jobInfo.getPosOfDynPartCols(); - maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); - - if ((partColsToDel == null) || (dynamicPartitioningUsed && (dynamicPartCols == null))) { - throw new HCatException("It seems that setSchema() is not called on " + - "HCatOutputFormat. Please make sure that method is called."); - } + private final HCatStorageHandler storageHandler; + private final SerDe serDe; + private final ObjectInspector objectInspector; + + private boolean dynamicPartitioningUsed = false; + + private final Map, ? super Writable>> baseDynamicWriters; + private final Map baseDynamicSerDe; + private final Map baseDynamicCommitters; + private final Map dynamicContexts; + private final Map dynamicObjectInspectors; + private Map dynamicOutputJobInfo; + + + private final List partColsToDel; + private final List dynamicPartCols; + private int maxDynamicPartitions; + + private OutputJobInfo jobInfo; + private TaskAttemptContext context; + + /** + * @param baseWriter RecordWriter to contain + * @param context current TaskAttemptContext + * @throws IOException + * @throws InterruptedException + */ + public FileRecordWriterContainer(org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseWriter, + TaskAttemptContext context) throws IOException, InterruptedException { + super(context, baseWriter); + this.context = context; + jobInfo = HCatOutputFormat.getJobInfo(context); + + storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); + objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); + try { + InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); + } catch (SerDeException e) { + throw new IOException("Failed to inialize SerDe", e); + } + // If partition columns occur in data, we want to remove them. 
+ partColsToDel = jobInfo.getPosOfPartCols(); + dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); + dynamicPartCols = jobInfo.getPosOfDynPartCols(); + maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); - if (!dynamicPartitioningUsed) { - this.baseDynamicSerDe = null; - this.baseDynamicWriters = null; - this.baseDynamicCommitters = null; - this.dynamicContexts = null; - this.dynamicObjectInspectors = null; - this.dynamicOutputJobInfo = null; - } else { - this.baseDynamicSerDe = new HashMap(); - this.baseDynamicWriters = new HashMap, ? super Writable>>(); - this.baseDynamicCommitters = new HashMap(); - this.dynamicContexts = new HashMap(); - this.dynamicObjectInspectors = new HashMap(); - this.dynamicOutputJobInfo = new HashMap(); - } + if ((partColsToDel == null) || (dynamicPartitioningUsed && (dynamicPartCols == null))) { + throw new HCatException("It seems that setSchema() is not called on " + + "HCatOutputFormat. Please make sure that method is called."); } - /** - * @return the storagehandler - */ - public HCatStorageHandler getStorageHandler() { - return storageHandler; - } - @Override - public void close(TaskAttemptContext context) throws IOException, - InterruptedException { - Reporter reporter = InternalUtil.createReporter(context); - if (dynamicPartitioningUsed) { - for (org.apache.hadoop.mapred.RecordWriter, ? super Writable> bwriter : baseDynamicWriters.values()) { - //We are in RecordWriter.close() make sense that the context would be TaskInputOutput - bwriter.close(reporter); - } - for (Map.Entry entry : baseDynamicCommitters.entrySet()) { - org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey()); - OutputCommitter baseOutputCommitter = entry.getValue(); - if (baseOutputCommitter.needsTaskCommit(currContext)) { - baseOutputCommitter.commitTask(currContext); - } - } - } else { - getBaseRecordWriter().close(reporter); - } + if (!dynamicPartitioningUsed) { + this.baseDynamicSerDe = null; + this.baseDynamicWriters = null; + this.baseDynamicCommitters = null; + this.dynamicContexts = null; + this.dynamicObjectInspectors = null; + this.dynamicOutputJobInfo = null; + } else { + this.baseDynamicSerDe = new HashMap(); + this.baseDynamicWriters = new HashMap, ? 
super Writable>>(); + this.baseDynamicCommitters = new HashMap(); + this.dynamicContexts = new HashMap(); + this.dynamicObjectInspectors = new HashMap(); + this.dynamicOutputJobInfo = new HashMap(); } - - @Override - public void write(WritableComparable key, HCatRecord value) throws IOException, - InterruptedException { - - org.apache.hadoop.mapred.RecordWriter localWriter; - ObjectInspector localObjectInspector; - SerDe localSerDe; - OutputJobInfo localJobInfo = null; - - if (dynamicPartitioningUsed) { - // calculate which writer to use from the remaining values - this needs to be done before we delete cols - List dynamicPartValues = new ArrayList(); - for (Integer colToAppend : dynamicPartCols) { - dynamicPartValues.add(value.get(colToAppend).toString()); - } - - String dynKey = dynamicPartValues.toString(); - if (!baseDynamicWriters.containsKey(dynKey)) { - if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) { - throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, - "Number of dynamic partitions being created " - + "exceeds configured max allowable partitions[" - + maxDynamicPartitions - + "], increase parameter [" - + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname - + "] if needed."); - } - - org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil.createTaskAttemptContext(context); - configureDynamicStorageHandler(currTaskContext, dynamicPartValues); - localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext); - - //setup serDe - SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), currTaskContext.getJobConf()); - try { - InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo); - } catch (SerDeException e) { - throw new IOException("Failed to initialize SerDe", e); - } - - //create base OutputFormat - org.apache.hadoop.mapred.OutputFormat baseOF = - ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf()); - - //We are skipping calling checkOutputSpecs() for each partition - //As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition - //See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance - //In general this should be ok for most FileOutputFormat implementations - //but may become an issue for cases when the method is used to perform other setup tasks - - //get Output Committer - org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf().getOutputCommitter(); - //create currJobContext the latest so it gets all the config changes - org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext); - //setupJob() - baseOutputCommitter.setupJob(currJobContext); - //recreate to refresh jobConf of currTask context - currTaskContext = - HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(), - currTaskContext.getTaskAttemptID(), - currTaskContext.getProgressible()); - //set temp location - currTaskContext.getConfiguration().set("mapred.work.output.dir", - new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath().toString()); - //setupTask() - baseOutputCommitter.setupTask(currTaskContext); - - Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir")); - Path childPath = new Path(parentDir,FileOutputFormat.getUniqueFile(currTaskContext, "part", "")); - - 
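For orientation on the dynamic-partitioning lifecycle in this class: close() walks the per-partition caches in two passes, first closing every cached base writer and then calling commitTask() on each per-partition committer that asks for one. A hedged sketch with illustrative interfaces in place of the mapred RecordWriter and OutputCommitter:

import java.io.IOException;
import java.util.Map;

// Two-pass shutdown of the dynamic-partition caches, as in the close() method above.
public class DynamicCloseSketch {
    interface BaseWriter { void close() throws IOException; }
    interface Committer { boolean needsTaskCommit(); void commitTask() throws IOException; }

    static void closeAll(Map<String, BaseWriter> writers, Map<String, Committer> committers)
            throws IOException {
        for (BaseWriter w : writers.values()) {
            w.close();                        // flush each partition's output file
        }
        for (Map.Entry<String, Committer> e : committers.entrySet()) {
            if (e.getValue().needsTaskCommit()) {
                e.getValue().commitTask();    // promote that partition's task output
            }
        }
    }
}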
org.apache.hadoop.mapred.RecordWriter baseRecordWriter = - baseOF.getRecordWriter( - parentDir.getFileSystem(currTaskContext.getConfiguration()), - currTaskContext.getJobConf(), - childPath.toString(), - InternalUtil.createReporter(currTaskContext)); - - baseDynamicWriters.put(dynKey, baseRecordWriter); - baseDynamicSerDe.put(dynKey, currSerDe); - baseDynamicCommitters.put(dynKey, baseOutputCommitter); - dynamicContexts.put(dynKey, currTaskContext); - dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema())); - dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey))); - } - - localJobInfo = dynamicOutputJobInfo.get(dynKey); - localWriter = baseDynamicWriters.get(dynKey); - localSerDe = baseDynamicSerDe.get(dynKey); - localObjectInspector = dynamicObjectInspectors.get(dynKey); - } else { - localJobInfo = jobInfo; - localWriter = getBaseRecordWriter(); - localSerDe = serDe; - localObjectInspector = objectInspector; + } + + /** + * @return the storagehandler + */ + public HCatStorageHandler getStorageHandler() { + return storageHandler; + } + + @Override + public void close(TaskAttemptContext context) throws IOException, + InterruptedException { + Reporter reporter = InternalUtil.createReporter(context); + if (dynamicPartitioningUsed) { + for (org.apache.hadoop.mapred.RecordWriter, ? super Writable> bwriter : baseDynamicWriters.values()) { + //We are in RecordWriter.close() make sense that the context would be TaskInputOutput + bwriter.close(reporter); + } + for (Map.Entry entry : baseDynamicCommitters.entrySet()) { + org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey()); + OutputCommitter baseOutputCommitter = entry.getValue(); + if (baseOutputCommitter.needsTaskCommit(currContext)) { + baseOutputCommitter.commitTask(currContext); } - - for (Integer colToDel : partColsToDel) { - value.remove(colToDel); + } + } else { + getBaseRecordWriter().close(reporter); + } + } + + @Override + public void write(WritableComparable key, HCatRecord value) throws IOException, + InterruptedException { + + org.apache.hadoop.mapred.RecordWriter localWriter; + ObjectInspector localObjectInspector; + SerDe localSerDe; + OutputJobInfo localJobInfo = null; + + if (dynamicPartitioningUsed) { + // calculate which writer to use from the remaining values - this needs to be done before we delete cols + List dynamicPartValues = new ArrayList(); + for (Integer colToAppend : dynamicPartCols) { + dynamicPartValues.add(value.get(colToAppend).toString()); + } + + String dynKey = dynamicPartValues.toString(); + if (!baseDynamicWriters.containsKey(dynKey)) { + if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) { + throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, + "Number of dynamic partitions being created " + + "exceeds configured max allowable partitions[" + + maxDynamicPartitions + + "], increase parameter [" + + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname + + "] if needed."); } + org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil.createTaskAttemptContext(context); + configureDynamicStorageHandler(currTaskContext, dynamicPartValues); + localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext); - //The key given by user is ignored + //setup serDe + SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), currTaskContext.getJobConf()); try { - localWriter.write(NullWritable.get(), 
localSerDe.serialize(value.getAll(), localObjectInspector)); + InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo); } catch (SerDeException e) { - throw new IOException("Failed to serialize object", e); + throw new IOException("Failed to initialize SerDe", e); } + + //create base OutputFormat + org.apache.hadoop.mapred.OutputFormat baseOF = + ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf()); + + //We are skipping calling checkOutputSpecs() for each partition + //As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition + //See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance + //In general this should be ok for most FileOutputFormat implementations + //but may become an issue for cases when the method is used to perform other setup tasks + + //get Output Committer + org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf().getOutputCommitter(); + //create currJobContext the latest so it gets all the config changes + org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext); + //setupJob() + baseOutputCommitter.setupJob(currJobContext); + //recreate to refresh jobConf of currTask context + currTaskContext = + HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(), + currTaskContext.getTaskAttemptID(), + currTaskContext.getProgressible()); + //set temp location + currTaskContext.getConfiguration().set("mapred.work.output.dir", + new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath().toString()); + //setupTask() + baseOutputCommitter.setupTask(currTaskContext); + + Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir")); + Path childPath = new Path(parentDir,FileOutputFormat.getUniqueFile(currTaskContext, "part", "")); + + org.apache.hadoop.mapred.RecordWriter baseRecordWriter = + baseOF.getRecordWriter( + parentDir.getFileSystem(currTaskContext.getConfiguration()), + currTaskContext.getJobConf(), + childPath.toString(), + InternalUtil.createReporter(currTaskContext)); + + baseDynamicWriters.put(dynKey, baseRecordWriter); + baseDynamicSerDe.put(dynKey, currSerDe); + baseDynamicCommitters.put(dynKey, baseOutputCommitter); + dynamicContexts.put(dynKey, currTaskContext); + dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema())); + dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey))); + } + + localJobInfo = dynamicOutputJobInfo.get(dynKey); + localWriter = baseDynamicWriters.get(dynKey); + localSerDe = baseDynamicSerDe.get(dynKey); + localObjectInspector = dynamicObjectInspectors.get(dynKey); + } else { + localJobInfo = jobInfo; + localWriter = getBaseRecordWriter(); + localSerDe = serDe; + localObjectInspector = objectInspector; } - protected void configureDynamicStorageHandler(JobContext context, List dynamicPartVals) throws IOException { - HCatOutputFormat.configureOutputStorageHandler(context, dynamicPartVals); + for (Integer colToDel : partColsToDel) { + value.remove(colToDel); } + + //The key given by user is ignored + try { + localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector)); + } catch (SerDeException e) { + throw new IOException("Failed to serialize object", e); + } + } + + protected void 
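The write() path above does three things per record when dynamic partitioning is on: build a routing key from the dynamic partition columns (before anything is removed), enforce the configured partition cap when a new key would create a new writer, and strip the partition columns before handing the trimmed record on. A self-contained sketch of that routing, modelling the record as a List<Object> and buffering instead of writing; positions, the cap, and the descending-index removal are details of this simplified model, not claims about HCatRecord:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DynamicWriteSketch {
    static final Map<String, List<List<Object>>> buffered = new HashMap<>();

    static void write(List<Object> record, List<Integer> dynamicPartCols,
                      List<Integer> partColsToDel, int maxDynamicPartitions) {
        // 1. Key on the dynamic partition values, before any columns are removed.
        List<String> dynValues = new ArrayList<>();
        for (int col : dynamicPartCols) {
            dynValues.add(String.valueOf(record.get(col)));
        }
        String dynKey = dynValues.toString();

        // 2. A new key counts against the configured cap (-1 means unlimited).
        if (!buffered.containsKey(dynKey)
                && maxDynamicPartitions != -1
                && buffered.size() > maxDynamicPartitions) {
            throw new IllegalStateException("too many dynamic partitions: " + buffered.size());
        }

        // 3. Drop partition columns (highest index first so positions stay valid in this
        //    list model) and hand the trimmed record to the per-partition sink.
        List<Integer> toDelete = new ArrayList<>(partColsToDel);
        toDelete.sort(Collections.reverseOrder());
        List<Object> trimmed = new ArrayList<>(record);
        for (int col : toDelete) {
            trimmed.remove(col);
        }
        buffered.computeIfAbsent(dynKey, k -> new ArrayList<>()).add(trimmed);
    }

    public static void main(String[] args) {
        // record = [id, name, country]; country (position 2) is the dynamic partition column.
        write(Arrays.asList((Object) "1", "alice", "us"),
              Arrays.asList(2), Arrays.asList(2), 100);
        System.out.println(buffered);   // {[us]=[[1, alice]]}
    }
}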
configureDynamicStorageHandler(JobContext context, List dynamicPartVals) throws IOException { + HCatOutputFormat.configureOutputStorageHandler(context, dynamicPartVals); + } + } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FosterStorageHandler.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FosterStorageHandler.java index 814e4f4..5f9fc91 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FosterStorageHandler.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FosterStorageHandler.java @@ -48,147 +48,147 @@ */ public class FosterStorageHandler extends HCatStorageHandler { - public Configuration conf; - /** The directory under which data is initially written for a partitioned table */ - protected static final String DYNTEMP_DIR_NAME = "_DYN"; - - /** The directory under which data is initially written for a non partitioned table */ - protected static final String TEMP_DIR_NAME = "_TEMP"; - - private Class ifClass; - private Class ofClass; - private Class serDeClass; - - public FosterStorageHandler(String ifName, String ofName, String serdeName) throws ClassNotFoundException { - this((Class) Class.forName(ifName), - (Class) Class.forName(ofName), - (Class) Class.forName(serdeName)); - } - - public FosterStorageHandler(Class ifClass, - Class ofClass, - Class serDeClass) { - this.ifClass = ifClass; - this.ofClass = ofClass; - this.serDeClass = serDeClass; - } - - @Override - public Class getInputFormatClass() { - return ifClass; //To change body of overridden methods use File | Settings | File Templates. - } - - @Override - public Class getOutputFormatClass() { - return ofClass; //To change body of overridden methods use File | Settings | File Templates. - } - - @Override - public Class getSerDeClass() { - return serDeClass; //To change body of implemented methods use File | Settings | File Templates. - } - - @Override - public HiveMetaHook getMetaHook() { - return null; - } - - @Override - public void configureInputJobProperties(TableDesc tableDesc, - Map jobProperties) { - - } - - @Override - public void configureOutputJobProperties(TableDesc tableDesc, - Map jobProperties) { - try { - OutputJobInfo jobInfo = (OutputJobInfo) - HCatUtil.deserialize(tableDesc.getJobProperties().get( - HCatConstants.HCAT_KEY_OUTPUT_INFO)); - String parentPath = jobInfo.getTableInfo().getTableLocation(); - String dynHash = tableDesc.getJobProperties().get( - HCatConstants.HCAT_DYNAMIC_PTN_JOBID); - - // For dynamic partitioned writes without all keyvalues specified, - // we create a temp dir for the associated write job - if (dynHash != null) { - parentPath = new Path(parentPath, - DYNTEMP_DIR_NAME + dynHash).toString(); - } - - String outputLocation; - - if (Boolean.valueOf((String)tableDesc.getProperties().get("EXTERNAL")) - && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { - // honor external table that specifies the location - outputLocation = jobInfo.getLocation(); - } else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) { - // For non-partitioned tables, we send them to the temp dir - outputLocation = TEMP_DIR_NAME; - } else { - List cols = new ArrayList(); - List values = new ArrayList(); - - //Get the output location in the order partition keys are defined for the table. - for (String name : - jobInfo.getTableInfo(). 
- getPartitionColumns().getFieldNames()) { - String value = jobInfo.getPartitionValues().get(name); - cols.add(name); - values.add(value); - } - outputLocation = FileUtils.makePartName(cols, values); - } - - jobInfo.setLocation(new Path(parentPath, outputLocation).toString()); - - //only set output dir if partition is fully materialized - if (jobInfo.getPartitionValues().size() - == jobInfo.getTableInfo().getPartitionColumns().size()) { - jobProperties.put("mapred.output.dir", jobInfo.getLocation()); - } - - //TODO find a better home for this, RCFile specifc - jobProperties.put(RCFile.COLUMN_NUMBER_CONF_STR, - Integer.toOctalString( - jobInfo.getOutputSchema().getFields().size())); - jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(jobInfo)); - } catch (IOException e) { - throw new IllegalStateException("Failed to set output path", e); + public Configuration conf; + /** The directory under which data is initially written for a partitioned table */ + protected static final String DYNTEMP_DIR_NAME = "_DYN"; + + /** The directory under which data is initially written for a non partitioned table */ + protected static final String TEMP_DIR_NAME = "_TEMP"; + + private Class ifClass; + private Class ofClass; + private Class serDeClass; + + public FosterStorageHandler(String ifName, String ofName, String serdeName) throws ClassNotFoundException { + this((Class) Class.forName(ifName), + (Class) Class.forName(ofName), + (Class) Class.forName(serdeName)); + } + + public FosterStorageHandler(Class ifClass, + Class ofClass, + Class serDeClass) { + this.ifClass = ifClass; + this.ofClass = ofClass; + this.serDeClass = serDeClass; + } + + @Override + public Class getInputFormatClass() { + return ifClass; //To change body of overridden methods use File | Settings | File Templates. + } + + @Override + public Class getOutputFormatClass() { + return ofClass; //To change body of overridden methods use File | Settings | File Templates. + } + + @Override + public Class getSerDeClass() { + return serDeClass; //To change body of implemented methods use File | Settings | File Templates. + } + + @Override + public HiveMetaHook getMetaHook() { + return null; + } + + @Override + public void configureInputJobProperties(TableDesc tableDesc, + Map jobProperties) { + + } + + @Override + public void configureOutputJobProperties(TableDesc tableDesc, + Map jobProperties) { + try { + OutputJobInfo jobInfo = (OutputJobInfo) + HCatUtil.deserialize(tableDesc.getJobProperties().get( + HCatConstants.HCAT_KEY_OUTPUT_INFO)); + String parentPath = jobInfo.getTableInfo().getTableLocation(); + String dynHash = tableDesc.getJobProperties().get( + HCatConstants.HCAT_DYNAMIC_PTN_JOBID); + + // For dynamic partitioned writes without all keyvalues specified, + // we create a temp dir for the associated write job + if (dynHash != null) { + parentPath = new Path(parentPath, + DYNTEMP_DIR_NAME + dynHash).toString(); + } + + String outputLocation; + + if (Boolean.valueOf((String)tableDesc.getProperties().get("EXTERNAL")) + && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { + // honor external table that specifies the location + outputLocation = jobInfo.getLocation(); + } else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) { + // For non-partitioned tables, we send them to the temp dir + outputLocation = TEMP_DIR_NAME; + } else { + List cols = new ArrayList(); + List values = new ArrayList(); + + //Get the output location in the order partition keys are defined for the table. 
+ for (String name : + jobInfo.getTableInfo(). + getPartitionColumns().getFieldNames()) { + String value = jobInfo.getPartitionValues().get(name); + cols.add(name); + values.add(value); } - - } - - @Override - OutputFormatContainer getOutputFormatContainer( - org.apache.hadoop.mapred.OutputFormat outputFormat) { - return new FileOutputFormatContainer(outputFormat); + outputLocation = FileUtils.makePartName(cols, values); + } + + jobInfo.setLocation(new Path(parentPath, outputLocation).toString()); + + //only set output dir if partition is fully materialized + if (jobInfo.getPartitionValues().size() + == jobInfo.getTableInfo().getPartitionColumns().size()) { + jobProperties.put("mapred.output.dir", jobInfo.getLocation()); + } + + //TODO find a better home for this, RCFile specifc + jobProperties.put(RCFile.COLUMN_NUMBER_CONF_STR, + Integer.toOctalString( + jobInfo.getOutputSchema().getFields().size())); + jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, + HCatUtil.serialize(jobInfo)); + } catch (IOException e) { + throw new IllegalStateException("Failed to set output path", e); } - @Override - public Configuration getConf() { - return conf; - } - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - - @Override - public HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException { - return new DefaultHiveAuthorizationProvider(); - } - @Override - public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { - //do nothing by default - //EK: added the same (no-op) implementation as in - // org.apache.hive.hcatalog.DefaultStorageHandler (hive 0.12) - // this is needed to get 0.11 API compat layer to work - // see HIVE-4896 - } + } + + @Override + OutputFormatContainer getOutputFormatContainer( + org.apache.hadoop.mapred.OutputFormat outputFormat) { + return new FileOutputFormatContainer(outputFormat); + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException { + return new DefaultHiveAuthorizationProvider(); + } + @Override + public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { + //do nothing by default + //EK: added the same (no-op) implementation as in + // org.apache.hive.hcatalog.DefaultStorageHandler (hive 0.12) + // this is needed to get 0.11 API compat layer to work + // see HIVE-4896 + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java index efb47b9..5555d2a 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java @@ -51,286 +51,286 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.HCatBaseInputFormat} instead */ public abstract class HCatBaseInputFormat - extends InputFormat { - - /** - * get the schema for the HCatRecord data returned by HCatInputFormat. - * - * @param context the jobContext - * @throws IllegalArgumentException - */ - private Class inputFileFormatClass; - - // TODO needs to go in InitializeInput? 
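The output-location rules in configureOutputJobProperties() come down to: dynamic writes land under a _DYN<jobid-hash> scratch directory, unpartitioned static writes under _TEMP, and keyed static writes under key=value path segments (the patch delegates that last step to FileUtils.makePartName). A sketch under those assumptions, omitting the external-table location override and the escaping that makePartName performs:

import java.util.LinkedHashMap;
import java.util.Map;

public class OutputLocationSketch {
    static String outputLocation(String tableLocation, String dynHash,
                                 Map<String, String> partitionValuesInKeyOrder) {
        String parent = tableLocation;
        if (dynHash != null) {
            parent = parent + "/_DYN" + dynHash;   // scratch dir per dynamic-partition write job
        }
        String child;
        if (dynHash == null && partitionValuesInKeyOrder.isEmpty()) {
            child = "_TEMP";                       // unpartitioned table: temp dir under the table
        } else {
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<String, String> e : partitionValuesInKeyOrder.entrySet()) {
                if (sb.length() > 0) sb.append('/');
                sb.append(e.getKey()).append('=').append(e.getValue());   // simplified makePartName
            }
            child = sb.toString();
        }
        return parent + "/" + child;
    }

    public static void main(String[] args) {
        Map<String, String> spec = new LinkedHashMap<>();
        spec.put("dt", "2013-01-01");
        System.out.println(outputLocation("/warehouse/t", null, spec)); // /warehouse/t/dt=2013-01-01
    }
}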
as part of InputJobInfo - private static HCatSchema getOutputSchema(Configuration conf) - throws IOException { - String os = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); - if (os == null) { - return getTableSchema(conf); - } else { - return (HCatSchema) HCatUtil.deserialize(os); - } - } - - /** - * Set the schema for the HCatRecord data returned by HCatInputFormat. - * @param job the job object - * @param hcatSchema the schema to use as the consolidated schema - */ - public static void setOutputSchema(Job job, HCatSchema hcatSchema) - throws IOException { - job.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, - HCatUtil.serialize(hcatSchema)); + extends InputFormat { + + /** + * get the schema for the HCatRecord data returned by HCatInputFormat. + * + * @param context the jobContext + * @throws IllegalArgumentException + */ + private Class inputFileFormatClass; + + // TODO needs to go in InitializeInput? as part of InputJobInfo + private static HCatSchema getOutputSchema(Configuration conf) + throws IOException { + String os = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); + if (os == null) { + return getTableSchema(conf); + } else { + return (HCatSchema) HCatUtil.deserialize(os); } - - protected static org.apache.hadoop.mapred.InputFormat - getMapRedInputFormat(JobConf job, Class inputFormatClass) throws IOException { - return ( - org.apache.hadoop.mapred.InputFormat) - ReflectionUtils.newInstance(inputFormatClass, job); + } + + /** + * Set the schema for the HCatRecord data returned by HCatInputFormat. + * @param job the job object + * @param hcatSchema the schema to use as the consolidated schema + */ + public static void setOutputSchema(Job job, HCatSchema hcatSchema) + throws IOException { + job.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, + HCatUtil.serialize(hcatSchema)); + } + + protected static org.apache.hadoop.mapred.InputFormat + getMapRedInputFormat(JobConf job, Class inputFormatClass) throws IOException { + return ( + org.apache.hadoop.mapred.InputFormat) + ReflectionUtils.newInstance(inputFormatClass, job); + } + + /** + * Logically split the set of input files for the job. Returns the + * underlying InputFormat's splits + * @param jobContext the job context object + * @return the splits, an HCatInputSplit wrapper over the storage + * handler InputSplits + * @throws IOException or InterruptedException + */ + @Override + public List getSplits(JobContext jobContext) + throws IOException, InterruptedException { + Configuration conf = jobContext.getConfiguration(); + + //Get the job info from the configuration, + //throws exception if not initialized + InputJobInfo inputJobInfo; + try { + inputJobInfo = getJobInfo(conf); + } catch (Exception e) { + throw new IOException(e); } - /** - * Logically split the set of input files for the job. 
Returns the - * underlying InputFormat's splits - * @param jobContext the job context object - * @return the splits, an HCatInputSplit wrapper over the storage - * handler InputSplits - * @throws IOException or InterruptedException - */ - @Override - public List getSplits(JobContext jobContext) - throws IOException, InterruptedException { - Configuration conf = jobContext.getConfiguration(); - - //Get the job info from the configuration, - //throws exception if not initialized - InputJobInfo inputJobInfo; - try { - inputJobInfo = getJobInfo(conf); - } catch (Exception e) { - throw new IOException(e); - } - - List splits = new ArrayList(); - List partitionInfoList = inputJobInfo.getPartitions(); - if (partitionInfoList == null) { - //No partitions match the specified partition filter - return splits; - } - - HCatStorageHandler storageHandler; - JobConf jobConf; - //For each matching partition, call getSplits on the underlying InputFormat - for (PartInfo partitionInfo : partitionInfoList) { - jobConf = HCatUtil.getJobConfFromContext(jobContext); - setInputPath(jobConf, partitionInfo.getLocation()); - Map jobProperties = partitionInfo.getJobProperties(); - - HCatSchema allCols = new HCatSchema(new LinkedList()); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getDataColumns().getFields()) - allCols.append(field); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getPartitionColumns().getFields()) - allCols.append(field); - - HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); - - storageHandler = HCatUtil.getStorageHandler( - jobConf, partitionInfo); - - //Get the input format - Class inputFormatClass = storageHandler.getInputFormatClass(); - org.apache.hadoop.mapred.InputFormat inputFormat = - getMapRedInputFormat(jobConf, inputFormatClass); - - //Call getSplit on the InputFormat, create an HCatSplit for each - //underlying split. When the desired number of input splits is missing, - //use a default number (denoted by zero). - //TODO(malewicz): Currently each partition is split independently into - //a desired number. However, we want the union of all partitions to be - //split into a desired number while maintaining balanced sizes of input - //splits. - int desiredNumSplits = - conf.getInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 0); - org.apache.hadoop.mapred.InputSplit[] baseSplits = - inputFormat.getSplits(jobConf, desiredNumSplits); - - for (org.apache.hadoop.mapred.InputSplit split : baseSplits) { - splits.add(new HCatSplit( - partitionInfo, - split, allCols)); - } - } - - return splits; + List splits = new ArrayList(); + List partitionInfoList = inputJobInfo.getPartitions(); + if (partitionInfoList == null) { + //No partitions match the specified partition filter + return splits; } - /** - * Create the RecordReader for the given InputSplit. Returns the underlying - * RecordReader if the required operations are supported and schema matches - * with HCatTable schema. Returns an HCatRecordReader if operations need to - * be implemented in HCat. 
- * @param split the split - * @param taskContext the task attempt context - * @return the record reader instance, either an HCatRecordReader(later) or - * the underlying storage handler's RecordReader - * @throws IOException or InterruptedException - */ - @Override - public RecordReader - createRecordReader(InputSplit split, - TaskAttemptContext taskContext) throws IOException, InterruptedException { - - HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); - PartInfo partitionInfo = hcatSplit.getPartitionInfo(); - JobContext jobContext = taskContext; - Configuration conf = jobContext.getConfiguration(); - - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( - conf, partitionInfo); - - JobConf jobConf = HCatUtil.getJobConfFromContext(jobContext); - Map jobProperties = partitionInfo.getJobProperties(); - HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); - - Map valuesNotInDataCols = getColValsNotInDataColumns( - getOutputSchema(conf), partitionInfo - ); - - return new HCatRecordReader(storageHandler, valuesNotInDataCols); + HCatStorageHandler storageHandler; + JobConf jobConf; + //For each matching partition, call getSplits on the underlying InputFormat + for (PartInfo partitionInfo : partitionInfoList) { + jobConf = HCatUtil.getJobConfFromContext(jobContext); + setInputPath(jobConf, partitionInfo.getLocation()); + Map jobProperties = partitionInfo.getJobProperties(); + + HCatSchema allCols = new HCatSchema(new LinkedList()); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getDataColumns().getFields()) + allCols.append(field); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getPartitionColumns().getFields()) + allCols.append(field); + + HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); + + storageHandler = HCatUtil.getStorageHandler( + jobConf, partitionInfo); + + //Get the input format + Class inputFormatClass = storageHandler.getInputFormatClass(); + org.apache.hadoop.mapred.InputFormat inputFormat = + getMapRedInputFormat(jobConf, inputFormatClass); + + //Call getSplit on the InputFormat, create an HCatSplit for each + //underlying split. When the desired number of input splits is missing, + //use a default number (denoted by zero). + //TODO(malewicz): Currently each partition is split independently into + //a desired number. However, we want the union of all partitions to be + //split into a desired number while maintaining balanced sizes of input + //splits. 
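One small pattern repeated in getSplits() and getTableSchema(): the full column list attached to each split is simply the table's data columns with the partition columns appended after them. A trivial sketch with strings standing in for HCatFieldSchema:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class AllColumnsSketch {
    static List<String> allColumns(List<String> dataColumns, List<String> partitionColumns) {
        List<String> all = new ArrayList<>(dataColumns);
        all.addAll(partitionColumns);    // partition columns come after the data columns
        return all;
    }

    public static void main(String[] args) {
        // Prints [id, name, dt]: the partition column dt is appended last.
        System.out.println(allColumns(Arrays.asList("id", "name"), Arrays.asList("dt")));
    }
}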
+ int desiredNumSplits = + conf.getInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 0); + org.apache.hadoop.mapred.InputSplit[] baseSplits = + inputFormat.getSplits(jobConf, desiredNumSplits); + + for (org.apache.hadoop.mapred.InputSplit split : baseSplits) { + splits.add(new HCatSplit( + partitionInfo, + split, allCols)); + } } - - /** - * gets values for fields requested by output schema which will not be in the data - */ - private static Map getColValsNotInDataColumns(HCatSchema outputSchema, - PartInfo partInfo) { - HCatSchema dataSchema = partInfo.getPartitionSchema(); - Map vals = new HashMap(); - for (String fieldName : outputSchema.getFieldNames()) { - if (dataSchema.getPosition(fieldName) == null) { - // this entry of output is not present in the output schema - // so, we first check the table schema to see if it is a part col - - if (partInfo.getPartitionValues().containsKey(fieldName)) { - vals.put(fieldName, partInfo.getPartitionValues().get(fieldName)); - } else { - vals.put(fieldName, null); - } - } + return splits; + } + + /** + * Create the RecordReader for the given InputSplit. Returns the underlying + * RecordReader if the required operations are supported and schema matches + * with HCatTable schema. Returns an HCatRecordReader if operations need to + * be implemented in HCat. + * @param split the split + * @param taskContext the task attempt context + * @return the record reader instance, either an HCatRecordReader(later) or + * the underlying storage handler's RecordReader + * @throws IOException or InterruptedException + */ + @Override + public RecordReader + createRecordReader(InputSplit split, + TaskAttemptContext taskContext) throws IOException, InterruptedException { + + HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); + PartInfo partitionInfo = hcatSplit.getPartitionInfo(); + JobContext jobContext = taskContext; + Configuration conf = jobContext.getConfiguration(); + + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( + conf, partitionInfo); + + JobConf jobConf = HCatUtil.getJobConfFromContext(jobContext); + Map jobProperties = partitionInfo.getJobProperties(); + HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); + + Map valuesNotInDataCols = getColValsNotInDataColumns( + getOutputSchema(conf), partitionInfo + ); + + return new HCatRecordReader(storageHandler, valuesNotInDataCols); + } + + + /** + * gets values for fields requested by output schema which will not be in the data + */ + private static Map getColValsNotInDataColumns(HCatSchema outputSchema, + PartInfo partInfo) { + HCatSchema dataSchema = partInfo.getPartitionSchema(); + Map vals = new HashMap(); + for (String fieldName : outputSchema.getFieldNames()) { + if (dataSchema.getPosition(fieldName) == null) { + // this entry of output is not present in the output schema + // so, we first check the table schema to see if it is a part col + + if (partInfo.getPartitionValues().containsKey(fieldName)) { + vals.put(fieldName, partInfo.getPartitionValues().get(fieldName)); + } else { + vals.put(fieldName, null); } - return vals; - } - - /** - * @see org.apache.hcatalog.mapreduce.HCatBaseInputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) - * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} - */ - public static HCatSchema getTableSchema(JobContext context) - throws IOException { - return getTableSchema(context.getConfiguration()); + } } - - - /** - * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput 
call - * on the specified job context. This information is available only after HCatInputFormat.setInput - * has been called for a JobContext. - * @param conf the Configuration object - * @return the table schema - * @throws IOException if HCatInputFormat.setInput has not been called - * for the current context - */ - public static HCatSchema getTableSchema(Configuration conf) - throws IOException { - InputJobInfo inputJobInfo = getJobInfo(conf); - HCatSchema allCols = new HCatSchema(new LinkedList()); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getDataColumns().getFields()) - allCols.append(field); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getPartitionColumns().getFields()) - allCols.append(field); - return allCols; + return vals; + } + + /** + * @see org.apache.hcatalog.mapreduce.HCatBaseInputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) + * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} + */ + public static HCatSchema getTableSchema(JobContext context) + throws IOException { + return getTableSchema(context.getConfiguration()); + } + + + /** + * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput call + * on the specified job context. This information is available only after HCatInputFormat.setInput + * has been called for a JobContext. + * @param conf the Configuration object + * @return the table schema + * @throws IOException if HCatInputFormat.setInput has not been called + * for the current context + */ + public static HCatSchema getTableSchema(Configuration conf) + throws IOException { + InputJobInfo inputJobInfo = getJobInfo(conf); + HCatSchema allCols = new HCatSchema(new LinkedList()); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getDataColumns().getFields()) + allCols.append(field); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getPartitionColumns().getFields()) + allCols.append(field); + return allCols; + } + + /** + * Gets the InputJobInfo object by reading the Configuration and deserializing + * the string. If InputJobInfo is not present in the configuration, throws an + * exception since that means HCatInputFormat.setInput has not been called. + * @param conf the Configuration object + * @return the InputJobInfo object + * @throws IOException the exception + */ + private static InputJobInfo getJobInfo(Configuration conf) + throws IOException { + String jobString = conf.get( + HCatConstants.HCAT_KEY_JOB_INFO); + if (jobString == null) { + throw new IOException("job information not found in JobContext." + + " HCatInputFormat.setInput() not called?"); } - /** - * Gets the InputJobInfo object by reading the Configuration and deserializing - * the string. If InputJobInfo is not present in the configuration, throws an - * exception since that means HCatInputFormat.setInput has not been called. - * @param conf the Configuration object - * @return the InputJobInfo object - * @throws IOException the exception - */ - private static InputJobInfo getJobInfo(Configuration conf) - throws IOException { - String jobString = conf.get( - HCatConstants.HCAT_KEY_JOB_INFO); - if (jobString == null) { - throw new IOException("job information not found in JobContext." 
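getColValsNotInDataColumns() fills in the fields a caller requested that the stored data does not actually contain: if the field is a partition key, its value comes from the partition spec, otherwise it is recorded as null. A sketch of that lookup with schemas modelled as name lists:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MissingColumnValuesSketch {
    static Map<String, String> valuesNotInData(List<String> outputFields,
                                               List<String> dataFields,
                                               Map<String, String> partitionValues) {
        Map<String, String> vals = new HashMap<>();
        for (String field : outputFields) {
            if (!dataFields.contains(field)) {
                vals.put(field, partitionValues.get(field));   // partition value, or null if unknown
            }
        }
        return vals;
    }

    public static void main(String[] args) {
        Map<String, String> partSpec = new HashMap<>();
        partSpec.put("dt", "2013-01-01");
        // Contains dt=2013-01-01 and extra=null (map iteration order may vary).
        System.out.println(valuesNotInData(Arrays.asList("id", "dt", "extra"),
                Arrays.asList("id"), partSpec));
    }
}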
- + " HCatInputFormat.setInput() not called?"); + return (InputJobInfo) HCatUtil.deserialize(jobString); + } + + private void setInputPath(JobConf jobConf, String location) + throws IOException { + + // ideally we should just call FileInputFormat.setInputPaths() here - but + // that won't work since FileInputFormat.setInputPaths() needs + // a Job object instead of a JobContext which we are handed here + + int length = location.length(); + int curlyOpen = 0; + int pathStart = 0; + boolean globPattern = false; + List pathStrings = new ArrayList(); + + for (int i = 0; i < length; i++) { + char ch = location.charAt(i); + switch (ch) { + case '{': { + curlyOpen++; + if (!globPattern) { + globPattern = true; } - - return (InputJobInfo) HCatUtil.deserialize(jobString); - } - - private void setInputPath(JobConf jobConf, String location) - throws IOException { - - // ideally we should just call FileInputFormat.setInputPaths() here - but - // that won't work since FileInputFormat.setInputPaths() needs - // a Job object instead of a JobContext which we are handed here - - int length = location.length(); - int curlyOpen = 0; - int pathStart = 0; - boolean globPattern = false; - List pathStrings = new ArrayList(); - - for (int i = 0; i < length; i++) { - char ch = location.charAt(i); - switch (ch) { - case '{': { - curlyOpen++; - if (!globPattern) { - globPattern = true; - } - break; - } - case '}': { - curlyOpen--; - if (curlyOpen == 0 && globPattern) { - globPattern = false; - } - break; - } - case ',': { - if (!globPattern) { - pathStrings.add(location.substring(pathStart, i)); - pathStart = i + 1; - } - break; - } - } + break; + } + case '}': { + curlyOpen--; + if (curlyOpen == 0 && globPattern) { + globPattern = false; } - pathStrings.add(location.substring(pathStart, length)); - - Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0])); - String separator = ""; - StringBuilder str = new StringBuilder(); - - for (Path path : paths) { - FileSystem fs = path.getFileSystem(jobConf); - final String qualifiedPath = fs.makeQualified(path).toString(); - str.append(separator) - .append(StringUtils.escapeString(qualifiedPath)); - separator = StringUtils.COMMA_STR; + break; + } + case ',': { + if (!globPattern) { + pathStrings.add(location.substring(pathStart, i)); + pathStart = i + 1; } - - jobConf.set("mapred.input.dir", str.toString()); + break; + } + } } + pathStrings.add(location.substring(pathStart, length)); + + Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0])); + String separator = ""; + StringBuilder str = new StringBuilder(); + + for (Path path : paths) { + FileSystem fs = path.getFileSystem(jobConf); + final String qualifiedPath = fs.makeQualified(path).toString(); + str.append(separator) + .append(StringUtils.escapeString(qualifiedPath)); + separator = StringUtils.COMMA_STR; + } + + jobConf.set("mapred.input.dir", str.toString()); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java index f8b6118..f1feb1a 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java @@ -44,116 +44,116 @@ // static final private Log LOG = LogFactory.getLog(HCatBaseOutputFormat.class); - /** - * @see org.apache.hcatalog.mapreduce.HCatBaseOutputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) - * 
@deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} - */ - public static HCatSchema getTableSchema(JobContext context) throws IOException { - return getTableSchema(context.getConfiguration()); + /** + * @see org.apache.hcatalog.mapreduce.HCatBaseOutputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) + * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} + */ + public static HCatSchema getTableSchema(JobContext context) throws IOException { + return getTableSchema(context.getConfiguration()); + } + + /** + * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call + * on the specified job context. + * @param conf the Configuration object + * @return the table schema + * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context + */ + public static HCatSchema getTableSchema(Configuration conf) throws IOException { + OutputJobInfo jobInfo = getJobInfo(conf); + return jobInfo.getTableInfo().getDataColumns(); + } + + /** + * Check for validity of the output-specification for the job. + * @param context information about the job + * @throws IOException when output should not be attempted + */ + @Override + public void checkOutputSpecs(JobContext context + ) throws IOException, InterruptedException { + getOutputFormat(context).checkOutputSpecs(context); + } + + /** + * Gets the output format instance. + * @param context the job context + * @return the output format instance + * @throws IOException + */ + protected OutputFormat, HCatRecord> getOutputFormat(JobContext context) throws IOException { + OutputJobInfo jobInfo = getJobInfo(context); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + //why do we need this? + configureOutputStorageHandler(context); + return storageHandler.getOutputFormatContainer(ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), context.getConfiguration())); + } + + /** + * @see org.apache.hcatalog.mapreduce.HCatBaseOutputFormat#getJobInfo(org.apache.hadoop.conf.Configuration) + * @deprecated use {@link #getJobInfo(org.apache.hadoop.conf.Configuration)} + */ + public static OutputJobInfo getJobInfo(JobContext jobContext) throws IOException { + return getJobInfo(jobContext.getConfiguration()); + } + + /** + * Gets the HCatOuputJobInfo object by reading the Configuration and deserializing + * the string. If InputJobInfo is not present in the configuration, throws an + * exception since that means HCatOutputFormat.setOutput has not been called. + * @param conf the job Configuration object + * @return the OutputJobInfo object + * @throws IOException the IO exception + */ + public static OutputJobInfo getJobInfo(Configuration conf) throws IOException { + String jobString = conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + if (jobString == null) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED); } - /** - * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call - * on the specified job context. - * @param conf the Configuration object - * @return the table schema - * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context - */ - public static HCatSchema getTableSchema(Configuration conf) throws IOException { - OutputJobInfo jobInfo = getJobInfo(conf); - return jobInfo.getTableInfo().getDataColumns(); - } - - /** - * Check for validity of the output-specification for the job. 
- * @param context information about the job - * @throws IOException when output should not be attempted - */ - @Override - public void checkOutputSpecs(JobContext context - ) throws IOException, InterruptedException { - getOutputFormat(context).checkOutputSpecs(context); - } - - /** - * Gets the output format instance. - * @param context the job context - * @return the output format instance - * @throws IOException - */ - protected OutputFormat, HCatRecord> getOutputFormat(JobContext context) throws IOException { - OutputJobInfo jobInfo = getJobInfo(context); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - //why do we need this? - configureOutputStorageHandler(context); - return storageHandler.getOutputFormatContainer(ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), context.getConfiguration())); - } - - /** - * @see org.apache.hcatalog.mapreduce.HCatBaseOutputFormat#getJobInfo(org.apache.hadoop.conf.Configuration) - * @deprecated use {@link #getJobInfo(org.apache.hadoop.conf.Configuration)} - */ - public static OutputJobInfo getJobInfo(JobContext jobContext) throws IOException { - return getJobInfo(jobContext.getConfiguration()); - } - - /** - * Gets the HCatOuputJobInfo object by reading the Configuration and deserializing - * the string. If InputJobInfo is not present in the configuration, throws an - * exception since that means HCatOutputFormat.setOutput has not been called. - * @param conf the job Configuration object - * @return the OutputJobInfo object - * @throws IOException the IO exception - */ - public static OutputJobInfo getJobInfo(Configuration conf) throws IOException { - String jobString = conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); - if (jobString == null) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED); + return (OutputJobInfo) HCatUtil.deserialize(jobString); + } + + /** + * Configure the output storage handler + * @param jobContext the job context + * @throws IOException + */ + @SuppressWarnings("unchecked") + static void configureOutputStorageHandler( + JobContext jobContext) throws IOException { + configureOutputStorageHandler(jobContext, (List) null); + } + + /** + * Configure the output storage handler with allowing specification of missing dynamic partvals + * @param jobContext the job context + * @param dynamicPartVals + * @throws IOException + */ + @SuppressWarnings("unchecked") + static void configureOutputStorageHandler( + JobContext jobContext, List dynamicPartVals) throws IOException { + Configuration conf = jobContext.getConfiguration(); + try { + OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, jobInfo.getTableInfo().getStorerInfo()); + + Map partitionValues = jobInfo.getPartitionValues(); + String location = jobInfo.getLocation(); + + if (dynamicPartVals != null) { + // dynamic part vals specified + List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); + if (dynamicPartVals.size() != dynamicPartKeys.size()) { + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, + "Unable to configure dynamic partitioning for storage handler, mismatch between" + + " number of partition values obtained[" + dynamicPartVals.size() + + "] and number of partition values required[" + dynamicPartKeys.size() + "]"); + } + for (int i = 0; i < dynamicPartKeys.size(); i++) { + 
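getJobInfo() above recovers the job description that setOutput() stored as a serialized string under a single configuration key, and treats a missing key as "setOutput() was never called". The exact encoding lives in HCatUtil and is not shown in this patch; the sketch below only illustrates the general round-trip idea with standard Java serialization and Base64, a made-up key name, and a plain map standing in for a Hadoop Configuration.

import java.io.*;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;

public class ConfSerializationSketch {

  // Hypothetical key name, standing in for HCatConstants.HCAT_KEY_OUTPUT_INFO.
  static final String KEY_OUTPUT_INFO = "example.hcat.output.info";

  static String serialize(Serializable obj) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
      out.writeObject(obj);
    }
    return Base64.getEncoder().encodeToString(bytes.toByteArray());
  }

  static Object deserialize(String s) throws IOException, ClassNotFoundException {
    byte[] bytes = Base64.getDecoder().decode(s);
    try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
      return in.readObject();
    }
  }

  public static void main(String[] args) throws Exception {
    Map<String, String> conf = new HashMap<String, String>();   // stand-in for a Configuration
    conf.put(KEY_OUTPUT_INFO, serialize("pretend this is an OutputJobInfo"));

    String jobString = conf.get(KEY_OUTPUT_INFO);
    if (jobString == null) {
      // mirrors the ERROR_NOT_INITIALIZED check: setOutput() was never called
      throw new IllegalStateException("HCatOutputFormat.setOutput() not called");
    }
    System.out.println(deserialize(jobString));
  }
}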
partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i)); } - - return (OutputJobInfo) HCatUtil.deserialize(jobString); - } - - /** - * Configure the output storage handler - * @param jobContext the job context - * @throws IOException - */ - @SuppressWarnings("unchecked") - static void configureOutputStorageHandler( - JobContext jobContext) throws IOException { - configureOutputStorageHandler(jobContext, (List) null); - } - - /** - * Configure the output storage handler with allowing specification of missing dynamic partvals - * @param jobContext the job context - * @param dynamicPartVals - * @throws IOException - */ - @SuppressWarnings("unchecked") - static void configureOutputStorageHandler( - JobContext jobContext, List dynamicPartVals) throws IOException { - Configuration conf = jobContext.getConfiguration(); - try { - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, jobInfo.getTableInfo().getStorerInfo()); - - Map partitionValues = jobInfo.getPartitionValues(); - String location = jobInfo.getLocation(); - - if (dynamicPartVals != null) { - // dynamic part vals specified - List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); - if (dynamicPartVals.size() != dynamicPartKeys.size()) { - throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, - "Unable to configure dynamic partitioning for storage handler, mismatch between" - + " number of partition values obtained[" + dynamicPartVals.size() - + "] and number of partition values required[" + dynamicPartKeys.size() + "]"); - } - for (int i = 0; i < dynamicPartKeys.size(); i++) { - partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i)); - } // // re-home location, now that we know the rest of the partvals // Table table = jobInfo.getTableInfo().getTable(); @@ -162,85 +162,85 @@ static void configureOutputStorageHandler( // for(FieldSchema schema : table.getPartitionKeys()) { // partitionCols.add(schema.getName()); // } - jobInfo.setPartitionValues(partitionValues); - } - - HCatUtil.configureOutputStorageHandler(storageHandler, conf, jobInfo); - } catch (Exception e) { - if (e instanceof HCatException) { - throw (HCatException) e; - } else { - throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e); - } - } + jobInfo.setPartitionValues(partitionValues); + } + + HCatUtil.configureOutputStorageHandler(storageHandler, conf, jobInfo); + } catch (Exception e) { + if (e instanceof HCatException) { + throw (HCatException) e; + } else { + throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e); + } } - - /** - * Configure the output storage handler, with allowing specification - * of partvals from which it picks the dynamic partvals - * @param context the job context - * @param jobInfo the output job info - * @param fullPartSpec - * @throws IOException - */ - - protected static void configureOutputStorageHandler( - JobContext context, OutputJobInfo jobInfo, - Map fullPartSpec) throws IOException { - List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); - if ((dynamicPartKeys == null) || (dynamicPartKeys.isEmpty())) { - configureOutputStorageHandler(context, (List) null); - } else { - List dynKeyVals = new ArrayList(); - for (String dynamicPartKey : dynamicPartKeys) { - dynKeyVals.add(fullPartSpec.get(dynamicPartKey)); - } - configureOutputStorageHandler(context, dynKeyVals); - } + } + + /** + * Configure the output storage handler, with allowing 
specification + * of partvals from which it picks the dynamic partvals + * @param context the job context + * @param jobInfo the output job info + * @param fullPartSpec + * @throws IOException + */ + + protected static void configureOutputStorageHandler( + JobContext context, OutputJobInfo jobInfo, + Map fullPartSpec) throws IOException { + List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); + if ((dynamicPartKeys == null) || (dynamicPartKeys.isEmpty())) { + configureOutputStorageHandler(context, (List) null); + } else { + List dynKeyVals = new ArrayList(); + for (String dynamicPartKey : dynamicPartKeys) { + dynKeyVals.add(fullPartSpec.get(dynamicPartKey)); + } + configureOutputStorageHandler(context, dynKeyVals); } + } - protected static void setPartDetails(OutputJobInfo jobInfo, final HCatSchema schema, - Map partMap) throws HCatException, IOException { - List posOfPartCols = new ArrayList(); - List posOfDynPartCols = new ArrayList(); + protected static void setPartDetails(OutputJobInfo jobInfo, final HCatSchema schema, + Map partMap) throws HCatException, IOException { + List posOfPartCols = new ArrayList(); + List posOfDynPartCols = new ArrayList(); - // If partition columns occur in data, we want to remove them. - // So, find out positions of partition columns in schema provided by user. - // We also need to update the output Schema with these deletions. + // If partition columns occur in data, we want to remove them. + // So, find out positions of partition columns in schema provided by user. + // We also need to update the output Schema with these deletions. - // Note that, output storage handlers never sees partition columns in data - // or schema. + // Note that, output storage handlers never sees partition columns in data + // or schema. - HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields()); - for (String partKey : partMap.keySet()) { - Integer idx; - if ((idx = schema.getPosition(partKey)) != null) { - posOfPartCols.add(idx); - schemaWithoutParts.remove(schema.get(partKey)); - } - } + HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields()); + for (String partKey : partMap.keySet()) { + Integer idx; + if ((idx = schema.getPosition(partKey)) != null) { + posOfPartCols.add(idx); + schemaWithoutParts.remove(schema.get(partKey)); + } + } - // Also, if dynamic partitioning is being used, we want to - // set appropriate list of columns for the columns to be dynamically specified. - // These would be partition keys too, so would also need to be removed from - // output schema and partcols - - if (jobInfo.isDynamicPartitioningUsed()) { - for (String partKey : jobInfo.getDynamicPartitioningKeys()) { - Integer idx; - if ((idx = schema.getPosition(partKey)) != null) { - posOfPartCols.add(idx); - posOfDynPartCols.add(idx); - schemaWithoutParts.remove(schema.get(partKey)); - } - } + // Also, if dynamic partitioning is being used, we want to + // set appropriate list of columns for the columns to be dynamically specified. 
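The configureOutputStorageHandler() overloads above reduce a full partition spec to just the dynamic values, in the declared key order, before handing them to the storage handler. A plain-collections sketch of that resolution (hypothetical inputs, not the real OutputJobInfo API):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DynamicPartValsSketch {

  /**
   * Pick the values of the dynamic partition keys out of a full partition spec,
   * preserving the declared key order (the order the values are later zipped back
   * into the partition-value map).
   */
  static List<String> resolveDynamicValues(List<String> dynamicPartKeys,
                                           Map<String, String> fullPartSpec) {
    List<String> dynVals = new ArrayList<String>();
    for (String key : dynamicPartKeys) {
      // The patch validates counts against the job's dynamic key list instead of
      // checking for nulls here; this sketch simply fails fast on a missing value.
      String val = fullPartSpec.get(key);
      if (val == null) {
        throw new IllegalArgumentException("No value supplied for dynamic partition key: " + key);
      }
      dynVals.add(val);
    }
    return dynVals;
  }

  public static void main(String[] args) {
    Map<String, String> fullSpec = new HashMap<String, String>();
    fullSpec.put("ds", "2013-01-01");
    fullSpec.put("region", "us");
    System.out.println(resolveDynamicValues(Arrays.asList("ds", "region"), fullSpec));
    // -> [2013-01-01, us]
  }
}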
+ // These would be partition keys too, so would also need to be removed from + // output schema and partcols + + if (jobInfo.isDynamicPartitioningUsed()) { + for (String partKey : jobInfo.getDynamicPartitioningKeys()) { + Integer idx; + if ((idx = schema.getPosition(partKey)) != null) { + posOfPartCols.add(idx); + posOfDynPartCols.add(idx); + schemaWithoutParts.remove(schema.get(partKey)); } - - HCatUtil.validatePartitionSchema( - new Table(jobInfo.getTableInfo().getTable()), schemaWithoutParts); - jobInfo.setPosOfPartCols(posOfPartCols); - jobInfo.setPosOfDynPartCols(posOfDynPartCols); - jobInfo.setOutputSchema(schemaWithoutParts); + } } + + HCatUtil.validatePartitionSchema( + new Table(jobInfo.getTableInfo().getTable()), schemaWithoutParts); + jobInfo.setPosOfPartCols(posOfPartCols); + jobInfo.setPosOfDynPartCols(posOfDynPartCols); + jobInfo.setOutputSchema(schemaWithoutParts); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatInputFormat.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatInputFormat.java index fd80e86..87a0202 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatInputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatInputFormat.java @@ -36,102 +36,102 @@ @InterfaceStability.Evolving public class HCatInputFormat extends HCatBaseInputFormat { - private Configuration conf; - private InputJobInfo inputJobInfo; + private Configuration conf; + private InputJobInfo inputJobInfo; - /** - * @deprecated as of release 0.5, and will be removed in a future release - */ - @Deprecated - public static void setInput(Job job, InputJobInfo inputJobInfo) throws IOException { - setInput(job.getConfiguration(), inputJobInfo); - } + /** + * @deprecated as of release 0.5, and will be removed in a future release + */ + @Deprecated + public static void setInput(Job job, InputJobInfo inputJobInfo) throws IOException { + setInput(job.getConfiguration(), inputJobInfo); + } - /** - * @deprecated as of release 0.5, and will be removed in a future release - */ - @Deprecated - public static void setInput(Configuration conf, InputJobInfo inputJobInfo) throws IOException { - setInput(conf, inputJobInfo.getDatabaseName(), inputJobInfo.getTableName()) - .setFilter(inputJobInfo.getFilter()) - .setProperties(inputJobInfo.getProperties()); - } + /** + * @deprecated as of release 0.5, and will be removed in a future release + */ + @Deprecated + public static void setInput(Configuration conf, InputJobInfo inputJobInfo) throws IOException { + setInput(conf, inputJobInfo.getDatabaseName(), inputJobInfo.getTableName()) + .setFilter(inputJobInfo.getFilter()) + .setProperties(inputJobInfo.getProperties()); + } - /** - * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String)} - */ - public static HCatInputFormat setInput(Job job, String dbName, String tableName) throws IOException { - return setInput(job.getConfiguration(), dbName, tableName); - } + /** + * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String)} + */ + public static HCatInputFormat setInput(Job job, String dbName, String tableName) throws IOException { + return setInput(job.getConfiguration(), dbName, tableName); + } - /** - * Set inputs to use for the job. This queries the metastore with the given input - * specification and serializes matching partitions into the job conf for use by MR tasks. 
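setPartDetails() above strips static and dynamic partition columns out of the schema the user supplied, remembering their positions so writers can drop those fields per record. A simplified sketch using field names and plain collections rather than the HCatSchema API:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class StripPartitionColumnsSketch {

  static final class Result {
    final List<String> dataColumns = new ArrayList<String>();     // schema without partition columns
    final List<Integer> posOfPartCols = new ArrayList<Integer>(); // positions removed from the user schema
  }

  static Result stripPartitionColumns(List<String> userSchema, Set<String> partitionKeys) {
    Result r = new Result();
    for (int i = 0; i < userSchema.size(); i++) {
      String field = userSchema.get(i);
      if (partitionKeys.contains(field)) {
        r.posOfPartCols.add(i);     // remember where it was so the writer can drop it per record
      } else {
        r.dataColumns.add(field);   // storage handlers never see partition columns
      }
    }
    return r;
  }

  public static void main(String[] args) {
    Result r = stripPartitionColumns(
        Arrays.asList("id", "name", "ds"),
        new HashSet<String>(Collections.singletonList("ds")));
    System.out.println(r.dataColumns + " / partition column positions: " + r.posOfPartCols);
    // -> [id, name] / partition column positions: [2]
  }
}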
- * @param conf the job configuration - * @param dbName database name, which if null 'default' is used - * @param tableName table name - * @throws IOException on all errors - */ - public static HCatInputFormat setInput(Configuration conf, String dbName, String tableName) - throws IOException { + /** + * Set inputs to use for the job. This queries the metastore with the given input + * specification and serializes matching partitions into the job conf for use by MR tasks. + * @param conf the job configuration + * @param dbName database name, which if null 'default' is used + * @param tableName table name + * @throws IOException on all errors + */ + public static HCatInputFormat setInput(Configuration conf, String dbName, String tableName) + throws IOException { - Preconditions.checkNotNull(conf, "required argument 'conf' is null"); - Preconditions.checkNotNull(tableName, "required argument 'tableName' is null"); + Preconditions.checkNotNull(conf, "required argument 'conf' is null"); + Preconditions.checkNotNull(tableName, "required argument 'tableName' is null"); - HCatInputFormat hCatInputFormat = new HCatInputFormat(); - hCatInputFormat.conf = conf; - hCatInputFormat.inputJobInfo = InputJobInfo.create(dbName, tableName, null, null); + HCatInputFormat hCatInputFormat = new HCatInputFormat(); + hCatInputFormat.conf = conf; + hCatInputFormat.inputJobInfo = InputJobInfo.create(dbName, tableName, null, null); - try { - InitializeInput.setInput(conf, hCatInputFormat.inputJobInfo); - } catch (Exception e) { - throw new IOException(e); - } - - return hCatInputFormat; + try { + InitializeInput.setInput(conf, hCatInputFormat.inputJobInfo); + } catch (Exception e) { + throw new IOException(e); } - /** - * Set a filter on the input table. - * @param filter the filter specification, which may be null - * @return this - * @throws IOException on all errors - */ - public HCatInputFormat setFilter(String filter) throws IOException { - // null filters are supported to simplify client code - if (filter != null) { - inputJobInfo = InputJobInfo.create( - inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName(), - filter, - inputJobInfo.getProperties()); - try { - InitializeInput.setInput(conf, inputJobInfo); - } catch (Exception e) { - throw new IOException(e); - } - } - return this; + return hCatInputFormat; + } + + /** + * Set a filter on the input table. + * @param filter the filter specification, which may be null + * @return this + * @throws IOException on all errors + */ + public HCatInputFormat setFilter(String filter) throws IOException { + // null filters are supported to simplify client code + if (filter != null) { + inputJobInfo = InputJobInfo.create( + inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName(), + filter, + inputJobInfo.getProperties()); + try { + InitializeInput.setInput(conf, inputJobInfo); + } catch (Exception e) { + throw new IOException(e); + } } + return this; + } - /** - * Set properties for the input format. 
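For context, a read-side driver built on the setInput()/setFilter() chain reformatted above might look like the following. This is an illustrative sketch, not part of the patch: the table name, filter and output path are invented, and the mapper just copies the first column of each HCatRecord.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.mapreduce.HCatInputFormat;

public class ReadFromHCatDriver {

  /** Emits the first column of every HCatalog record as plain text. */
  public static class FirstColumnMapper
      extends Mapper<WritableComparable, HCatRecord, NullWritable, Text> {
    @Override
    protected void map(WritableComparable key, HCatRecord value, Context context)
        throws IOException, InterruptedException {
      context.write(NullWritable.get(), new Text(String.valueOf(value.get(0))));
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration(), "read-from-hcat");
    job.setJarByClass(ReadFromHCatDriver.class);

    // Queries the metastore and serializes the matching partitions into the job conf;
    // a null database name would mean 'default', and the filter prunes partitions.
    HCatInputFormat.setInput(job, "default", "web_logs")
        .setFilter("ds=\"2013-01-01\"");

    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapperClass(FirstColumnMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[0]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}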
- * @param properties properties for the input specification - * @return this - * @throws IOException on all errors - */ - public HCatInputFormat setProperties(Properties properties) throws IOException { - Preconditions.checkNotNull(properties, "required argument 'properties' is null"); - inputJobInfo = InputJobInfo.create( - inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName(), - inputJobInfo.getFilter(), - properties); - try { - InitializeInput.setInput(conf, inputJobInfo); - } catch (Exception e) { - throw new IOException(e); - } - return this; + /** + * Set properties for the input format. + * @param properties properties for the input specification + * @return this + * @throws IOException on all errors + */ + public HCatInputFormat setProperties(Properties properties) throws IOException { + Preconditions.checkNotNull(properties, "required argument 'properties' is null"); + inputJobInfo = InputJobInfo.create( + inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName(), + inputJobInfo.getFilter(), + properties); + try { + InitializeInput.setInput(conf, inputJobInfo); + } catch (Exception e) { + throw new IOException(e); } + return this; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java index 9ab8a6e..facade9 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java @@ -55,226 +55,226 @@ */ public class HCatOutputFormat extends HCatBaseOutputFormat { - static final private Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); - - private static int maxDynamicPartitions; - private static boolean harRequested; - - /** - * @see org.apache.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo) - */ - public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException { - setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo); - } - - /** - * Set the information about the output to write for the job. This queries the metadata server - * to find the StorageHandler to use for the table. It throws an error if the - * partition is already published. 
- * @param conf the Configuration object - * @param credentials the Credentials object - * @param outputJobInfo the table output information for the job - * @throws IOException the exception in communicating with the metadata server - */ - @SuppressWarnings("unchecked") - public static void setOutput(Configuration conf, Credentials credentials, - OutputJobInfo outputJobInfo) throws IOException { - HiveMetaStoreClient client = null; - - try { - - HiveConf hiveConf = HCatUtil.getHiveConf(conf); - client = HCatUtil.getHiveClient(hiveConf); - Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), - outputJobInfo.getTableName()); - - List indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE); - - for (String indexName : indexList) { - Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName); - if (!index.isDeferredRebuild()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported"); - } - } - StorageDescriptor sd = table.getTTable().getSd(); + static final private Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); + + private static int maxDynamicPartitions; + private static boolean harRequested; + + /** + * @see org.apache.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo) + */ + public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException { + setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo); + } + + /** + * Set the information about the output to write for the job. This queries the metadata server + * to find the StorageHandler to use for the table. It throws an error if the + * partition is already published. 
+ * @param conf the Configuration object + * @param credentials the Credentials object + * @param outputJobInfo the table output information for the job + * @throws IOException the exception in communicating with the metadata server + */ + @SuppressWarnings("unchecked") + public static void setOutput(Configuration conf, Credentials credentials, + OutputJobInfo outputJobInfo) throws IOException { + HiveMetaStoreClient client = null; + + try { + + HiveConf hiveConf = HCatUtil.getHiveConf(conf); + client = HCatUtil.getHiveClient(hiveConf); + Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), + outputJobInfo.getTableName()); + + List indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE); + + for (String indexName : indexList) { + Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName); + if (!index.isDeferredRebuild()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported"); + } + } + StorageDescriptor sd = table.getTTable().getSd(); + + if (sd.isCompressed()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported"); + } + + if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported"); + } + + if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported"); + } + + if (table.getTTable().getPartitionKeysSize() == 0) { + if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) { + // attempt made to save partition values in non-partitioned table - throw error. + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, + "Partition values specified for non-partitioned table"); + } + // non-partitioned table + outputJobInfo.setPartitionValues(new HashMap()); + + } else { + // partitioned table, we expect partition values + // convert user specified map to have lower case key names + Map valueMap = new HashMap(); + if (outputJobInfo.getPartitionValues() != null) { + for (Map.Entry entry : outputJobInfo.getPartitionValues().entrySet()) { + valueMap.put(entry.getKey().toLowerCase(), entry.getValue()); + } + } - if (sd.isCompressed()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported"); + if ((outputJobInfo.getPartitionValues() == null) + || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) { + // dynamic partition usecase - partition values were null, or not all were specified + // need to figure out which keys are not specified. 
+ List dynamicPartitioningKeys = new ArrayList(); + boolean firstItem = true; + for (FieldSchema fs : table.getPartitionKeys()) { + if (!valueMap.containsKey(fs.getName().toLowerCase())) { + dynamicPartitioningKeys.add(fs.getName().toLowerCase()); } + } - if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported"); - } + if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) { + // If this isn't equal, then bogus key values have been inserted, error out. + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified"); + } - if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported"); - } - - if (table.getTTable().getPartitionKeysSize() == 0) { - if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) { - // attempt made to save partition values in non-partitioned table - throw error. - throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, - "Partition values specified for non-partitioned table"); - } - // non-partitioned table - outputJobInfo.setPartitionValues(new HashMap()); - - } else { - // partitioned table, we expect partition values - // convert user specified map to have lower case key names - Map valueMap = new HashMap(); - if (outputJobInfo.getPartitionValues() != null) { - for (Map.Entry entry : outputJobInfo.getPartitionValues().entrySet()) { - valueMap.put(entry.getKey().toLowerCase(), entry.getValue()); - } - } - - if ((outputJobInfo.getPartitionValues() == null) - || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) { - // dynamic partition usecase - partition values were null, or not all were specified - // need to figure out which keys are not specified. - List dynamicPartitioningKeys = new ArrayList(); - boolean firstItem = true; - for (FieldSchema fs : table.getPartitionKeys()) { - if (!valueMap.containsKey(fs.getName().toLowerCase())) { - dynamicPartitioningKeys.add(fs.getName().toLowerCase()); - } - } - - if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) { - // If this isn't equal, then bogus key values have been inserted, error out. 
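The block above derives the dynamic partitioning keys as "table partition keys minus user-supplied values" (compared case-insensitively) and rejects specs whose combined size does not match the table. A plain-Java sketch of that derivation, with lists standing in for the metastore types:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DynamicKeysSketch {

  /** Returns the partition keys the user did not supply, preserving table key order. */
  static List<String> dynamicPartitioningKeys(List<String> tablePartitionKeys,
                                              Map<String, String> userPartitionValues) {
    // Lower-case the user-supplied key names, as the patch does before comparing.
    Map<String, String> valueMap = new HashMap<String, String>();
    for (Map.Entry<String, String> e : userPartitionValues.entrySet()) {
      valueMap.put(e.getKey().toLowerCase(), e.getValue());
    }

    List<String> dynamicKeys = new ArrayList<String>();
    for (String key : tablePartitionKeys) {
      if (!valueMap.containsKey(key.toLowerCase())) {
        dynamicKeys.add(key.toLowerCase());
      }
    }

    // If static + dynamic does not add up to the table's key count, a bogus key was supplied.
    if (valueMap.size() + dynamicKeys.size() != tablePartitionKeys.size()) {
      throw new IllegalArgumentException("Invalid partition keys specified");
    }
    return dynamicKeys;
  }

  public static void main(String[] args) {
    System.out.println(dynamicPartitioningKeys(
        Arrays.asList("ds", "region"), Collections.singletonMap("DS", "2013-01-01")));
    // -> [region]
  }
}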
- throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified"); - } - - outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys); - String dynHash; - if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) { - dynHash = String.valueOf(Math.random()); + outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys); + String dynHash; + if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) { + dynHash = String.valueOf(Math.random()); // LOG.info("New dynHash : ["+dynHash+"]"); // }else{ // LOG.info("Old dynHash : ["+dynHash+"]"); - } - conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash); - - } - - outputJobInfo.setPartitionValues(valueMap); - } - - HCatSchema tableSchema = HCatUtil.extractSchema(table); - StorerInfo storerInfo = - InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters()); - - List partitionCols = new ArrayList(); - for (FieldSchema schema : table.getPartitionKeys()) { - partitionCols.add(schema.getName()); - } - - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); - - //Serialize the output info into the configuration - outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); - outputJobInfo.setOutputSchema(tableSchema); - harRequested = getHarRequested(hiveConf); - outputJobInfo.setHarRequested(harRequested); - maxDynamicPartitions = getMaxDynamicPartitions(hiveConf); - outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions); + } + conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash); - HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo); - - Path tblPath = new Path(table.getTTable().getSd().getLocation()); - - /* Set the umask in conf such that files/dirs get created with table-dir - * permissions. Following three assumptions are made: - * 1. Actual files/dirs creation is done by RecordWriter of underlying - * output format. It is assumed that they use default permissions while creation. - * 2. Default Permissions = FsPermission.getDefault() = 777. - * 3. UMask is honored by underlying filesystem. - */ - - FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask( - tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission())); - - if (Security.getInstance().isSecurityEnabled()) { - Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested); - } - } catch (Exception e) { - if (e instanceof HCatException) { - throw (HCatException) e; - } else { - throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); - } - } finally { - HCatUtil.closeHiveClientQuietly(client); } - } - - /** - * @see org.apache.hcatalog.mapreduce.HCatOutputFormat#setSchema(org.apache.hadoop.conf.Configuration, org.apache.hcatalog.data.schema.HCatSchema) - */ - public static void setSchema(final Job job, final HCatSchema schema) throws IOException { - setSchema(job.getConfiguration(), schema); - } - /** - * Set the schema for the data being written out to the partition. The - * table schema is used by default for the partition if this is not called. 
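The umask handling above (FsPermission.getDefault().applyUMask(tableDirPermission)) is worth a worked example: applyUMask() clears the bits set in the umask, so deriving the umask from the table directory's permission makes files created with the default permission come out matching the table directory. A tiny arithmetic sketch with octal literals, reflecting my reading of the comment in the patch:

public class UmaskSketch {
  public static void main(String[] args) {
    int defaultPerm = 0777;    // FsPermission.getDefault()
    int tableDirPerm = 0750;   // e.g. the table directory is rwxr-x---

    // FsPermission.applyUMask(umask) is "perm & ~umask".
    int umask = defaultPerm & ~tableDirPerm;   // 0777 & ~0750 = 0027
    int created = defaultPerm & ~umask;        // what a writer using the defaults then gets

    System.out.printf("umask = %04o, created files/dirs = %04o%n", umask, created);
    // umask = 0027, created files/dirs = 0750  -> matches the table directory
  }
}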
- * @param conf the job Configuration object - * @param schema the schema for the data - * @throws IOException - */ - public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException { - OutputJobInfo jobInfo = getJobInfo(conf); - Map partMap = jobInfo.getPartitionValues(); - setPartDetails(jobInfo, schema, partMap); - conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo)); + outputJobInfo.setPartitionValues(valueMap); + } + + HCatSchema tableSchema = HCatUtil.extractSchema(table); + StorerInfo storerInfo = + InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters()); + + List partitionCols = new ArrayList(); + for (FieldSchema schema : table.getPartitionKeys()) { + partitionCols.add(schema.getName()); + } + + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); + + //Serialize the output info into the configuration + outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); + outputJobInfo.setOutputSchema(tableSchema); + harRequested = getHarRequested(hiveConf); + outputJobInfo.setHarRequested(harRequested); + maxDynamicPartitions = getMaxDynamicPartitions(hiveConf); + outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions); + + HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo); + + Path tblPath = new Path(table.getTTable().getSd().getLocation()); + + /* Set the umask in conf such that files/dirs get created with table-dir + * permissions. Following three assumptions are made: + * 1. Actual files/dirs creation is done by RecordWriter of underlying + * output format. It is assumed that they use default permissions while creation. + * 2. Default Permissions = FsPermission.getDefault() = 777. + * 3. UMask is honored by underlying filesystem. + */ + + FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask( + tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission())); + + if (Security.getInstance().isSecurityEnabled()) { + Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested); + } + } catch (Exception e) { + if (e instanceof HCatException) { + throw (HCatException) e; + } else { + throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); + } + } finally { + HCatUtil.closeHiveClientQuietly(client); } - - /** - * Get the record writer for the job. This uses the StorageHandler's default - * OutputFormat to get the record writer. - * @param context the information about the current task - * @return a RecordWriter to write the output for the job - * @throws IOException - * @throws InterruptedException - */ - @Override - public RecordWriter, HCatRecord> - getRecordWriter(TaskAttemptContext context) - throws IOException, InterruptedException { - return getOutputFormat(context).getRecordWriter(context); + } + + /** + * @see org.apache.hcatalog.mapreduce.HCatOutputFormat#setSchema(org.apache.hadoop.conf.Configuration, org.apache.hcatalog.data.schema.HCatSchema) + */ + public static void setSchema(final Job job, final HCatSchema schema) throws IOException { + setSchema(job.getConfiguration(), schema); + } + + /** + * Set the schema for the data being written out to the partition. The + * table schema is used by default for the partition if this is not called. 
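A matching write-side driver, using the setOutput(), getTableSchema() and setSchema() calls reformatted in this file, might look like the following. Illustrative only and not part of the patch: the table name and partition spec are invented, the mapper/reducer are elided, and OutputJobInfo.create(db, table, partitionValues) is quoted from memory of the 0.5 API rather than from this diff.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hcatalog.mapreduce.OutputJobInfo;

public class WriteToHCatDriver {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration(), "write-to-hcat");
    job.setJarByClass(WriteToHCatDriver.class);

    // One static partition; leaving a key out of this map makes it a dynamic partition.
    Map<String, String> partitionSpec = new HashMap<String, String>();
    partitionSpec.put("ds", "2013-01-01");

    // Looks up the table in the metastore, validates the partition spec and
    // serializes everything the tasks will need into the job configuration.
    HCatOutputFormat.setOutput(job,
        OutputJobInfo.create("default", "web_logs_copy", partitionSpec));

    // Write with the table's own schema; a narrower schema could be passed instead.
    HCatSchema schema = HCatOutputFormat.getTableSchema(job.getConfiguration());
    HCatOutputFormat.setSchema(job, schema);

    job.setOutputFormatClass(HCatOutputFormat.class);
    // ... mapper/reducer emitting <NullWritable, HCatRecord> pairs elided ...
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}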
+ * @param conf the job Configuration object + * @param schema the schema for the data + * @throws IOException + */ + public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException { + OutputJobInfo jobInfo = getJobInfo(conf); + Map partMap = jobInfo.getPartitionValues(); + setPartDetails(jobInfo, schema, partMap); + conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo)); + } + + /** + * Get the record writer for the job. This uses the StorageHandler's default + * OutputFormat to get the record writer. + * @param context the information about the current task + * @return a RecordWriter to write the output for the job + * @throws IOException + * @throws InterruptedException + */ + @Override + public RecordWriter, HCatRecord> + getRecordWriter(TaskAttemptContext context) + throws IOException, InterruptedException { + return getOutputFormat(context).getRecordWriter(context); + } + + + /** + * Get the output committer for this output format. This is responsible + * for ensuring the output is committed correctly. + * @param context the task context + * @return an output committer + * @throws IOException + * @throws InterruptedException + */ + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context + ) throws IOException, InterruptedException { + return getOutputFormat(context).getOutputCommitter(context); + } + + private static int getMaxDynamicPartitions(HiveConf hConf) { + // by default the bounds checking for maximum number of + // dynamic partitions is disabled (-1) + int maxDynamicPartitions = -1; + + if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { + maxDynamicPartitions = hConf.getIntVar( + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); } + return maxDynamicPartitions; + } - /** - * Get the output committer for this output format. This is responsible - * for ensuring the output is committed correctly. 
- * @param context the task context - * @return an output committer - * @throws IOException - * @throws InterruptedException - */ - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context - ) throws IOException, InterruptedException { - return getOutputFormat(context).getOutputCommitter(context); - } - - private static int getMaxDynamicPartitions(HiveConf hConf) { - // by default the bounds checking for maximum number of - // dynamic partitions is disabled (-1) - int maxDynamicPartitions = -1; - - if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { - maxDynamicPartitions = hConf.getIntVar( - HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); - } - - return maxDynamicPartitions; - } - - private static boolean getHarRequested(HiveConf hConf) { - return hConf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED); - } + private static boolean getHarRequested(HiveConf hConf) { + return hConf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java index 5c1a4dc..71f9c44 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java @@ -47,240 +47,240 @@ */ class HCatRecordReader extends RecordReader { - private static final Logger LOG = LoggerFactory.getLogger(HCatRecordReader.class); + private static final Logger LOG = LoggerFactory.getLogger(HCatRecordReader.class); - private InputErrorTracker errorTracker; + private InputErrorTracker errorTracker; - WritableComparable currentKey; - Writable currentValue; - HCatRecord currentHCatRecord; + WritableComparable currentKey; + Writable currentValue; + HCatRecord currentHCatRecord; - /** The underlying record reader to delegate to. */ - private org.apache.hadoop.mapred.RecordReader baseRecordReader; + /** The underlying record reader to delegate to. */ + private org.apache.hadoop.mapred.RecordReader baseRecordReader; - /** The storage handler used */ - private final HCatStorageHandler storageHandler; + /** The storage handler used */ + private final HCatStorageHandler storageHandler; - private Deserializer deserializer; + private Deserializer deserializer; - private Map valuesNotInDataCols; + private Map valuesNotInDataCols; - private HCatSchema outputSchema = null; - private HCatSchema dataSchema = null; + private HCatSchema outputSchema = null; + private HCatSchema dataSchema = null; - /** - * Instantiates a new hcat record reader. - */ - public HCatRecordReader(HCatStorageHandler storageHandler, - Map valuesNotInDataCols) { - this.storageHandler = storageHandler; - this.valuesNotInDataCols = valuesNotInDataCols; - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#initialize( - * org.apache.hadoop.mapreduce.InputSplit, - * org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public void initialize(org.apache.hadoop.mapreduce.InputSplit split, - TaskAttemptContext taskContext) throws IOException, InterruptedException { - - HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); + /** + * Instantiates a new hcat record reader. 
+ */ + public HCatRecordReader(HCatStorageHandler storageHandler, + Map valuesNotInDataCols) { + this.storageHandler = storageHandler; + this.valuesNotInDataCols = valuesNotInDataCols; + } - baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext); - createDeserializer(hcatSplit, storageHandler, taskContext); - - // Pull the output schema out of the TaskAttemptContext - outputSchema = (HCatSchema) HCatUtil.deserialize( - taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA)); - - if (outputSchema == null) { - outputSchema = hcatSplit.getTableSchema(); - } + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#initialize( + * org.apache.hadoop.mapreduce.InputSplit, + * org.apache.hadoop.mapreduce.TaskAttemptContext) + */ + @Override + public void initialize(org.apache.hadoop.mapreduce.InputSplit split, + TaskAttemptContext taskContext) throws IOException, InterruptedException { - // Pull the table schema out of the Split info - // TODO This should be passed in the TaskAttemptContext instead - dataSchema = hcatSplit.getDataSchema(); - - errorTracker = new InputErrorTracker(taskContext.getConfiguration()); - } - - private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit, - HCatStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException { - - JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext); - HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf); - org.apache.hadoop.mapred.InputFormat inputFormat = - HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass()); - return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf, - InternalUtil.createReporter(taskContext)); - } + HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); - private void createDeserializer(HCatSplit hcatSplit, HCatStorageHandler storageHandler, - TaskAttemptContext taskContext) throws IOException { + baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext); + createDeserializer(hcatSplit, storageHandler, taskContext); - deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), - taskContext.getConfiguration()); + // Pull the output schema out of the TaskAttemptContext + outputSchema = (HCatSchema) HCatUtil.deserialize( + taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA)); - try { - InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(), - hcatSplit.getPartitionInfo().getTableInfo(), - hcatSplit.getPartitionInfo().getPartitionSchema()); - } catch (SerDeException e) { - throw new IOException("Failed initializing deserializer " - + storageHandler.getSerDeClass().getName(), e); - } + if (outputSchema == null) { + outputSchema = hcatSplit.getTableSchema(); } - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey() - */ - @Override - public WritableComparable getCurrentKey() - throws IOException, InterruptedException { - return currentKey; + // Pull the table schema out of the Split info + // TODO This should be passed in the TaskAttemptContext instead + dataSchema = hcatSplit.getDataSchema(); + + errorTracker = new InputErrorTracker(taskContext.getConfiguration()); + } + + private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit, + HCatStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException { + + JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext); + 
HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf); + org.apache.hadoop.mapred.InputFormat inputFormat = + HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass()); + return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf, + InternalUtil.createReporter(taskContext)); + } + + private void createDeserializer(HCatSplit hcatSplit, HCatStorageHandler storageHandler, + TaskAttemptContext taskContext) throws IOException { + + deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), + taskContext.getConfiguration()); + + try { + InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(), + hcatSplit.getPartitionInfo().getTableInfo(), + hcatSplit.getPartitionInfo().getPartitionSchema()); + } catch (SerDeException e) { + throw new IOException("Failed initializing deserializer " + + storageHandler.getSerDeClass().getName(), e); } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue() - */ - @Override - public HCatRecord getCurrentValue() throws IOException, InterruptedException { - return currentHCatRecord; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey() + */ + @Override + public WritableComparable getCurrentKey() + throws IOException, InterruptedException { + return currentKey; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue() + */ + @Override + public HCatRecord getCurrentValue() throws IOException, InterruptedException { + return currentHCatRecord; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#getProgress() + */ + @Override + public float getProgress() { + try { + return baseRecordReader.getProgress(); + } catch (IOException e) { + LOG.warn("Exception in HCatRecord reader", e); } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#getProgress() - */ - @Override - public float getProgress() { - try { - return baseRecordReader.getProgress(); - } catch (IOException e) { - LOG.warn("Exception in HCatRecord reader", e); - } - return 0.0f; // errored + return 0.0f; // errored + } + + /** + * Check if the wrapped RecordReader has another record, and if so convert it into an + * HCatRecord. We both check for records and convert here so a configurable percent of + * bad records can be tolerated. + * + * @return if there is a next record + * @throws IOException on error + * @throws InterruptedException on error + */ + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { + if (currentKey == null) { + currentKey = baseRecordReader.createKey(); + currentValue = baseRecordReader.createValue(); } - /** - * Check if the wrapped RecordReader has another record, and if so convert it into an - * HCatRecord. We both check for records and convert here so a configurable percent of - * bad records can be tolerated. 
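As the delegating methods above show, HCatRecordReader is essentially a new-API (mapreduce) RecordReader veneer over an old-API (mapred) reader obtained from the storage handler's input format. A stripped-down sketch of that bridging pattern, with generic key/value types and the base reader passed in directly instead of being built from an HCatSplit:

import java.io.IOException;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/** Adapts an old-API (mapred) RecordReader to the new (mapreduce) RecordReader contract. */
public class OldApiRecordReaderBridge<K, V> extends RecordReader<K, V> {

  private final org.apache.hadoop.mapred.RecordReader<K, V> baseRecordReader;
  private K currentKey;
  private V currentValue;

  public OldApiRecordReaderBridge(org.apache.hadoop.mapred.RecordReader<K, V> baseRecordReader) {
    this.baseRecordReader = baseRecordReader;
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) {
    // The real HCatRecordReader builds the base reader here from the HCatSplit, the
    // storage handler's InputFormat and a JobConf; this sketch takes it pre-built.
  }

  @Override
  public boolean nextKeyValue() throws IOException {
    if (currentKey == null) {   // the old API reuses key/value objects across records
      currentKey = baseRecordReader.createKey();
      currentValue = baseRecordReader.createValue();
    }
    return baseRecordReader.next(currentKey, currentValue);
  }

  @Override
  public K getCurrentKey() {
    return currentKey;
  }

  @Override
  public V getCurrentValue() {
    return currentValue;
  }

  @Override
  public float getProgress() throws IOException {
    return baseRecordReader.getProgress();
  }

  @Override
  public void close() throws IOException {
    baseRecordReader.close();
  }
}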
- * - * @return if there is a next record - * @throws IOException on error - * @throws InterruptedException on error - */ - @Override - public boolean nextKeyValue() throws IOException, InterruptedException { - if (currentKey == null) { - currentKey = baseRecordReader.createKey(); - currentValue = baseRecordReader.createValue(); - } - - while (baseRecordReader.next(currentKey, currentValue)) { - HCatRecord r = null; - Throwable t = null; - - errorTracker.incRecords(); - - try { - Object o = deserializer.deserialize(currentValue); - r = new LazyHCatRecord(o, deserializer.getObjectInspector()); - } catch (Throwable throwable) { - t = throwable; - } - - if (r == null) { - errorTracker.incErrors(t); - continue; - } - - DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size()); - int i = 0; - for (String fieldName : outputSchema.getFieldNames()) { - if (dataSchema.getPosition(fieldName) != null) { - dr.set(i, r.get(fieldName, dataSchema)); - } else { - dr.set(i, valuesNotInDataCols.get(fieldName)); - } - i++; - } - - currentHCatRecord = dr; - return true; + while (baseRecordReader.next(currentKey, currentValue)) { + HCatRecord r = null; + Throwable t = null; + + errorTracker.incRecords(); + + try { + Object o = deserializer.deserialize(currentValue); + r = new LazyHCatRecord(o, deserializer.getObjectInspector()); + } catch (Throwable throwable) { + t = throwable; + } + + if (r == null) { + errorTracker.incErrors(t); + continue; + } + + DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size()); + int i = 0; + for (String fieldName : outputSchema.getFieldNames()) { + if (dataSchema.getPosition(fieldName) != null) { + dr.set(i, r.get(fieldName, dataSchema)); + } else { + dr.set(i, valuesNotInDataCols.get(fieldName)); } + i++; + } - return false; + currentHCatRecord = dr; + return true; } - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#close() - */ - @Override - public void close() throws IOException { - baseRecordReader.close(); + return false; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#close() + */ + @Override + public void close() throws IOException { + baseRecordReader.close(); + } + + /** + * Tracks number of of errors in input and throws a Runtime exception + * if the rate of errors crosses a limit. + *
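The nextKeyValue() implementation above projects each deserialized row onto the requested output schema, filling in partition columns (which are never stored in the data files) from valuesNotInDataCols. A plain-collections sketch of that projection, using maps instead of the HCatSchema/HCatRecord API:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ProjectRecordSketch {

  /**
   * Build an output row: take each requested field from the data row if the data
   * schema has it, otherwise from the partition values that never appear in the files.
   */
  static List<Object> project(List<String> outputFields,
                              Map<String, Object> dataRow,
                              Map<String, Object> valuesNotInDataCols) {
    List<Object> out = new ArrayList<Object>(outputFields.size());
    for (String field : outputFields) {
      if (dataRow.containsKey(field)) {
        out.add(dataRow.get(field));
      } else {
        out.add(valuesNotInDataCols.get(field));   // e.g. a partition column like 'ds'
      }
    }
    return out;
  }

  public static void main(String[] args) {
    Map<String, Object> row = new HashMap<String, Object>();
    row.put("id", 42);
    row.put("name", "x");
    System.out.println(project(
        Arrays.asList("id", "name", "ds"), row,
        Collections.<String, Object>singletonMap("ds", "2013-01-01")));
    // -> [42, x, 2013-01-01]
  }
}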
+ * The intention is to skip over very rare file corruption or incorrect + * input, but catch programmer errors (incorrect format, or incorrect + * deserializers etc). + * + * This class was largely copied from Elephant-Bird (thanks @rangadi!) + * https://github.com/kevinweil/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/mapreduce/input/LzoRecordReader.java + */ + static class InputErrorTracker { + long numRecords; + long numErrors; + + double errorThreshold; // max fraction of errors allowed + long minErrors; // throw error only after this many errors + + InputErrorTracker(Configuration conf) { + errorThreshold = conf.getFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, + HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT); + minErrors = conf.getLong(HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_KEY, + HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_DEFAULT); + numRecords = 0; + numErrors = 0; } - /** - * Tracks number of of errors in input and throws a Runtime exception - * if the rate of errors crosses a limit. - *
- * The intention is to skip over very rare file corruption or incorrect - * input, but catch programmer errors (incorrect format, or incorrect - * deserializers etc). - * - * This class was largely copied from Elephant-Bird (thanks @rangadi!) - * https://github.com/kevinweil/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/mapreduce/input/LzoRecordReader.java - */ - static class InputErrorTracker { - long numRecords; - long numErrors; - - double errorThreshold; // max fraction of errors allowed - long minErrors; // throw error only after this many errors - - InputErrorTracker(Configuration conf) { - errorThreshold = conf.getFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, - HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT); - minErrors = conf.getLong(HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_KEY, - HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_DEFAULT); - numRecords = 0; - numErrors = 0; - } - - void incRecords() { - numRecords++; - } + void incRecords() { + numRecords++; + } - void incErrors(Throwable cause) { - numErrors++; - if (numErrors > numRecords) { - // incorrect use of this class - throw new RuntimeException("Forgot to invoke incRecords()?"); - } - - if (cause == null) { - cause = new Exception("Unknown error"); - } - - if (errorThreshold <= 0) { // no errors are tolerated - throw new RuntimeException("error while reading input records", cause); - } - - LOG.warn("Error while reading an input record (" - + numErrors + " out of " + numRecords + " so far ): ", cause); - - double errRate = numErrors / (double) numRecords; - - // will always excuse the first error. We can decide if single - // error crosses threshold inside close() if we want to. - if (numErrors >= minErrors && errRate > errorThreshold) { - LOG.error(numErrors + " out of " + numRecords - + " crosses configured threshold (" + errorThreshold + ")"); - throw new RuntimeException("error rate while reading input records crossed threshold", cause); - } - } + void incErrors(Throwable cause) { + numErrors++; + if (numErrors > numRecords) { + // incorrect use of this class + throw new RuntimeException("Forgot to invoke incRecords()?"); + } + + if (cause == null) { + cause = new Exception("Unknown error"); + } + + if (errorThreshold <= 0) { // no errors are tolerated + throw new RuntimeException("error while reading input records", cause); + } + + LOG.warn("Error while reading an input record (" + + numErrors + " out of " + numRecords + " so far ): ", cause); + + double errRate = numErrors / (double) numRecords; + + // will always excuse the first error. We can decide if single + // error crosses threshold inside close() if we want to. 
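InputErrorTracker above only fails the task once at least minErrors bad records have been seen and the running error rate exceeds errorThreshold. A self-contained sketch of that policy (plain Java; the hard-coded numbers are illustrative, not the actual HCatConstants defaults):

public class ErrorRateTrackerSketch {
  private long numRecords;
  private long numErrors;
  private final double errorThreshold;   // max tolerated fraction of bad records
  private final long minErrors;          // never fail before this many errors

  ErrorRateTrackerSketch(double errorThreshold, long minErrors) {
    this.errorThreshold = errorThreshold;
    this.minErrors = minErrors;
  }

  void incRecords() {
    numRecords++;
  }

  void incErrors(Throwable cause) {
    numErrors++;
    if (errorThreshold <= 0) {   // a zero threshold tolerates no bad records at all
      throw new RuntimeException("error while reading input records", cause);
    }
    double errRate = numErrors / (double) numRecords;
    if (numErrors >= minErrors && errRate > errorThreshold) {
      throw new RuntimeException("error rate " + errRate + " crossed threshold "
          + errorThreshold + " after " + numRecords + " records", cause);
    }
    // otherwise: log and let the caller skip this record
  }

  public static void main(String[] args) {
    // Tolerate up to 0.1% bad records, but only start enforcing after 100 errors.
    ErrorRateTrackerSketch tracker = new ErrorRateTrackerSketch(0.001, 100);
    for (int i = 0; i < 1_000_000; i++) {
      tracker.incRecords();
      if (i % 5000 == 0) {   // simulate an occasional corrupt record (rate 0.0002)
        tracker.incErrors(new Exception("corrupt record at " + i));
      }
    }
    System.out.println("survived: error rate stayed under the threshold");
  }
}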
+ if (numErrors >= minErrors && errRate > errorThreshold) { + LOG.error(numErrors + " out of " + numRecords + + " crosses configured threshold (" + errorThreshold + ")"); + throw new RuntimeException("error rate while reading input records crossed threshold", cause); + } } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatSplit.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatSplit.java index 660320e..bbd556f 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatSplit.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatSplit.java @@ -37,153 +37,153 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.HCatSplit} instead */ public class HCatSplit extends InputSplit - implements Writable, org.apache.hadoop.mapred.InputSplit { - - private static final Logger LOG = LoggerFactory.getLogger(HCatSplit.class); - /** The partition info for the split. */ - private PartInfo partitionInfo; - - /** The split returned by the underlying InputFormat split. */ - private org.apache.hadoop.mapred.InputSplit baseMapRedSplit; - - /** The schema for the HCatTable */ - private HCatSchema tableSchema; - - private HiveConf hiveConf; - - /** - * Instantiates a new hcat split. - */ - public HCatSplit() { - } - - /** - * Instantiates a new hcat split. - * - * @param partitionInfo the partition info - * @param baseMapRedSplit the base mapred split - * @param tableSchema the table level schema - */ - public HCatSplit(PartInfo partitionInfo, - org.apache.hadoop.mapred.InputSplit baseMapRedSplit, - HCatSchema tableSchema) { - - this.partitionInfo = partitionInfo; - // dataSchema can be obtained from partitionInfo.getPartitionSchema() - this.baseMapRedSplit = baseMapRedSplit; - this.tableSchema = tableSchema; - } - - /** - * Gets the partition info. - * @return the partitionInfo - */ - public PartInfo getPartitionInfo() { - return partitionInfo; + implements Writable, org.apache.hadoop.mapred.InputSplit { + + private static final Logger LOG = LoggerFactory.getLogger(HCatSplit.class); + /** The partition info for the split. */ + private PartInfo partitionInfo; + + /** The split returned by the underlying InputFormat split. */ + private org.apache.hadoop.mapred.InputSplit baseMapRedSplit; + + /** The schema for the HCatTable */ + private HCatSchema tableSchema; + + private HiveConf hiveConf; + + /** + * Instantiates a new hcat split. + */ + public HCatSplit() { + } + + /** + * Instantiates a new hcat split. + * + * @param partitionInfo the partition info + * @param baseMapRedSplit the base mapred split + * @param tableSchema the table level schema + */ + public HCatSplit(PartInfo partitionInfo, + org.apache.hadoop.mapred.InputSplit baseMapRedSplit, + HCatSchema tableSchema) { + + this.partitionInfo = partitionInfo; + // dataSchema can be obtained from partitionInfo.getPartitionSchema() + this.baseMapRedSplit = baseMapRedSplit; + this.tableSchema = tableSchema; + } + + /** + * Gets the partition info. + * @return the partitionInfo + */ + public PartInfo getPartitionInfo() { + return partitionInfo; + } + + /** + * Gets the underlying InputSplit. + * @return the baseMapRedSplit + */ + public org.apache.hadoop.mapred.InputSplit getBaseSplit() { + return baseMapRedSplit; + } + + /** + * Gets the data schema. + * @return the table schema + */ + public HCatSchema getDataSchema() { + return this.partitionInfo.getPartitionSchema(); + } + + /** + * Gets the table schema. 
+ * @return the table schema + */ + public HCatSchema getTableSchema() { + return this.tableSchema; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.InputSplit#getLength() + */ + @Override + public long getLength() { + try { + return baseMapRedSplit.getLength(); + } catch (IOException e) { + LOG.warn("Exception in HCatSplit", e); } - - /** - * Gets the underlying InputSplit. - * @return the baseMapRedSplit - */ - public org.apache.hadoop.mapred.InputSplit getBaseSplit() { - return baseMapRedSplit; + return 0; // we errored + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.InputSplit#getLocations() + */ + @Override + public String[] getLocations() { + try { + return baseMapRedSplit.getLocations(); + } catch (IOException e) { + LOG.warn("Exception in HCatSplit", e); } - - /** - * Gets the data schema. - * @return the table schema - */ - public HCatSchema getDataSchema() { - return this.partitionInfo.getPartitionSchema(); + return new String[0]; // we errored + } + + /* (non-Javadoc) + * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput) + */ + @SuppressWarnings("unchecked") + @Override + public void readFields(DataInput input) throws IOException { + String partitionInfoString = WritableUtils.readString(input); + partitionInfo = (PartInfo) HCatUtil.deserialize(partitionInfoString); + + String baseSplitClassName = WritableUtils.readString(input); + org.apache.hadoop.mapred.InputSplit split; + try { + Class splitClass = + (Class) Class.forName(baseSplitClassName); + + //Class.forName().newInstance() does not work if the underlying + //InputSplit has package visibility + Constructor + constructor = + splitClass.getDeclaredConstructor(new Class[]{}); + constructor.setAccessible(true); + + split = constructor.newInstance(); + // read baseSplit from input + ((Writable) split).readFields(input); + this.baseMapRedSplit = split; + } catch (Exception e) { + throw new IOException("Exception from " + baseSplitClassName, e); } - /** - * Gets the table schema. 
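readFields()/write() above persist the wrapped mapred split by writing its class name followed by its own Writable fields, re-instantiating it reflectively on the read side (setAccessible(true) covers package-private split classes). A generic sketch of that round trip for any Writable, not tied to HCatSplit (Text is used only as a convenient demo value):

import java.io.*;
import java.lang.reflect.Constructor;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;

public class WritableByClassNameSketch {

  /** Write the class name first, then let the object serialize itself. */
  static void writeWithClassName(DataOutput out, Writable w) throws IOException {
    WritableUtils.writeString(out, w.getClass().getName());
    w.write(out);
  }

  /** Read the class name, instantiate via the (possibly non-public) no-arg constructor, then readFields(). */
  static Writable readWithClassName(DataInput in) throws IOException {
    String className = WritableUtils.readString(in);
    try {
      Class<? extends Writable> clazz = Class.forName(className).asSubclass(Writable.class);
      Constructor<? extends Writable> ctor = clazz.getDeclaredConstructor();
      ctor.setAccessible(true);   // handles package-visible split classes
      Writable w = ctor.newInstance();
      w.readFields(in);
      return w;
    } catch (Exception e) {
      throw new IOException("Exception from " + className, e);
    }
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    writeWithClassName(new DataOutputStream(bytes), new Text("hello"));

    Writable roundTripped =
        readWithClassName(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(roundTripped);   // -> hello
  }
}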
- * @return the table schema - */ - public HCatSchema getTableSchema() { - return this.tableSchema; - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.InputSplit#getLength() - */ - @Override - public long getLength() { - try { - return baseMapRedSplit.getLength(); - } catch (IOException e) { - LOG.warn("Exception in HCatSplit", e); - } - return 0; // we errored - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.InputSplit#getLocations() - */ - @Override - public String[] getLocations() { - try { - return baseMapRedSplit.getLocations(); - } catch (IOException e) { - LOG.warn("Exception in HCatSplit", e); - } - return new String[0]; // we errored - } - - /* (non-Javadoc) - * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput) - */ - @SuppressWarnings("unchecked") - @Override - public void readFields(DataInput input) throws IOException { - String partitionInfoString = WritableUtils.readString(input); - partitionInfo = (PartInfo) HCatUtil.deserialize(partitionInfoString); - - String baseSplitClassName = WritableUtils.readString(input); - org.apache.hadoop.mapred.InputSplit split; - try { - Class splitClass = - (Class) Class.forName(baseSplitClassName); - - //Class.forName().newInstance() does not work if the underlying - //InputSplit has package visibility - Constructor - constructor = - splitClass.getDeclaredConstructor(new Class[]{}); - constructor.setAccessible(true); - - split = constructor.newInstance(); - // read baseSplit from input - ((Writable) split).readFields(input); - this.baseMapRedSplit = split; - } catch (Exception e) { - throw new IOException("Exception from " + baseSplitClassName, e); - } - - String tableSchemaString = WritableUtils.readString(input); - tableSchema = (HCatSchema) HCatUtil.deserialize(tableSchemaString); - } - - /* (non-Javadoc) - * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput) - */ - @Override - public void write(DataOutput output) throws IOException { - String partitionInfoString = HCatUtil.serialize(partitionInfo); - - // write partitionInfo into output - WritableUtils.writeString(output, partitionInfoString); - - WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); - Writable baseSplitWritable = (Writable) baseMapRedSplit; - //write baseSplit into output - baseSplitWritable.write(output); - - //write the table schema into output - String tableSchemaString = HCatUtil.serialize(tableSchema); - WritableUtils.writeString(output, tableSchemaString); - } + String tableSchemaString = WritableUtils.readString(input); + tableSchema = (HCatSchema) HCatUtil.deserialize(tableSchemaString); + } + + /* (non-Javadoc) + * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput) + */ + @Override + public void write(DataOutput output) throws IOException { + String partitionInfoString = HCatUtil.serialize(partitionInfo); + + // write partitionInfo into output + WritableUtils.writeString(output, partitionInfoString); + + WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); + Writable baseSplitWritable = (Writable) baseMapRedSplit; + //write baseSplit into output + baseSplitWritable.write(output); + + //write the table schema into output + String tableSchemaString = HCatUtil.serialize(tableSchema); + WritableUtils.writeString(output, tableSchemaString); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatStorageHandler.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatStorageHandler.java index 05adb54..4ed82df 100644 --- 
a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatStorageHandler.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatStorageHandler.java @@ -35,87 +35,87 @@ */ public abstract class HCatStorageHandler implements HiveStorageHandler { - //TODO move this to HiveStorageHandler + //TODO move this to HiveStorageHandler - /** - * This method is called to allow the StorageHandlers the chance - * to populate the JobContext.getConfiguration() with properties that - * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). - * Key value pairs passed into jobProperties is guaranteed to be set in the job's - * configuration object. User's can retrieve "context" information from tableDesc. - * User's should avoid mutating tableDesc and only make changes in jobProperties. - * This method is expected to be idempotent such that a job called with the - * same tableDesc values should return the same key-value pairs in jobProperties. - * Any external state set by this method should remain the same if this method is - * called again. It is up to the user to determine how best guarantee this invariant. - * - * This method in particular is to create a configuration for input. - * @param tableDesc - * @param jobProperties - */ - public abstract void configureInputJobProperties(TableDesc tableDesc, Map jobProperties); + /** + * This method is called to allow the StorageHandlers the chance + * to populate the JobContext.getConfiguration() with properties that + * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). + * Key value pairs passed into jobProperties is guaranteed to be set in the job's + * configuration object. User's can retrieve "context" information from tableDesc. + * User's should avoid mutating tableDesc and only make changes in jobProperties. + * This method is expected to be idempotent such that a job called with the + * same tableDesc values should return the same key-value pairs in jobProperties. + * Any external state set by this method should remain the same if this method is + * called again. It is up to the user to determine how best guarantee this invariant. + * + * This method in particular is to create a configuration for input. + * @param tableDesc + * @param jobProperties + */ + public abstract void configureInputJobProperties(TableDesc tableDesc, Map jobProperties); - //TODO move this to HiveStorageHandler + //TODO move this to HiveStorageHandler - /** - * This method is called to allow the StorageHandlers the chance - * to populate the JobContext.getConfiguration() with properties that - * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). - * Key value pairs passed into jobProperties is guaranteed to be set in the job's - * configuration object. User's can retrieve "context" information from tableDesc. - * User's should avoid mutating tableDesc and only make changes in jobProperties. - * This method is expected to be idempotent such that a job called with the - * same tableDesc values should return the same key-value pairs in jobProperties. - * Any external state set by this method should remain the same if this method is - * called again. It is up to the user to determine how best guarantee this invariant. - * - * This method in particular is to create a configuration for output. 
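To make the contract described above concrete, here is a hedged sketch (not from this patch; the class and property names are hypothetical) of how a handler might populate jobProperties idempotently, deriving everything from tableDesc and never mutating it:

import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Illustrative only. Because the values depend solely on tableDesc, calling
// either method repeatedly with the same tableDesc yields the same key-value
// pairs, which is the idempotence requirement stated above.
class ExampleHandlerJobProperties {
  static void configureInputJobProperties(TableDesc tableDesc,
                                          Map<String, String> jobProperties) {
    Properties tableProps = tableDesc.getProperties();
    jobProperties.put("example.input.location",
        tableProps.getProperty("location", ""));
  }

  static void configureOutputJobProperties(TableDesc tableDesc,
                                           Map<String, String> jobProperties) {
    Properties tableProps = tableDesc.getProperties();
    jobProperties.put("example.output.location",
        tableProps.getProperty("location", ""));
  }
}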
- * @param tableDesc - * @param jobProperties - */ - public abstract void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties); + /** + * This method is called to allow the StorageHandlers the chance + * to populate the JobContext.getConfiguration() with properties that + * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). + * Key value pairs passed into jobProperties is guaranteed to be set in the job's + * configuration object. User's can retrieve "context" information from tableDesc. + * User's should avoid mutating tableDesc and only make changes in jobProperties. + * This method is expected to be idempotent such that a job called with the + * same tableDesc values should return the same key-value pairs in jobProperties. + * Any external state set by this method should remain the same if this method is + * called again. It is up to the user to determine how best guarantee this invariant. + * + * This method in particular is to create a configuration for output. + * @param tableDesc + * @param jobProperties + */ + public abstract void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties); - /** - * - * - * @return authorization provider - * @throws HiveException - */ - public abstract HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException; + /** + * + * + * @return authorization provider + * @throws HiveException + */ + public abstract HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException; - /* - * (non-Javadoc) - * - * @see org.apache.hadoop.hive.ql.metadata.HiveStorageHandler# - * configureTableJobProperties(org.apache.hadoop.hive.ql.plan.TableDesc, - * java.util.Map) - */ - @Override - @Deprecated - public final void configureTableJobProperties(TableDesc tableDesc, - Map jobProperties) { - } + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.hive.ql.metadata.HiveStorageHandler# + * configureTableJobProperties(org.apache.hadoop.hive.ql.plan.TableDesc, + * java.util.Map) + */ + @Override + @Deprecated + public final void configureTableJobProperties(TableDesc tableDesc, + Map jobProperties) { + } - /* - * (non-Javadoc) - * - * @see org.apache.hadoop.conf.Configurable#getConf() - */ - @Override - public abstract Configuration getConf(); + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.conf.Configurable#getConf() + */ + @Override + public abstract Configuration getConf(); - /* - * (non-Javadoc) - * - * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf. - * Configuration) - */ - @Override - public abstract void setConf(Configuration conf); + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf. 
+ * Configuration) + */ + @Override + public abstract void setConf(Configuration conf); - OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) { - return new DefaultOutputFormatContainer(outputFormat); - } + OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) { + return new DefaultOutputFormatContainer(outputFormat); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java index 7484b0b..739c102 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java @@ -37,152 +37,152 @@ public class HCatTableInfo implements Serializable { - private static final long serialVersionUID = 1L; - - /** The db and table names */ - private final String databaseName; - private final String tableName; - - /** The table schema. */ - private final HCatSchema dataColumns; - private final HCatSchema partitionColumns; - - /** The table being written to */ - private final Table table; - - /** The storer info */ - private StorerInfo storerInfo; - - /** - * Initializes a new HCatTableInfo instance to be used with {@link HCatInputFormat} - * for reading data from a table. - * work with hadoop security, the kerberos principal name of the server - else null - * The principal name should be of the form: - * /_HOST@ like "hcat/_HOST@myrealm.com" - * The special string _HOST will be replaced automatically with the correct host name - * @param databaseName the db name - * @param tableName the table name - * @param dataColumns schema of columns which contain data - * @param partitionColumns schema of partition columns - * @param storerInfo information about storage descriptor - * @param table hive metastore table class - */ - HCatTableInfo( - String databaseName, - String tableName, - HCatSchema dataColumns, - HCatSchema partitionColumns, - StorerInfo storerInfo, - Table table) { - this.databaseName = (databaseName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; - this.tableName = tableName; - this.dataColumns = dataColumns; - this.table = table; - this.storerInfo = storerInfo; - this.partitionColumns = partitionColumns; - } - - /** - * Gets the value of databaseName - * @return the databaseName - */ - public String getDatabaseName() { - return databaseName; - } - - /** - * Gets the value of tableName - * @return the tableName - */ - public String getTableName() { - return tableName; - } - - /** - * @return return schema of data columns as defined in meta store - */ - public HCatSchema getDataColumns() { - return dataColumns; - } - - /** - * @return schema of partition columns - */ - public HCatSchema getPartitionColumns() { - return partitionColumns; - } - - /** - * @return the storerInfo - */ - public StorerInfo getStorerInfo() { - return storerInfo; - } - - public String getTableLocation() { - return table.getSd().getLocation(); - } - - /** - * minimize dependency on hive classes so this is package private - * this should eventually no longer be used - * @return hive metastore representation of table - */ - Table getTable() { - return table; - } - - /** - * create an HCatTableInfo instance from the supplied Hive Table instance - * @param table to create an instance from - * @return HCatTableInfo - * @throws IOException - */ - static HCatTableInfo valueOf(Table table) throws IOException { - // Explicitly use {@link org.apache.hadoop.hive.ql.metadata.Table} when getting the schema, - // but store @{link org.apache.hadoop.hive.metastore.api.Table} as this class is serialized - // into the job conf. - org.apache.hadoop.hive.ql.metadata.Table mTable = - new org.apache.hadoop.hive.ql.metadata.Table(table); - HCatSchema schema = HCatUtil.extractSchema(mTable); - StorerInfo storerInfo = - InternalUtil.extractStorerInfo(table.getSd(), table.getParameters()); - HCatSchema partitionColumns = HCatUtil.getPartitionColumns(mTable); - return new HCatTableInfo(table.getDbName(), table.getTableName(), schema, - partitionColumns, storerInfo, table); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - HCatTableInfo tableInfo = (HCatTableInfo) o; - - if (dataColumns != null ? !dataColumns.equals(tableInfo.dataColumns) : tableInfo.dataColumns != null) - return false; - if (databaseName != null ? !databaseName.equals(tableInfo.databaseName) : tableInfo.databaseName != null) - return false; - if (partitionColumns != null ? !partitionColumns.equals(tableInfo.partitionColumns) : tableInfo.partitionColumns != null) - return false; - if (storerInfo != null ? !storerInfo.equals(tableInfo.storerInfo) : tableInfo.storerInfo != null) return false; - if (table != null ? !table.equals(tableInfo.table) : tableInfo.table != null) return false; - if (tableName != null ? !tableName.equals(tableInfo.tableName) : tableInfo.tableName != null) return false; - - return true; - } - - - @Override - public int hashCode() { - int result = databaseName != null ? databaseName.hashCode() : 0; - result = 31 * result + (tableName != null ? tableName.hashCode() : 0); - result = 31 * result + (dataColumns != null ? dataColumns.hashCode() : 0); - result = 31 * result + (partitionColumns != null ? partitionColumns.hashCode() : 0); - result = 31 * result + (table != null ? table.hashCode() : 0); - result = 31 * result + (storerInfo != null ? 
storerInfo.hashCode() : 0); - return result; - } + private static final long serialVersionUID = 1L; + + /** The db and table names */ + private final String databaseName; + private final String tableName; + + /** The table schema. */ + private final HCatSchema dataColumns; + private final HCatSchema partitionColumns; + + /** The table being written to */ + private final Table table; + + /** The storer info */ + private StorerInfo storerInfo; + + /** + * Initializes a new HCatTableInfo instance to be used with {@link HCatInputFormat} + * for reading data from a table. + * work with hadoop security, the kerberos principal name of the server - else null + * The principal name should be of the form: + * /_HOST@ like "hcat/_HOST@myrealm.com" + * The special string _HOST will be replaced automatically with the correct host name + * @param databaseName the db name + * @param tableName the table name + * @param dataColumns schema of columns which contain data + * @param partitionColumns schema of partition columns + * @param storerInfo information about storage descriptor + * @param table hive metastore table class + */ + HCatTableInfo( + String databaseName, + String tableName, + HCatSchema dataColumns, + HCatSchema partitionColumns, + StorerInfo storerInfo, + Table table) { + this.databaseName = (databaseName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; + this.tableName = tableName; + this.dataColumns = dataColumns; + this.table = table; + this.storerInfo = storerInfo; + this.partitionColumns = partitionColumns; + } + + /** + * Gets the value of databaseName + * @return the databaseName + */ + public String getDatabaseName() { + return databaseName; + } + + /** + * Gets the value of tableName + * @return the tableName + */ + public String getTableName() { + return tableName; + } + + /** + * @return return schema of data columns as defined in meta store + */ + public HCatSchema getDataColumns() { + return dataColumns; + } + + /** + * @return schema of partition columns + */ + public HCatSchema getPartitionColumns() { + return partitionColumns; + } + + /** + * @return the storerInfo + */ + public StorerInfo getStorerInfo() { + return storerInfo; + } + + public String getTableLocation() { + return table.getSd().getLocation(); + } + + /** + * minimize dependency on hive classes so this is package private + * this should eventually no longer be used + * @return hive metastore representation of table + */ + Table getTable() { + return table; + } + + /** + * create an HCatTableInfo instance from the supplied Hive Table instance + * @param table to create an instance from + * @return HCatTableInfo + * @throws IOException + */ + static HCatTableInfo valueOf(Table table) throws IOException { + // Explicitly use {@link org.apache.hadoop.hive.ql.metadata.Table} when getting the schema, + // but store @{link org.apache.hadoop.hive.metastore.api.Table} as this class is serialized + // into the job conf. 
+ org.apache.hadoop.hive.ql.metadata.Table mTable = + new org.apache.hadoop.hive.ql.metadata.Table(table); + HCatSchema schema = HCatUtil.extractSchema(mTable); + StorerInfo storerInfo = + InternalUtil.extractStorerInfo(table.getSd(), table.getParameters()); + HCatSchema partitionColumns = HCatUtil.getPartitionColumns(mTable); + return new HCatTableInfo(table.getDbName(), table.getTableName(), schema, + partitionColumns, storerInfo, table); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + HCatTableInfo tableInfo = (HCatTableInfo) o; + + if (dataColumns != null ? !dataColumns.equals(tableInfo.dataColumns) : tableInfo.dataColumns != null) + return false; + if (databaseName != null ? !databaseName.equals(tableInfo.databaseName) : tableInfo.databaseName != null) + return false; + if (partitionColumns != null ? !partitionColumns.equals(tableInfo.partitionColumns) : tableInfo.partitionColumns != null) + return false; + if (storerInfo != null ? !storerInfo.equals(tableInfo.storerInfo) : tableInfo.storerInfo != null) return false; + if (table != null ? !table.equals(tableInfo.table) : tableInfo.table != null) return false; + if (tableName != null ? !tableName.equals(tableInfo.tableName) : tableInfo.tableName != null) return false; + + return true; + } + + + @Override + public int hashCode() { + int result = databaseName != null ? databaseName.hashCode() : 0; + result = 31 * result + (tableName != null ? tableName.hashCode() : 0); + result = 31 * result + (dataColumns != null ? dataColumns.hashCode() : 0); + result = 31 * result + (partitionColumns != null ? partitionColumns.hashCode() : 0); + result = 31 * result + (table != null ? table.hashCode() : 0); + result = 31 * result + (storerInfo != null ? storerInfo.hashCode() : 0); + return result; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java index 5f6bc9b..5b4c908 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java @@ -49,124 +49,124 @@ */ class InitializeInput { - private static final Logger LOG = LoggerFactory.getLogger(InitializeInput.class); - - /** - * @see org.apache.hcatalog.mapreduce.InitializeInput#setInput(org.apache.hadoop.conf.Configuration, InputJobInfo) - */ - public static void setInput(Job job, InputJobInfo theirInputJobInfo) throws Exception { - setInput(job.getConfiguration(), theirInputJobInfo); - } - - /** - * Set the input to use for the Job. This queries the metadata server with the specified - * partition predicates, gets the matching partitions, and puts the information in the job - * configuration object. - * - * To ensure a known InputJobInfo state, only the database name, table name, filter, and - * properties are preserved. All other modification from the given InputJobInfo are discarded. 
- * - * After calling setInput, InputJobInfo can be retrieved from the job configuration as follows: - * {code} - * InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize( - * job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); - * {code} - * - * @param conf the job Configuration object - * @param theirInputJobInfo information on the Input to read - * @throws Exception - */ - public static void setInput(Configuration conf, - InputJobInfo theirInputJobInfo) throws Exception { - InputJobInfo inputJobInfo = InputJobInfo.create( - theirInputJobInfo.getDatabaseName(), - theirInputJobInfo.getTableName(), - theirInputJobInfo.getFilter(), - theirInputJobInfo.getProperties()); - conf.set( - HCatConstants.HCAT_KEY_JOB_INFO, - HCatUtil.serialize(getInputJobInfo(conf, inputJobInfo, null))); - } + private static final Logger LOG = LoggerFactory.getLogger(InitializeInput.class); + + /** + * @see org.apache.hcatalog.mapreduce.InitializeInput#setInput(org.apache.hadoop.conf.Configuration, InputJobInfo) + */ + public static void setInput(Job job, InputJobInfo theirInputJobInfo) throws Exception { + setInput(job.getConfiguration(), theirInputJobInfo); + } + + /** + * Set the input to use for the Job. This queries the metadata server with the specified + * partition predicates, gets the matching partitions, and puts the information in the job + * configuration object. + * + * To ensure a known InputJobInfo state, only the database name, table name, filter, and + * properties are preserved. All other modification from the given InputJobInfo are discarded. + * + * After calling setInput, InputJobInfo can be retrieved from the job configuration as follows: + * {code} + * InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize( + * job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); + * {code} + * + * @param conf the job Configuration object + * @param theirInputJobInfo information on the Input to read + * @throws Exception + */ + public static void setInput(Configuration conf, + InputJobInfo theirInputJobInfo) throws Exception { + InputJobInfo inputJobInfo = InputJobInfo.create( + theirInputJobInfo.getDatabaseName(), + theirInputJobInfo.getTableName(), + theirInputJobInfo.getFilter(), + theirInputJobInfo.getProperties()); + conf.set( + HCatConstants.HCAT_KEY_JOB_INFO, + HCatUtil.serialize(getInputJobInfo(conf, inputJobInfo, null))); + } + + /** + * Returns the given InputJobInfo after populating with data queried from the metadata service. 
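As a usage sketch (not part of the patch; the database, table, and partition filter are placeholders, and it assumes the public HCatInputFormat.setInput(Job, InputJobInfo) entry point of this org.apache.hcatalog API), the read path above is normally driven like this, after which the populated InputJobInfo can be read back exactly as the javadoc shows:

import org.apache.hadoop.mapreduce.Job;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.mapreduce.HCatInputFormat;
import org.apache.hcatalog.mapreduce.InputJobInfo;

public class HCatReadSetupSketch {
  public static void main(String[] args) throws Exception {
    Job job = new Job();
    // Resolve the table and its matching partitions, and stash the serialized
    // InputJobInfo into the job configuration (HCAT_KEY_JOB_INFO).
    HCatInputFormat.setInput(job,
        InputJobInfo.create("mydb", "mytable", "ds=\"2013-01-01\"", null));

    // Read the populated InputJobInfo back out of the configuration.
    InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(
        job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
    HCatSchema dataColumns = inputInfo.getTableInfo().getDataColumns();
    System.out.println("Reading " + dataColumns.getFields().size() + " columns");
  }
}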
+ */ + private static InputJobInfo getInputJobInfo( + Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception { + HiveMetaStoreClient client = null; + HiveConf hiveConf = null; + try { + if (conf != null) { + hiveConf = HCatUtil.getHiveConf(conf); + } else { + hiveConf = new HiveConf(HCatInputFormat.class); + } + client = HCatUtil.getHiveClient(hiveConf); + Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName()); + + List partInfoList = new ArrayList(); + + inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); + if (table.getPartitionKeys().size() != 0) { + //Partitioned table + List parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName(), + inputJobInfo.getFilter(), + (short) -1); + + // Default to 100,000 partitions if hive.metastore.maxpartition is not defined + int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000); + if (parts != null && parts.size() > maxPart) { + throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size()); + } - /** - * Returns the given InputJobInfo after populating with data queried from the metadata service. - */ - private static InputJobInfo getInputJobInfo( - Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception { - HiveMetaStoreClient client = null; - HiveConf hiveConf = null; - try { - if (conf != null) { - hiveConf = HCatUtil.getHiveConf(conf); - } else { - hiveConf = new HiveConf(HCatInputFormat.class); - } - client = HCatUtil.getHiveClient(hiveConf); - Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName()); - - List partInfoList = new ArrayList(); - - inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); - if (table.getPartitionKeys().size() != 0) { - //Partitioned table - List parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName(), - inputJobInfo.getFilter(), - (short) -1); - - // Default to 100,000 partitions if hive.metastore.maxpartition is not defined - int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000); - if (parts != null && parts.size() > maxPart) { - throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size()); - } - - // populate partition info - for (Partition ptn : parts) { - HCatSchema schema = HCatUtil.extractSchema( - new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn)); - PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), - ptn.getParameters(), conf, inputJobInfo); - partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn)); - partInfoList.add(partInfo); - } - - } else { - //Non partitioned table - HCatSchema schema = HCatUtil.extractSchema(table); - PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(), - table.getParameters(), conf, inputJobInfo); - partInfo.setPartitionValues(new HashMap()); - partInfoList.add(partInfo); - } - inputJobInfo.setPartitions(partInfoList); - - return inputJobInfo; - } finally { - HCatUtil.closeHiveClientQuietly(client); + // populate partition info + for (Partition ptn : parts) { + HCatSchema schema = HCatUtil.extractSchema( + new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn)); + PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), + ptn.getParameters(), conf, inputJobInfo); + partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn)); + 
partInfoList.add(partInfo); } + } else { + //Non partitioned table + HCatSchema schema = HCatUtil.extractSchema(table); + PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(), + table.getParameters(), conf, inputJobInfo); + partInfo.setPartitionValues(new HashMap()); + partInfoList.add(partInfo); + } + inputJobInfo.setPartitions(partInfoList); + + return inputJobInfo; + } finally { + HCatUtil.closeHiveClientQuietly(client); } - private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd, - Map parameters, Configuration conf, - InputJobInfo inputJobInfo) throws IOException { + } - StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters); + private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd, + Map parameters, Configuration conf, + InputJobInfo inputJobInfo) throws IOException { - Properties hcatProperties = new Properties(); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); + StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters); - // copy the properties from storageHandler to jobProperties - Map jobProperties = HCatUtil.getInputJobProperties(storageHandler, inputJobInfo); + Properties hcatProperties = new Properties(); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); - for (String key : parameters.keySet()) { - hcatProperties.put(key, parameters.get(key)); - } - // FIXME - // Bloating partinfo with inputJobInfo is not good - return new PartInfo(schema, storageHandler, sd.getLocation(), - hcatProperties, jobProperties, inputJobInfo.getTableInfo()); + // copy the properties from storageHandler to jobProperties + Map jobProperties = HCatUtil.getInputJobProperties(storageHandler, inputJobInfo); + + for (String key : parameters.keySet()) { + hcatProperties.put(key, parameters.get(key)); } + // FIXME + // Bloating partinfo with inputJobInfo is not good + return new PartInfo(schema, storageHandler, sd.getLocation(), + hcatProperties, jobProperties, inputJobInfo.getTableInfo()); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InputJobInfo.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InputJobInfo.java index ed8a501..8e3b2c2 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InputJobInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InputJobInfo.java @@ -46,157 +46,157 @@ @InterfaceStability.Evolving public class InputJobInfo implements Serializable { - /** The serialization version */ - private static final long serialVersionUID = 1L; - - /** The db and table names. */ - private final String databaseName; - private final String tableName; - - /** meta information of the table to be read from */ - private HCatTableInfo tableInfo; - - /** The partition filter */ - private String filter; - - /** The list of partitions matching the filter. */ - transient private List partitions; - - /** implementation specific job properties */ - private Properties properties; - - /** - * Initializes a new InputJobInfo - * for reading data from a table. 
- * @param databaseName the db name - * @param tableName the table name - * @param filter the partition filter - * @param properties implementation specific job properties - */ - public static InputJobInfo create(String databaseName, - String tableName, - String filter, - Properties properties) { - return new InputJobInfo(databaseName, tableName, filter, properties); - } - - /** - * Initializes a new InputJobInfo - * for reading data from a table. - * @param databaseName the db name - * @param tableName the table name - * @param filter the partition filter - */ - @Deprecated - public static InputJobInfo create(String databaseName, - String tableName, - String filter) { - return create(databaseName, tableName, filter, null); - } - - - private InputJobInfo(String databaseName, - String tableName, - String filter, - Properties properties) { - this.databaseName = (databaseName == null) ? - MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; - this.tableName = tableName; - this.filter = filter; - this.properties = properties == null ? new Properties() : properties; - } - - /** - * Gets the value of databaseName - * @return the databaseName - */ - public String getDatabaseName() { - return databaseName; - } - - /** - * Gets the value of tableName - * @return the tableName - */ - public String getTableName() { - return tableName; - } - - /** - * Gets the table's meta information - * @return the HCatTableInfo - */ - public HCatTableInfo getTableInfo() { - return tableInfo; - } - - /** - * set the tablInfo instance - * this should be the same instance - * determined by this object's DatabaseName and TableName - * @param tableInfo - */ - void setTableInfo(HCatTableInfo tableInfo) { - this.tableInfo = tableInfo; - } - - /** - * Gets the value of partition filter - * @return the filter string - */ - public String getFilter() { - return filter; - } - - /** - * @return partition info - */ - public List getPartitions() { - return partitions; - } - - /** - * @return partition info list - */ - void setPartitions(List partitions) { - this.partitions = partitions; - } - - /** - * Set/Get Property information to be passed down to *StorageHandler implementation - * put implementation specific storage handler configurations here - * @return the implementation specific job properties - */ - public Properties getProperties() { - return properties; - } - - /** - * Serialize this object, compressing the partitions which can exceed the - * allowed jobConf size. - * @see HCATALOG-453 - */ - private void writeObject(ObjectOutputStream oos) - throws IOException { - oos.defaultWriteObject(); - Deflater def = new Deflater(Deflater.BEST_COMPRESSION); - ObjectOutputStream partInfoWriter = - new ObjectOutputStream(new DeflaterOutputStream(oos, def)); - partInfoWriter.writeObject(partitions); - partInfoWriter.close(); - } - - /** - * Deserialize this object, decompressing the partitions which can exceed the - * allowed jobConf size. - * @see HCATALOG-453 - */ - @SuppressWarnings("unchecked") - private void readObject(ObjectInputStream ois) - throws IOException, ClassNotFoundException { - ois.defaultReadObject(); - ObjectInputStream partInfoReader = - new ObjectInputStream(new InflaterInputStream(ois)); - partitions = (List)partInfoReader.readObject(); - } + /** The serialization version */ + private static final long serialVersionUID = 1L; + + /** The db and table names. 
*/ + private final String databaseName; + private final String tableName; + + /** meta information of the table to be read from */ + private HCatTableInfo tableInfo; + + /** The partition filter */ + private String filter; + + /** The list of partitions matching the filter. */ + transient private List partitions; + + /** implementation specific job properties */ + private Properties properties; + + /** + * Initializes a new InputJobInfo + * for reading data from a table. + * @param databaseName the db name + * @param tableName the table name + * @param filter the partition filter + * @param properties implementation specific job properties + */ + public static InputJobInfo create(String databaseName, + String tableName, + String filter, + Properties properties) { + return new InputJobInfo(databaseName, tableName, filter, properties); + } + + /** + * Initializes a new InputJobInfo + * for reading data from a table. + * @param databaseName the db name + * @param tableName the table name + * @param filter the partition filter + */ + @Deprecated + public static InputJobInfo create(String databaseName, + String tableName, + String filter) { + return create(databaseName, tableName, filter, null); + } + + + private InputJobInfo(String databaseName, + String tableName, + String filter, + Properties properties) { + this.databaseName = (databaseName == null) ? + MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; + this.tableName = tableName; + this.filter = filter; + this.properties = properties == null ? new Properties() : properties; + } + + /** + * Gets the value of databaseName + * @return the databaseName + */ + public String getDatabaseName() { + return databaseName; + } + + /** + * Gets the value of tableName + * @return the tableName + */ + public String getTableName() { + return tableName; + } + + /** + * Gets the table's meta information + * @return the HCatTableInfo + */ + public HCatTableInfo getTableInfo() { + return tableInfo; + } + + /** + * set the tablInfo instance + * this should be the same instance + * determined by this object's DatabaseName and TableName + * @param tableInfo + */ + void setTableInfo(HCatTableInfo tableInfo) { + this.tableInfo = tableInfo; + } + + /** + * Gets the value of partition filter + * @return the filter string + */ + public String getFilter() { + return filter; + } + + /** + * @return partition info + */ + public List getPartitions() { + return partitions; + } + + /** + * @return partition info list + */ + void setPartitions(List partitions) { + this.partitions = partitions; + } + + /** + * Set/Get Property information to be passed down to *StorageHandler implementation + * put implementation specific storage handler configurations here + * @return the implementation specific job properties + */ + public Properties getProperties() { + return properties; + } + + /** + * Serialize this object, compressing the partitions which can exceed the + * allowed jobConf size. + * @see HCATALOG-453 + */ + private void writeObject(ObjectOutputStream oos) + throws IOException { + oos.defaultWriteObject(); + Deflater def = new Deflater(Deflater.BEST_COMPRESSION); + ObjectOutputStream partInfoWriter = + new ObjectOutputStream(new DeflaterOutputStream(oos, def)); + partInfoWriter.writeObject(partitions); + partInfoWriter.close(); + } + + /** + * Deserialize this object, decompressing the partitions which can exceed the + * allowed jobConf size. 
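The writeObject/readObject pair above compresses only the bulky partition list inside ordinary Java serialization. A self-contained sketch of the same idiom (the class and its field are hypothetical stand-ins for InputJobInfo and its partitions):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

class CompressedFieldSketch implements Serializable {
  private static final long serialVersionUID = 1L;
  // transient: kept out of defaultWriteObject and written compressed instead
  private transient List<String> bulky;

  CompressedFieldSketch(List<String> bulky) { this.bulky = bulky; }

  private void writeObject(ObjectOutputStream oos) throws IOException {
    oos.defaultWriteObject();
    ObjectOutputStream nested = new ObjectOutputStream(
        new DeflaterOutputStream(oos, new Deflater(Deflater.BEST_COMPRESSION)));
    nested.writeObject(bulky);
    nested.close(); // finishes the deflater; safe here since this is the last field written
  }

  @SuppressWarnings("unchecked")
  private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
    ois.defaultReadObject();
    ObjectInputStream nested = new ObjectInputStream(new InflaterInputStream(ois));
    bulky = (List<String>) nested.readObject();
  }

  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    ObjectOutputStream out = new ObjectOutputStream(bytes);
    out.writeObject(new CompressedFieldSketch(
        new ArrayList<String>(Collections.nCopies(1000, "ds=2013-01-01"))));
    out.close();
    ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    CompressedFieldSketch restored = (CompressedFieldSketch) in.readObject();
    System.out.println("restored " + restored.bulky.size()
        + " entries from " + bytes.size() + " compressed bytes");
  }
}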
+ * @see HCATALOG-453 + */ + @SuppressWarnings("unchecked") + private void readObject(ObjectInputStream ois) + throws IOException, ClassNotFoundException { + ois.defaultReadObject(); + ObjectInputStream partInfoReader = + new ObjectInputStream(new InflaterInputStream(ois)); + partitions = (List)partInfoReader.readObject(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java index 4978b21..2147910 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java @@ -60,162 +60,162 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.InternalUtil} instead */ class InternalUtil { - private static final Logger LOG = LoggerFactory.getLogger(InternalUtil.class); + private static final Logger LOG = LoggerFactory.getLogger(InternalUtil.class); - static StorerInfo extractStorerInfo(StorageDescriptor sd, Map properties) throws IOException { - Properties hcatProperties = new Properties(); - for (String key : properties.keySet()) { - hcatProperties.put(key, properties.get(key)); - } - - // also populate with StorageDescriptor->SerDe.Parameters - for (Map.Entry param : - sd.getSerdeInfo().getParameters().entrySet()) { - hcatProperties.put(param.getKey(), param.getValue()); - } - - - return new StorerInfo( - sd.getInputFormat(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), - properties.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE), - hcatProperties); + static StorerInfo extractStorerInfo(StorageDescriptor sd, Map properties) throws IOException { + Properties hcatProperties = new Properties(); + for (String key : properties.keySet()) { + hcatProperties.put(key, properties.get(key)); } - static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema) throws IOException { - - if (outputSchema == null) { - throw new IOException("Invalid output schema specified"); - } + // also populate with StorageDescriptor->SerDe.Parameters + for (Map.Entry param : + sd.getSerdeInfo().getParameters().entrySet()) { + hcatProperties.put(param.getKey(), param.getValue()); + } - List fieldInspectors = new ArrayList(); - List fieldNames = new ArrayList(); - for (HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) { - TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString()); + return new StorerInfo( + sd.getInputFormat(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), + properties.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE), + hcatProperties); + } - fieldNames.add(hcatFieldSchema.getName()); - fieldInspectors.add(getObjectInspector(type)); - } + static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema) throws IOException { - StructObjectInspector structInspector = ObjectInspectorFactory. - getStandardStructObjectInspector(fieldNames, fieldInspectors); - return structInspector; + if (outputSchema == null) { + throw new IOException("Invalid output schema specified"); } - private static ObjectInspector getObjectInspector(TypeInfo type) throws IOException { - - switch (type.getCategory()) { - - case PRIMITIVE: - PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type; - return PrimitiveObjectInspectorFactory. 
- getPrimitiveJavaObjectInspector(primitiveType.getPrimitiveCategory()); + List fieldInspectors = new ArrayList(); + List fieldNames = new ArrayList(); - case MAP: - MapTypeInfo mapType = (MapTypeInfo) type; - MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector( - getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo())); - return mapInspector; + for (HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) { + TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString()); - case LIST: - ListTypeInfo listType = (ListTypeInfo) type; - ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector( - getObjectInspector(listType.getListElementTypeInfo())); - return listInspector; + fieldNames.add(hcatFieldSchema.getName()); + fieldInspectors.add(getObjectInspector(type)); + } - case STRUCT: - StructTypeInfo structType = (StructTypeInfo) type; - List fieldTypes = structType.getAllStructFieldTypeInfos(); + StructObjectInspector structInspector = ObjectInspectorFactory. + getStandardStructObjectInspector(fieldNames, fieldInspectors); + return structInspector; + } - List fieldInspectors = new ArrayList(); - for (TypeInfo fieldType : fieldTypes) { - fieldInspectors.add(getObjectInspector(fieldType)); - } + private static ObjectInspector getObjectInspector(TypeInfo type) throws IOException { - StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - structType.getAllStructFieldNames(), fieldInspectors); - return structInspector; + switch (type.getCategory()) { - default: - throw new IOException("Unknown field schema type"); - } - } + case PRIMITIVE: + PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type; + return PrimitiveObjectInspectorFactory. 
+ getPrimitiveJavaObjectInspector(primitiveType.getPrimitiveCategory()); - //TODO this has to find a better home, it's also hardcoded as default in hive would be nice - // if the default was decided by the serde - static void initializeOutputSerDe(SerDe serDe, Configuration conf, OutputJobInfo jobInfo) - throws SerDeException { - serDe.initialize(conf, getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema())); - } + case MAP: + MapTypeInfo mapType = (MapTypeInfo) type; + MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector( + getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo())); + return mapInspector; - static void initializeDeserializer(Deserializer deserializer, Configuration conf, - HCatTableInfo info, HCatSchema schema) throws SerDeException { - Properties props = getSerdeProperties(info, schema); - LOG.info("Initializing " + deserializer.getClass().getName() + " with properties " + props); - deserializer.initialize(conf, props); - } + case LIST: + ListTypeInfo listType = (ListTypeInfo) type; + ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector( + getObjectInspector(listType.getListElementTypeInfo())); + return listInspector; - private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s) - throws SerDeException { - Properties props = new Properties(); - List fields = HCatUtil.getFieldSchemaList(s.getFields()); - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, - MetaStoreUtils.getColumnNamesFromFieldSchema(fields)); - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, - MetaStoreUtils.getColumnTypesFromFieldSchema(fields)); + case STRUCT: + StructTypeInfo structType = (StructTypeInfo) type; + List fieldTypes = structType.getAllStructFieldTypeInfos(); - // setting these props to match LazySimpleSerde - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1"); + List fieldInspectors = new ArrayList(); + for (TypeInfo fieldType : fieldTypes) { + fieldInspectors.add(getObjectInspector(fieldType)); + } - //add props from params set in table schema - props.putAll(info.getStorerInfo().getProperties()); + StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector( + structType.getAllStructFieldNames(), fieldInspectors); + return structInspector; - return props; + default: + throw new IOException("Unknown field schema type"); } - - static Reporter createReporter(TaskAttemptContext context) { - return new ProgressReporter(context); + } + + //TODO this has to find a better home, it's also hardcoded as default in hive would be nice + // if the default was decided by the serde + static void initializeOutputSerDe(SerDe serDe, Configuration conf, OutputJobInfo jobInfo) + throws SerDeException { + serDe.initialize(conf, getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema())); + } + + static void initializeDeserializer(Deserializer deserializer, Configuration conf, + HCatTableInfo info, HCatSchema schema) throws SerDeException { + Properties props = getSerdeProperties(info, schema); + LOG.info("Initializing " + deserializer.getClass().getName() + " with properties " + props); + deserializer.initialize(conf, props); + } + + private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s) + throws 
SerDeException { + Properties props = new Properties(); + List fields = HCatUtil.getFieldSchemaList(s.getFields()); + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, + MetaStoreUtils.getColumnNamesFromFieldSchema(fields)); + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, + MetaStoreUtils.getColumnTypesFromFieldSchema(fields)); + + // setting these props to match LazySimpleSerde + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1"); + + //add props from params set in table schema + props.putAll(info.getStorerInfo().getProperties()); + + return props; + } + + static Reporter createReporter(TaskAttemptContext context) { + return new ProgressReporter(context); + } + + /** + * Casts an InputSplit into a HCatSplit, providing a useful error message if the cast fails. + * @param split the InputSplit + * @return the HCatSplit + * @throws IOException + */ + public static HCatSplit castToHCatSplit(InputSplit split) throws IOException { + if (split instanceof HCatSplit) { + return (HCatSplit) split; + } else { + throw new IOException("Split must be " + HCatSplit.class.getName() + + " but found " + split.getClass().getName()); } - - /** - * Casts an InputSplit into a HCatSplit, providing a useful error message if the cast fails. - * @param split the InputSplit - * @return the HCatSplit - * @throws IOException - */ - public static HCatSplit castToHCatSplit(InputSplit split) throws IOException { - if (split instanceof HCatSplit) { - return (HCatSplit) split; - } else { - throw new IOException("Split must be " + HCatSplit.class.getName() - + " but found " + split.getClass().getName()); - } + } + + + static Map createPtnKeyValueMap(Table table, Partition ptn) + throws IOException { + List values = ptn.getValues(); + if (values.size() != table.getPartitionKeys().size()) { + throw new IOException( + "Partition values in partition inconsistent with table definition, table " + + table.getTableName() + " has " + + table.getPartitionKeys().size() + + " partition keys, partition has " + values.size() + + "partition values"); } + Map ptnKeyValues = new HashMap(); - static Map createPtnKeyValueMap(Table table, Partition ptn) - throws IOException { - List values = ptn.getValues(); - if (values.size() != table.getPartitionKeys().size()) { - throw new IOException( - "Partition values in partition inconsistent with table definition, table " - + table.getTableName() + " has " - + table.getPartitionKeys().size() - + " partition keys, partition has " + values.size() - + "partition values"); - } - - Map ptnKeyValues = new HashMap(); - - int i = 0; - for (FieldSchema schema : table.getPartitionKeys()) { - // CONCERN : the way this mapping goes, the order *needs* to be - // preserved for table.getPartitionKeys() and ptn.getValues() - ptnKeyValues.put(schema.getName().toLowerCase(), values.get(i)); - i++; - } - - return ptnKeyValues; + int i = 0; + for (FieldSchema schema : table.getPartitionKeys()) { + // CONCERN : the way this mapping goes, the order *needs* to be + // preserved for table.getPartitionKeys() and ptn.getValues() + ptnKeyValues.put(schema.getName().toLowerCase(), values.get(i)); + i++; } + + return ptnKeyValues; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/MultiOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/MultiOutputFormat.java index d6da8dd..879cd06 
100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/MultiOutputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/MultiOutputFormat.java @@ -137,484 +137,484 @@ */ public class MultiOutputFormat extends OutputFormat { - private static final Logger LOGGER = LoggerFactory.getLogger(MultiOutputFormat.class.getName()); - private static final String MO_ALIASES = "mapreduce.multiout.aliases"; - private static final String MO_ALIAS = "mapreduce.multiout.alias"; - private static final String CONF_KEY_DELIM = "%%"; - private static final String CONF_VALUE_DELIM = ";;"; - private static final String COMMA_DELIM = ","; - private static final List configsToOverride = new ArrayList(); - private static final Map configsToMerge = new HashMap(); - - static { - configsToOverride.add("mapred.output.dir"); - configsToOverride.add(ShimLoader.getHadoopShims().getHCatShim().getPropertyName(HadoopShims.HCatHadoopShims.PropertyName.CACHE_SYMLINK)); - configsToMerge.put(JobContext.JOB_NAMENODES, COMMA_DELIM); - configsToMerge.put("tmpfiles", COMMA_DELIM); - configsToMerge.put("tmpjars", COMMA_DELIM); - configsToMerge.put("tmparchives", COMMA_DELIM); - configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName(HadoopShims.HCatHadoopShims.PropertyName.CACHE_ARCHIVES), COMMA_DELIM); - configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName(HadoopShims.HCatHadoopShims.PropertyName.CACHE_FILES), COMMA_DELIM); - String fileSep; - if (HCatUtil.isHadoop23()) { - fileSep = ","; + private static final Logger LOGGER = LoggerFactory.getLogger(MultiOutputFormat.class.getName()); + private static final String MO_ALIASES = "mapreduce.multiout.aliases"; + private static final String MO_ALIAS = "mapreduce.multiout.alias"; + private static final String CONF_KEY_DELIM = "%%"; + private static final String CONF_VALUE_DELIM = ";;"; + private static final String COMMA_DELIM = ","; + private static final List configsToOverride = new ArrayList(); + private static final Map configsToMerge = new HashMap(); + + static { + configsToOverride.add("mapred.output.dir"); + configsToOverride.add(ShimLoader.getHadoopShims().getHCatShim().getPropertyName(HadoopShims.HCatHadoopShims.PropertyName.CACHE_SYMLINK)); + configsToMerge.put(JobContext.JOB_NAMENODES, COMMA_DELIM); + configsToMerge.put("tmpfiles", COMMA_DELIM); + configsToMerge.put("tmpjars", COMMA_DELIM); + configsToMerge.put("tmparchives", COMMA_DELIM); + configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName(HadoopShims.HCatHadoopShims.PropertyName.CACHE_ARCHIVES), COMMA_DELIM); + configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName(HadoopShims.HCatHadoopShims.PropertyName.CACHE_FILES), COMMA_DELIM); + String fileSep; + if (HCatUtil.isHadoop23()) { + fileSep = ","; + } else { + fileSep = System.getProperty("path.separator"); + } + configsToMerge.put("mapred.job.classpath.archives", fileSep); + configsToMerge.put("mapred.job.classpath.files", fileSep); + } + + /** + * Get a JobConfigurer instance that will support configuration of the job + * for multiple output formats. 
+ * + * @param job the mapreduce job to be submitted + * @return JobConfigurer + */ + public static JobConfigurer createConfigurer(Job job) { + return JobConfigurer.create(job); + } + + /** + * Get the JobContext with the related OutputFormat configuration populated given the alias + * and the actual JobContext + * @param alias the name given to the OutputFormat configuration + * @param context the JobContext + * @return a copy of the JobContext with the alias configuration populated + */ + public static JobContext getJobContext(String alias, JobContext context) { + String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); + JobContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConfiguration(), context.getJobID()); + addToConfig(aliasConf, aliasContext.getConfiguration()); + return aliasContext; + } + + /** + * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias + * and the actual TaskAttemptContext + * @param alias the name given to the OutputFormat configuration + * @param context the Mapper or Reducer Context + * @return a copy of the TaskAttemptContext with the alias configuration populated + */ + public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) { + String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); + TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + context.getConfiguration(), context.getTaskAttemptID()); + addToConfig(aliasConf, aliasContext.getConfiguration()); + return aliasContext; + } + + /** + * Write the output key and value using the OutputFormat defined by the + * alias. + * + * @param alias the name given to the OutputFormat configuration + * @param key the output key to be written + * @param value the output value to be written + * @param context the Mapper or Reducer Context + * @throws IOException + * @throws InterruptedException + */ + public static void write(String alias, K key, V value, TaskInputOutputContext context) + throws IOException, InterruptedException { + KeyValue keyval = new KeyValue(key, value); + context.write(new Text(alias), keyval); + } + + @Override + public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { + for (String alias : getOutputFormatAliases(context)) { + LOGGER.debug("Calling checkOutputSpecs for alias: " + alias); + JobContext aliasContext = getJobContext(alias, context); + OutputFormat outputFormat = getOutputFormatInstance(aliasContext); + outputFormat.checkOutputSpecs(aliasContext); + // Copy credentials and any new config added back to JobContext + context.getCredentials().addAll(aliasContext.getCredentials()); + setAliasConf(alias, context, aliasContext); + } + } + + @Override + public RecordWriter getRecordWriter(TaskAttemptContext context) + throws IOException, + InterruptedException { + return new MultiRecordWriter(context); + } + + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, + InterruptedException { + return new MultiOutputCommitter(context); + } + + private static OutputFormat getOutputFormatInstance(JobContext context) { + OutputFormat outputFormat; + try { + outputFormat = ReflectionUtils.newInstance(context.getOutputFormatClass(), + context.getConfiguration()); + } catch (ClassNotFoundException e) { + throw new IllegalStateException(e); + } + return outputFormat; + } + + private static String[] 
getOutputFormatAliases(JobContext context) { + return context.getConfiguration().getStrings(MO_ALIASES); + } + + /** + * Compare the aliasContext with userJob and add the differing configuration + * as mapreduce.multiout.alias..conf to the userJob. + *

+ * Merge configs like tmpjars, tmpfiles, tmparchives, and + * mapreduce.job.hdfs-servers that are handled directly by JobClient, and add + * them to userJob. + *

+ * Add mapred.output.dir config to userJob. + * + * @param alias alias name associated with a OutputFormat + * @param userJob reference to Job that the user is going to submit + * @param aliasContext JobContext populated with OutputFormat related + * configuration. + */ + private static void setAliasConf(String alias, JobContext userJob, JobContext aliasContext) { + Configuration userConf = userJob.getConfiguration(); + StringBuilder builder = new StringBuilder(); + for (Entry conf : aliasContext.getConfiguration()) { + String key = conf.getKey(); + String value = conf.getValue(); + String jobValue = userConf.getRaw(key); + if (jobValue == null || !jobValue.equals(value)) { + if (configsToMerge.containsKey(key)) { + String mergedValue = getMergedConfValue(jobValue, value, configsToMerge.get(key)); + userConf.set(key, mergedValue); } else { - fileSep = System.getProperty("path.separator"); + if (configsToOverride.contains(key)) { + userConf.set(key, value); + } + builder.append(key).append(CONF_KEY_DELIM).append(value) + .append(CONF_VALUE_DELIM); } - configsToMerge.put("mapred.job.classpath.archives", fileSep); - configsToMerge.put("mapred.job.classpath.files", fileSep); + } } - - /** - * Get a JobConfigurer instance that will support configuration of the job - * for multiple output formats. - * - * @param job the mapreduce job to be submitted - * @return JobConfigurer - */ - public static JobConfigurer createConfigurer(Job job) { - return JobConfigurer.create(job); + if (builder.length() > CONF_VALUE_DELIM.length()) { + builder.delete(builder.length() - CONF_VALUE_DELIM.length(), builder.length()); + userConf.set(getAliasConfName(alias), builder.toString()); } + } - /** - * Get the JobContext with the related OutputFormat configuration populated given the alias - * and the actual JobContext - * @param alias the name given to the OutputFormat configuration - * @param context the JobContext - * @return a copy of the JobContext with the alias configuration populated - */ - public static JobContext getJobContext(String alias, JobContext context) { - String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); - JobContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConfiguration(), context.getJobID()); - addToConfig(aliasConf, aliasContext.getConfiguration()); - return aliasContext; + private static String getMergedConfValue(String originalValues, String newValues, String separator) { + if (originalValues == null) { + return newValues; } - - /** - * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias - * and the actual TaskAttemptContext - * @param alias the name given to the OutputFormat configuration - * @param context the Mapper or Reducer Context - * @return a copy of the TaskAttemptContext with the alias configuration populated - */ - public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) { - String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); - TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - context.getConfiguration(), context.getTaskAttemptID()); - addToConfig(aliasConf, aliasContext.getConfiguration()); - return aliasContext; + Set mergedValues = new LinkedHashSet(); + mergedValues.addAll(Arrays.asList(StringUtils.split(originalValues, separator))); + mergedValues.addAll(Arrays.asList(StringUtils.split(newValues, separator))); + StringBuilder builder = new 
StringBuilder(originalValues.length() + newValues.length() + 2); + for (String value : mergedValues) { + builder.append(value).append(separator); } + return builder.substring(0, builder.length() - separator.length()); + } - /** - * Write the output key and value using the OutputFormat defined by the - * alias. - * - * @param alias the name given to the OutputFormat configuration - * @param key the output key to be written - * @param value the output value to be written - * @param context the Mapper or Reducer Context - * @throws IOException - * @throws InterruptedException - */ - public static void write(String alias, K key, V value, TaskInputOutputContext context) - throws IOException, InterruptedException { - KeyValue keyval = new KeyValue(key, value); - context.write(new Text(alias), keyval); - } + private static String getAliasConfName(String alias) { + return MO_ALIAS + "." + alias + ".conf"; + } - @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - for (String alias : getOutputFormatAliases(context)) { - LOGGER.debug("Calling checkOutputSpecs for alias: " + alias); - JobContext aliasContext = getJobContext(alias, context); - OutputFormat outputFormat = getOutputFormatInstance(aliasContext); - outputFormat.checkOutputSpecs(aliasContext); - // Copy credentials and any new config added back to JobContext - context.getCredentials().addAll(aliasContext.getCredentials()); - setAliasConf(alias, context, aliasContext); - } + private static void addToConfig(String aliasConf, Configuration conf) { + String[] config = aliasConf.split(CONF_KEY_DELIM + "|" + CONF_VALUE_DELIM); + for (int i = 0; i < config.length; i += 2) { + conf.set(config[i], config[i + 1]); } + } - @Override - public RecordWriter getRecordWriter(TaskAttemptContext context) - throws IOException, - InterruptedException { - return new MultiRecordWriter(context); - } + /** + * Class that supports configuration of the job for multiple output formats. + */ + public static class JobConfigurer { - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, - InterruptedException { - return new MultiOutputCommitter(context); - } + private final Job job; + private Map outputConfigs = new LinkedHashMap(); - private static OutputFormat getOutputFormatInstance(JobContext context) { - OutputFormat outputFormat; - try { - outputFormat = ReflectionUtils.newInstance(context.getOutputFormatClass(), - context.getConfiguration()); - } catch (ClassNotFoundException e) { - throw new IllegalStateException(e); - } - return outputFormat; + private JobConfigurer(Job job) { + this.job = job; } - private static String[] getOutputFormatAliases(JobContext context) { - return context.getConfiguration().getStrings(MO_ALIASES); + private static JobConfigurer create(Job job) { + JobConfigurer configurer = new JobConfigurer(job); + return configurer; } /** - * Compare the aliasContext with userJob and add the differing configuration - * as mapreduce.multiout.alias..conf to the userJob. - *
- * Merge config like tmpjars, tmpfile, tmparchives, - * mapreduce.job.hdfs-servers that are directly handled by JobClient and add - * them to userJob. - *
- * Add mapred.output.dir config to userJob. + * Add a OutputFormat configuration to the Job with a alias name. * - * @param alias alias name associated with a OutputFormat - * @param userJob reference to Job that the user is going to submit - * @param aliasContext JobContext populated with OutputFormat related - * configuration. + * @param alias the name to be given to the OutputFormat configuration + * @param outputFormatClass OutputFormat class + * @param keyClass the key class for the output data + * @param valueClass the value class for the output data + * @throws IOException */ - private static void setAliasConf(String alias, JobContext userJob, JobContext aliasContext) { - Configuration userConf = userJob.getConfiguration(); - StringBuilder builder = new StringBuilder(); - for (Entry conf : aliasContext.getConfiguration()) { - String key = conf.getKey(); - String value = conf.getValue(); - String jobValue = userConf.getRaw(key); - if (jobValue == null || !jobValue.equals(value)) { - if (configsToMerge.containsKey(key)) { - String mergedValue = getMergedConfValue(jobValue, value, configsToMerge.get(key)); - userConf.set(key, mergedValue); - } else { - if (configsToOverride.contains(key)) { - userConf.set(key, value); - } - builder.append(key).append(CONF_KEY_DELIM).append(value) - .append(CONF_VALUE_DELIM); - } - } - } - if (builder.length() > CONF_VALUE_DELIM.length()) { - builder.delete(builder.length() - CONF_VALUE_DELIM.length(), builder.length()); - userConf.set(getAliasConfName(alias), builder.toString()); - } + public void addOutputFormat(String alias, + Class outputFormatClass, + Class keyClass, Class valueClass) throws IOException { + Job copy = new Job(this.job.getConfiguration()); + outputConfigs.put(alias, copy); + copy.setOutputFormatClass(outputFormatClass); + copy.setOutputKeyClass(keyClass); + copy.setOutputValueClass(valueClass); } - private static String getMergedConfValue(String originalValues, String newValues, String separator) { - if (originalValues == null) { - return newValues; - } - Set mergedValues = new LinkedHashSet(); - mergedValues.addAll(Arrays.asList(StringUtils.split(originalValues, separator))); - mergedValues.addAll(Arrays.asList(StringUtils.split(newValues, separator))); - StringBuilder builder = new StringBuilder(originalValues.length() + newValues.length() + 2); - for (String value : mergedValues) { - builder.append(value).append(separator); - } - return builder.substring(0, builder.length() - separator.length()); - } - - private static String getAliasConfName(String alias) { - return MO_ALIAS + "." + alias + ".conf"; - } - - private static void addToConfig(String aliasConf, Configuration conf) { - String[] config = aliasConf.split(CONF_KEY_DELIM + "|" + CONF_VALUE_DELIM); - for (int i = 0; i < config.length; i += 2) { - conf.set(config[i], config[i + 1]); - } + /** + * Get the Job configuration for a OutputFormat defined by the alias + * name. The job returned by this method should be passed to the + * OutputFormat for any configuration instead of the Job that will be + * submitted to the JobClient. + * + * @param alias the name used for the OutputFormat during + * addOutputFormat + * @return Job + */ + public Job getJob(String alias) { + Job copy = outputConfigs.get(alias); + if (copy == null) { + throw new IllegalArgumentException("OutputFormat with alias " + alias + + " has not beed added"); + } + return copy; } /** - * Class that supports configuration of the job for multiple output formats. 
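
The addOutputFormat and getJob methods above, together with configure() described just below, imply a specific driver-side workflow: register each OutputFormat under an alias, apply per-format settings to the copied Job returned by getJob(alias), and call configure() last before submission. A minimal sketch of that workflow follows; the aliases, output paths, and map-only setup are illustrative assumptions, and the mapper itself is omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hcatalog.mapreduce.MultiOutputFormat;
import org.apache.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer;

public class MultiOutputDriverSketch {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration(), "multi-output-sketch");
    // job.setMapperClass(...) would go here; inside the mapper, records are routed with
    //   MultiOutputFormat.write("text", key, value, context);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(MultiOutputFormat.class);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    // Register each OutputFormat under an alias.
    configurer.addOutputFormat("text", TextOutputFormat.class, Text.class, Text.class);
    configurer.addOutputFormat("seq", SequenceFileOutputFormat.class, Text.class, Text.class);
    // Per-alias settings go on the copy returned by getJob(alias), not on 'job' itself.
    FileOutputFormat.setOutputPath(configurer.getJob("text"), new Path("/tmp/out/text"));
    FileOutputFormat.setOutputPath(configurer.getJob("seq"), new Path("/tmp/out/seq"));
    // Must be the last configuration call before the job is submitted.
    configurer.configure();

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
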
+ * Configure the job with the multiple output formats added. This method + * should be called after all the output formats have been added and + * configured and before the job submission. */ - public static class JobConfigurer { - - private final Job job; - private Map outputConfigs = new LinkedHashMap(); + public void configure() { + StringBuilder aliases = new StringBuilder(); + Configuration jobConf = job.getConfiguration(); + for (Entry entry : outputConfigs.entrySet()) { + // Copy credentials + job.getCredentials().addAll(entry.getValue().getCredentials()); + String alias = entry.getKey(); + aliases.append(alias).append(COMMA_DELIM); + // Store the differing configuration for each alias in the job + // as a setting. + setAliasConf(alias, job, entry.getValue()); + } + aliases.delete(aliases.length() - COMMA_DELIM.length(), aliases.length()); + jobConf.set(MO_ALIASES, aliases.toString()); + } - private JobConfigurer(Job job) { - this.job = job; - } + } - private static JobConfigurer create(Job job) { - JobConfigurer configurer = new JobConfigurer(job); - return configurer; - } + private static class KeyValue implements Writable { + private final K key; + private final V value; - /** - * Add a OutputFormat configuration to the Job with a alias name. - * - * @param alias the name to be given to the OutputFormat configuration - * @param outputFormatClass OutputFormat class - * @param keyClass the key class for the output data - * @param valueClass the value class for the output data - * @throws IOException - */ - public void addOutputFormat(String alias, - Class outputFormatClass, - Class keyClass, Class valueClass) throws IOException { - Job copy = new Job(this.job.getConfiguration()); - outputConfigs.put(alias, copy); - copy.setOutputFormatClass(outputFormatClass); - copy.setOutputKeyClass(keyClass); - copy.setOutputValueClass(valueClass); - } + public KeyValue(K key, V value) { + this.key = key; + this.value = value; + } - /** - * Get the Job configuration for a OutputFormat defined by the alias - * name. The job returned by this method should be passed to the - * OutputFormat for any configuration instead of the Job that will be - * submitted to the JobClient. - * - * @param alias the name used for the OutputFormat during - * addOutputFormat - * @return Job - */ - public Job getJob(String alias) { - Job copy = outputConfigs.get(alias); - if (copy == null) { - throw new IllegalArgumentException("OutputFormat with alias " + alias - + " has not beed added"); - } - return copy; - } + public K getKey() { + return key; + } - /** - * Configure the job with the multiple output formats added. This method - * should be called after all the output formats have been added and - * configured and before the job submission. - */ - public void configure() { - StringBuilder aliases = new StringBuilder(); - Configuration jobConf = job.getConfiguration(); - for (Entry entry : outputConfigs.entrySet()) { - // Copy credentials - job.getCredentials().addAll(entry.getValue().getCredentials()); - String alias = entry.getKey(); - aliases.append(alias).append(COMMA_DELIM); - // Store the differing configuration for each alias in the job - // as a setting. - setAliasConf(alias, job, entry.getValue()); - } - aliases.delete(aliases.length() - COMMA_DELIM.length(), aliases.length()); - jobConf.set(MO_ALIASES, aliases.toString()); - } + public V getValue() { + return value; + } + @Override + public void write(DataOutput out) throws IOException { + // Ignore. 
Not required as this will be never + // serialized/deserialized. } - private static class KeyValue implements Writable { - private final K key; - private final V value; + @Override + public void readFields(DataInput in) throws IOException { + // Ignore. Not required as this will be never + // serialized/deserialized. + } + } - public KeyValue(K key, V value) { - this.key = key; - this.value = value; - } + private static class MultiRecordWriter extends RecordWriter { - public K getKey() { - return key; - } + private final Map baseRecordWriters; - public V getValue() { - return value; - } + public MultiRecordWriter(TaskAttemptContext context) throws IOException, + InterruptedException { + baseRecordWriters = new LinkedHashMap(); + String[] aliases = getOutputFormatAliases(context); + for (String alias : aliases) { + LOGGER.info("Creating record writer for alias: " + alias); + TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); + Configuration aliasConf = aliasContext.getConfiguration(); + // Create output directory if not already created. + String outDir = aliasConf.get("mapred.output.dir"); + if (outDir != null) { + Path outputDir = new Path(outDir); + FileSystem fs = outputDir.getFileSystem(aliasConf); + if (!fs.exists(outputDir)) { + fs.mkdirs(outputDir); + } + } + OutputFormat outputFormat = getOutputFormatInstance(aliasContext); + baseRecordWriters.put(alias, + new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), + aliasContext)); + } + } - @Override - public void write(DataOutput out) throws IOException { - // Ignore. Not required as this will be never - // serialized/deserialized. - } + @Override + public void write(Writable key, Writable value) throws IOException, InterruptedException { + Text _key = (Text) key; + KeyValue _value = (KeyValue) value; + String alias = new String(_key.getBytes(), 0, _key.getLength()); + BaseRecordWriterContainer baseRWContainer = baseRecordWriters.get(alias); + if (baseRWContainer == null) { + throw new IllegalArgumentException("OutputFormat with alias " + alias + + " has not been added"); + } + baseRWContainer.getRecordWriter().write(_value.getKey(), _value.getValue()); + } - @Override - public void readFields(DataInput in) throws IOException { - // Ignore. Not required as this will be never - // serialized/deserialized. - } + @Override + public void close(TaskAttemptContext context) throws IOException, InterruptedException { + for (Entry entry : baseRecordWriters.entrySet()) { + BaseRecordWriterContainer baseRWContainer = entry.getValue(); + LOGGER.info("Closing record writer for alias: " + entry.getKey()); + baseRWContainer.getRecordWriter().close(baseRWContainer.getContext()); + } } - private static class MultiRecordWriter extends RecordWriter { - - private final Map baseRecordWriters; - - public MultiRecordWriter(TaskAttemptContext context) throws IOException, - InterruptedException { - baseRecordWriters = new LinkedHashMap(); - String[] aliases = getOutputFormatAliases(context); - for (String alias : aliases) { - LOGGER.info("Creating record writer for alias: " + alias); - TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); - Configuration aliasConf = aliasContext.getConfiguration(); - // Create output directory if not already created. 
- String outDir = aliasConf.get("mapred.output.dir"); - if (outDir != null) { - Path outputDir = new Path(outDir); - FileSystem fs = outputDir.getFileSystem(aliasConf); - if (!fs.exists(outputDir)) { - fs.mkdirs(outputDir); - } - } - OutputFormat outputFormat = getOutputFormatInstance(aliasContext); - baseRecordWriters.put(alias, - new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), - aliasContext)); - } - } + } - @Override - public void write(Writable key, Writable value) throws IOException, InterruptedException { - Text _key = (Text) key; - KeyValue _value = (KeyValue) value; - String alias = new String(_key.getBytes(), 0, _key.getLength()); - BaseRecordWriterContainer baseRWContainer = baseRecordWriters.get(alias); - if (baseRWContainer == null) { - throw new IllegalArgumentException("OutputFormat with alias " + alias - + " has not been added"); - } - baseRWContainer.getRecordWriter().write(_value.getKey(), _value.getValue()); - } + private static class BaseRecordWriterContainer { - @Override - public void close(TaskAttemptContext context) throws IOException, InterruptedException { - for (Entry entry : baseRecordWriters.entrySet()) { - BaseRecordWriterContainer baseRWContainer = entry.getValue(); - LOGGER.info("Closing record writer for alias: " + entry.getKey()); - baseRWContainer.getRecordWriter().close(baseRWContainer.getContext()); - } - } + private final RecordWriter recordWriter; + private final TaskAttemptContext context; + public BaseRecordWriterContainer(RecordWriter recordWriter, TaskAttemptContext context) { + this.recordWriter = recordWriter; + this.context = context; } - private static class BaseRecordWriterContainer { - - private final RecordWriter recordWriter; - private final TaskAttemptContext context; - - public BaseRecordWriterContainer(RecordWriter recordWriter, TaskAttemptContext context) { - this.recordWriter = recordWriter; - this.context = context; - } - - public RecordWriter getRecordWriter() { - return recordWriter; - } + public RecordWriter getRecordWriter() { + return recordWriter; + } - public TaskAttemptContext getContext() { - return context; - } + public TaskAttemptContext getContext() { + return context; } + } - public class MultiOutputCommitter extends OutputCommitter { + public class MultiOutputCommitter extends OutputCommitter { - private final Map outputCommitters; + private final Map outputCommitters; - public MultiOutputCommitter(TaskAttemptContext context) throws IOException, - InterruptedException { - outputCommitters = new LinkedHashMap(); - String[] aliases = getOutputFormatAliases(context); - for (String alias : aliases) { - LOGGER.info("Creating output committer for alias: " + alias); - TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); - OutputCommitter baseCommitter = getOutputFormatInstance(aliasContext) - .getOutputCommitter(aliasContext); - outputCommitters.put(alias, - new BaseOutputCommitterContainer(baseCommitter, aliasContext)); - } - } + public MultiOutputCommitter(TaskAttemptContext context) throws IOException, + InterruptedException { + outputCommitters = new LinkedHashMap(); + String[] aliases = getOutputFormatAliases(context); + for (String alias : aliases) { + LOGGER.info("Creating output committer for alias: " + alias); + TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); + OutputCommitter baseCommitter = getOutputFormatInstance(aliasContext) + .getOutputCommitter(aliasContext); + outputCommitters.put(alias, + new BaseOutputCommitterContainer(baseCommitter, 
aliasContext)); + } + } - @Override - public void setupJob(JobContext jobContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling setupJob for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().setupJob(outputContainer.getContext()); - } - } + @Override + public void setupJob(JobContext jobContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling setupJob for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().setupJob(outputContainer.getContext()); + } + } - @Override - public void setupTask(TaskAttemptContext taskContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling setupTask for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().setupTask(outputContainer.getContext()); - } - } + @Override + public void setupTask(TaskAttemptContext taskContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling setupTask for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().setupTask(outputContainer.getContext()); + } + } - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { - boolean needTaskCommit = false; - for (String alias : outputCommitters.keySet()) { - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - needTaskCommit = needTaskCommit - || outputContainer.getBaseCommitter().needsTaskCommit( - outputContainer.getContext()); - } - return needTaskCommit; - } + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { + boolean needTaskCommit = false; + for (String alias : outputCommitters.keySet()) { + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + needTaskCommit = needTaskCommit + || outputContainer.getBaseCommitter().needsTaskCommit( + outputContainer.getContext()); + } + return needTaskCommit; + } - @Override - public void commitTask(TaskAttemptContext taskContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - OutputCommitter baseCommitter = outputContainer.getBaseCommitter(); - TaskAttemptContext committerContext = outputContainer.getContext(); - if (baseCommitter.needsTaskCommit(committerContext)) { - LOGGER.info("Calling commitTask for alias: " + alias); - baseCommitter.commitTask(committerContext); - } - } - } + @Override + public void commitTask(TaskAttemptContext taskContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + OutputCommitter baseCommitter = outputContainer.getBaseCommitter(); + TaskAttemptContext committerContext = outputContainer.getContext(); + if (baseCommitter.needsTaskCommit(committerContext)) { + LOGGER.info("Calling commitTask for alias: " + alias); + baseCommitter.commitTask(committerContext); + } + } + } - @Override - public void abortTask(TaskAttemptContext taskContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling abortTask for alias: " + alias); - 
BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().abortTask(outputContainer.getContext()); - } - } + @Override + public void abortTask(TaskAttemptContext taskContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling abortTask for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().abortTask(outputContainer.getContext()); + } + } - @Override - public void commitJob(JobContext jobContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling commitJob for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().commitJob(outputContainer.getContext()); - } - } + @Override + public void commitJob(JobContext jobContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling commitJob for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().commitJob(outputContainer.getContext()); + } + } - @Override - public void abortJob(JobContext jobContext, State state) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling abortJob for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().abortJob(outputContainer.getContext(), state); - } - } + @Override + public void abortJob(JobContext jobContext, State state) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling abortJob for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().abortJob(outputContainer.getContext(), state); + } } + } - private static class BaseOutputCommitterContainer { + private static class BaseOutputCommitterContainer { - private final OutputCommitter outputCommitter; - private final TaskAttemptContext context; + private final OutputCommitter outputCommitter; + private final TaskAttemptContext context; - public BaseOutputCommitterContainer(OutputCommitter outputCommitter, - TaskAttemptContext context) { - this.outputCommitter = outputCommitter; - this.context = context; - } + public BaseOutputCommitterContainer(OutputCommitter outputCommitter, + TaskAttemptContext context) { + this.outputCommitter = outputCommitter; + this.context = context; + } - public OutputCommitter getBaseCommitter() { - return outputCommitter; - } + public OutputCommitter getBaseCommitter() { + return outputCommitter; + } - public TaskAttemptContext getContext() { - return context; - } + public TaskAttemptContext getContext() { + return context; } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputCommitterContainer.java index 1caf5b3..b29828f 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputCommitterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputCommitterContainer.java @@ -28,21 +28,21 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.OutputCommitterContainer} instead */ abstract class OutputCommitterContainer extends OutputCommitter { - private final org.apache.hadoop.mapred.OutputCommitter 
committer; + private final org.apache.hadoop.mapred.OutputCommitter committer; - /** - * @param context current JobContext - * @param committer OutputCommitter that this instance will contain - */ - public OutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter committer) { - this.committer = committer; - } + /** + * @param context current JobContext + * @param committer OutputCommitter that this instance will contain + */ + public OutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter committer) { + this.committer = committer; + } - /** - * @return underlying OutputCommitter - */ - public OutputCommitter getBaseOutputCommitter() { - return committer; - } + /** + * @return underlying OutputCommitter + */ + public OutputCommitter getBaseOutputCommitter() { + return committer; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java index 77b6cc0..e7c6cc6 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java @@ -35,20 +35,20 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.OutputFormatContainer} instead */ abstract class OutputFormatContainer extends OutputFormat, HCatRecord> { - private org.apache.hadoop.mapred.OutputFormat, ? super Writable> of; + private org.apache.hadoop.mapred.OutputFormat, ? super Writable> of; - /** - * @param of OutputFormat this instance will contain - */ - public OutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { - this.of = of; - } + /** + * @param of OutputFormat this instance will contain + */ + public OutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { + this.of = of; + } - /** - * @return underlying OutputFormat - */ - public org.apache.hadoop.mapred.OutputFormat getBaseOutputFormat() { - return of; - } + /** + * @return underlying OutputFormat + */ + public org.apache.hadoop.mapred.OutputFormat getBaseOutputFormat() { + return of; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java index 24a3323..696ca64 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java @@ -35,239 +35,239 @@ */ public class OutputJobInfo implements Serializable { - /** The db and table names. */ - private final String databaseName; - private final String tableName; - - /** The serialization version. */ - private static final long serialVersionUID = 1L; - - /** The table info provided by user. */ - private HCatTableInfo tableInfo; - - /** The output schema. This is given to us by user. This wont contain any - * partition columns ,even if user has specified them. 
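
OutputJobInfo, the class in this hunk, is the handle a writing client builds and hands to HCatOutputFormat (defined elsewhere in this package) to name the target table and partition. A hedged sketch of the usual write-side setup is shown below; the database, table, and partition spec are invented for the example, and the HCatOutputFormat helpers are the customary companion calls rather than something introduced by this patch.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteSetupSketch {
  public static void configureOutput(Job job) throws Exception {
    // One entry per partition column of the target table; use null or an empty
    // map when writing to an unpartitioned table.
    Map<String, String> partitionSpec = new HashMap<String, String>();
    partitionSpec.put("ds", "2013-01-01");

    OutputJobInfo info = OutputJobInfo.create("mydb", "mytable", partitionSpec);
    HCatOutputFormat.setOutput(job, info);

    // Reuse the table schema (minus partition columns) as the output schema.
    HCatSchema schema = HCatOutputFormat.getTableSchema(job);
    HCatOutputFormat.setSchema(job, schema);
    job.setOutputFormatClass(HCatOutputFormat.class);
  }
}
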
- * */ - private HCatSchema outputSchema; - - /** The location of the partition being written */ - private String location; - - /** The partition values to publish to, if used for output*/ - private Map partitionValues; - - private List posOfPartCols; - private List posOfDynPartCols; - - private Properties properties; - - private int maxDynamicPartitions; - - /** List of keys for which values were not specified at write setup time, to be infered at write time */ - private List dynamicPartitioningKeys; - - private boolean harRequested; - - /** - * Initializes a new OutputJobInfo instance - * for writing data from a table. - * @param databaseName the db name - * @param tableName the table name - * @param partitionValues The partition values to publish to, can be null or empty Map to - * work with hadoop security, the kerberos principal name of the server - else null - * The principal name should be of the form: - * /_HOST@ like "hcat/_HOST@myrealm.com" - * The special string _HOST will be replaced automatically with the correct host name - * indicate write to a unpartitioned table. For partitioned tables, this map should - * contain keys for all partition columns with corresponding values. - */ - public static OutputJobInfo create(String databaseName, - String tableName, - Map partitionValues) { - return new OutputJobInfo(databaseName, - tableName, - partitionValues); - } - - private OutputJobInfo(String databaseName, - String tableName, - Map partitionValues) { - this.databaseName = (databaseName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; - this.tableName = tableName; - this.partitionValues = partitionValues; - this.properties = new Properties(); - } - - /** - * @return the posOfPartCols - */ - protected List getPosOfPartCols() { - return posOfPartCols; - } - - /** - * @return the posOfDynPartCols - */ - protected List getPosOfDynPartCols() { - return posOfDynPartCols; - } - - /** - * @param posOfPartCols the posOfPartCols to set - */ - protected void setPosOfPartCols(List posOfPartCols) { - // sorting the list in the descending order so that deletes happen back-to-front - Collections.sort(posOfPartCols, new Comparator() { - @Override - public int compare(Integer earlier, Integer later) { - return (earlier > later) ? -1 : ((earlier == later) ? 0 : 1); - } - }); - this.posOfPartCols = posOfPartCols; - } - - /** - * @param posOfDynPartCols the posOfDynPartCols to set - */ - protected void setPosOfDynPartCols(List posOfDynPartCols) { - // Important - no sorting here! 
We retain order, it's used to match with values at runtime - this.posOfDynPartCols = posOfDynPartCols; - } - - /** - * @return the tableInfo - */ - public HCatTableInfo getTableInfo() { - return tableInfo; - } - - /** - * @return the outputSchema - */ - public HCatSchema getOutputSchema() { - return outputSchema; - } - - /** - * @param schema the outputSchema to set - */ - public void setOutputSchema(HCatSchema schema) { - this.outputSchema = schema; - } - - /** - * @return the location - */ - public String getLocation() { - return location; - } - - /** - * @param location location to write to - */ - public void setLocation(String location) { - this.location = location; - } - - /** - * Sets the value of partitionValues - * @param partitionValues the partition values to set - */ - void setPartitionValues(Map partitionValues) { - this.partitionValues = partitionValues; - } - - /** - * Gets the value of partitionValues - * @return the partitionValues - */ - public Map getPartitionValues() { - return partitionValues; - } - - /** - * set the tablInfo instance - * this should be the same instance - * determined by this object's DatabaseName and TableName - * @param tableInfo - */ - void setTableInfo(HCatTableInfo tableInfo) { - this.tableInfo = tableInfo; - } - - /** - * @return database name of table to write to - */ - public String getDatabaseName() { - return databaseName; - } - - /** - * @return name of table to write to - */ - public String getTableName() { - return tableName; - } - - /** - * Set/Get Property information to be passed down to *StorageHandler implementation - * put implementation specific storage handler configurations here - * @return the implementation specific job properties - */ - public Properties getProperties() { - return properties; - } - - /** - * Set maximum number of allowable dynamic partitions - * @param maxDynamicPartitions - */ - public void setMaximumDynamicPartitions(int maxDynamicPartitions) { - this.maxDynamicPartitions = maxDynamicPartitions; - } - - /** - * Returns maximum number of allowable dynamic partitions - * @return maximum number of allowable dynamic partitions - */ - public int getMaxDynamicPartitions() { - return this.maxDynamicPartitions; - } - - /** - * Sets whether or not hadoop archiving has been requested for this job - * @param harRequested - */ - public void setHarRequested(boolean harRequested) { - this.harRequested = harRequested; - } - - /** - * Returns whether or not hadoop archiving has been requested for this job - * @return whether or not hadoop archiving has been requested for this job - */ - public boolean getHarRequested() { - return this.harRequested; - } - - /** - * Returns whether or not Dynamic Partitioning is used - * @return whether or not dynamic partitioning is currently enabled and used - */ - public boolean isDynamicPartitioningUsed() { - return !((dynamicPartitioningKeys == null) || (dynamicPartitioningKeys.isEmpty())); - } - - /** - * Sets the list of dynamic partitioning keys used for outputting without specifying all the keys - * @param dynamicPartitioningKeys - */ - public void setDynamicPartitioningKeys(List dynamicPartitioningKeys) { - this.dynamicPartitioningKeys = dynamicPartitioningKeys; - } - - public List getDynamicPartitioningKeys() { - return this.dynamicPartitioningKeys; - } + /** The db and table names. */ + private final String databaseName; + private final String tableName; + + /** The serialization version. */ + private static final long serialVersionUID = 1L; + + /** The table info provided by user. 
*/ + private HCatTableInfo tableInfo; + + /** The output schema. This is given to us by user. This wont contain any + * partition columns ,even if user has specified them. + * */ + private HCatSchema outputSchema; + + /** The location of the partition being written */ + private String location; + + /** The partition values to publish to, if used for output*/ + private Map partitionValues; + + private List posOfPartCols; + private List posOfDynPartCols; + + private Properties properties; + + private int maxDynamicPartitions; + + /** List of keys for which values were not specified at write setup time, to be infered at write time */ + private List dynamicPartitioningKeys; + + private boolean harRequested; + + /** + * Initializes a new OutputJobInfo instance + * for writing data from a table. + * @param databaseName the db name + * @param tableName the table name + * @param partitionValues The partition values to publish to, can be null or empty Map to + * work with hadoop security, the kerberos principal name of the server - else null + * The principal name should be of the form: + * /_HOST@ like "hcat/_HOST@myrealm.com" + * The special string _HOST will be replaced automatically with the correct host name + * indicate write to a unpartitioned table. For partitioned tables, this map should + * contain keys for all partition columns with corresponding values. + */ + public static OutputJobInfo create(String databaseName, + String tableName, + Map partitionValues) { + return new OutputJobInfo(databaseName, + tableName, + partitionValues); + } + + private OutputJobInfo(String databaseName, + String tableName, + Map partitionValues) { + this.databaseName = (databaseName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; + this.tableName = tableName; + this.partitionValues = partitionValues; + this.properties = new Properties(); + } + + /** + * @return the posOfPartCols + */ + protected List getPosOfPartCols() { + return posOfPartCols; + } + + /** + * @return the posOfDynPartCols + */ + protected List getPosOfDynPartCols() { + return posOfDynPartCols; + } + + /** + * @param posOfPartCols the posOfPartCols to set + */ + protected void setPosOfPartCols(List posOfPartCols) { + // sorting the list in the descending order so that deletes happen back-to-front + Collections.sort(posOfPartCols, new Comparator() { + @Override + public int compare(Integer earlier, Integer later) { + return (earlier > later) ? -1 : ((earlier == later) ? 0 : 1); + } + }); + this.posOfPartCols = posOfPartCols; + } + + /** + * @param posOfDynPartCols the posOfDynPartCols to set + */ + protected void setPosOfDynPartCols(List posOfDynPartCols) { + // Important - no sorting here! 
We retain order, it's used to match with values at runtime + this.posOfDynPartCols = posOfDynPartCols; + } + + /** + * @return the tableInfo + */ + public HCatTableInfo getTableInfo() { + return tableInfo; + } + + /** + * @return the outputSchema + */ + public HCatSchema getOutputSchema() { + return outputSchema; + } + + /** + * @param schema the outputSchema to set + */ + public void setOutputSchema(HCatSchema schema) { + this.outputSchema = schema; + } + + /** + * @return the location + */ + public String getLocation() { + return location; + } + + /** + * @param location location to write to + */ + public void setLocation(String location) { + this.location = location; + } + + /** + * Sets the value of partitionValues + * @param partitionValues the partition values to set + */ + void setPartitionValues(Map partitionValues) { + this.partitionValues = partitionValues; + } + + /** + * Gets the value of partitionValues + * @return the partitionValues + */ + public Map getPartitionValues() { + return partitionValues; + } + + /** + * set the tablInfo instance + * this should be the same instance + * determined by this object's DatabaseName and TableName + * @param tableInfo + */ + void setTableInfo(HCatTableInfo tableInfo) { + this.tableInfo = tableInfo; + } + + /** + * @return database name of table to write to + */ + public String getDatabaseName() { + return databaseName; + } + + /** + * @return name of table to write to + */ + public String getTableName() { + return tableName; + } + + /** + * Set/Get Property information to be passed down to *StorageHandler implementation + * put implementation specific storage handler configurations here + * @return the implementation specific job properties + */ + public Properties getProperties() { + return properties; + } + + /** + * Set maximum number of allowable dynamic partitions + * @param maxDynamicPartitions + */ + public void setMaximumDynamicPartitions(int maxDynamicPartitions) { + this.maxDynamicPartitions = maxDynamicPartitions; + } + + /** + * Returns maximum number of allowable dynamic partitions + * @return maximum number of allowable dynamic partitions + */ + public int getMaxDynamicPartitions() { + return this.maxDynamicPartitions; + } + + /** + * Sets whether or not hadoop archiving has been requested for this job + * @param harRequested + */ + public void setHarRequested(boolean harRequested) { + this.harRequested = harRequested; + } + + /** + * Returns whether or not hadoop archiving has been requested for this job + * @return whether or not hadoop archiving has been requested for this job + */ + public boolean getHarRequested() { + return this.harRequested; + } + + /** + * Returns whether or not Dynamic Partitioning is used + * @return whether or not dynamic partitioning is currently enabled and used + */ + public boolean isDynamicPartitioningUsed() { + return !((dynamicPartitioningKeys == null) || (dynamicPartitioningKeys.isEmpty())); + } + + /** + * Sets the list of dynamic partitioning keys used for outputting without specifying all the keys + * @param dynamicPartitioningKeys + */ + public void setDynamicPartitioningKeys(List dynamicPartitioningKeys) { + this.dynamicPartitioningKeys = dynamicPartitioningKeys; + } + + public List getDynamicPartitioningKeys() { + return this.dynamicPartitioningKeys; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/PartInfo.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/PartInfo.java index 604a36b..e22d347 100644 --- 
a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/PartInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/PartInfo.java @@ -30,138 +30,138 @@ */ public class PartInfo implements Serializable { - /** The serialization version */ - private static final long serialVersionUID = 1L; - - /** The partition schema. */ - private final HCatSchema partitionSchema; - - /** The information about which input storage handler to use */ - private final String storageHandlerClassName; - private final String inputFormatClassName; - private final String outputFormatClassName; - private final String serdeClassName; - - /** HCat-specific properties set at the partition */ - private final Properties hcatProperties; - - /** The data location. */ - private final String location; - - /** The map of partition key names and their values. */ - private Map partitionValues; - - /** Job properties associated with this parition */ - Map jobProperties; - - /** the table info associated with this partition */ - HCatTableInfo tableInfo; - - /** - * Instantiates a new hcat partition info. - * @param partitionSchema the partition schema - * @param storageHandler the storage handler - * @param location the location - * @param hcatProperties hcat-specific properties at the partition - * @param jobProperties the job properties - * @param tableInfo the table information - */ - public PartInfo(HCatSchema partitionSchema, HCatStorageHandler storageHandler, - String location, Properties hcatProperties, - Map jobProperties, HCatTableInfo tableInfo) { - this.partitionSchema = partitionSchema; - this.location = location; - this.hcatProperties = hcatProperties; - this.jobProperties = jobProperties; - this.tableInfo = tableInfo; - - this.storageHandlerClassName = storageHandler.getClass().getName(); - this.inputFormatClassName = storageHandler.getInputFormatClass().getName(); - this.serdeClassName = storageHandler.getSerDeClass().getName(); - this.outputFormatClassName = storageHandler.getOutputFormatClass().getName(); - } - - /** - * Gets the value of partitionSchema. - * @return the partitionSchema - */ - public HCatSchema getPartitionSchema() { - return partitionSchema; - } - - /** - * @return the storage handler class name - */ - public String getStorageHandlerClassName() { - return storageHandlerClassName; - } - - /** - * @return the inputFormatClassName - */ - public String getInputFormatClassName() { - return inputFormatClassName; - } - - /** - * @return the outputFormatClassName - */ - public String getOutputFormatClassName() { - return outputFormatClassName; - } - - /** - * @return the serdeClassName - */ - public String getSerdeClassName() { - return serdeClassName; - } - - /** - * Gets the input storage handler properties. - * @return HCat-specific properties set at the partition - */ - public Properties getInputStorageHandlerProperties() { - return hcatProperties; - } - - /** - * Gets the value of location. - * @return the location - */ - public String getLocation() { - return location; - } - - /** - * Sets the partition values. - * @param partitionValues the new partition values - */ - public void setPartitionValues(Map partitionValues) { - this.partitionValues = partitionValues; - } - - /** - * Gets the partition values. - * @return the partition values - */ - public Map getPartitionValues() { - return partitionValues; - } - - /** - * Gets the job properties. 
- * @return a map of the job properties - */ - public Map getJobProperties() { - return jobProperties; - } - - /** - * Gets the HCatalog table information. - * @return the table information - */ - public HCatTableInfo getTableInfo() { - return tableInfo; - } + /** The serialization version */ + private static final long serialVersionUID = 1L; + + /** The partition schema. */ + private final HCatSchema partitionSchema; + + /** The information about which input storage handler to use */ + private final String storageHandlerClassName; + private final String inputFormatClassName; + private final String outputFormatClassName; + private final String serdeClassName; + + /** HCat-specific properties set at the partition */ + private final Properties hcatProperties; + + /** The data location. */ + private final String location; + + /** The map of partition key names and their values. */ + private Map partitionValues; + + /** Job properties associated with this parition */ + Map jobProperties; + + /** the table info associated with this partition */ + HCatTableInfo tableInfo; + + /** + * Instantiates a new hcat partition info. + * @param partitionSchema the partition schema + * @param storageHandler the storage handler + * @param location the location + * @param hcatProperties hcat-specific properties at the partition + * @param jobProperties the job properties + * @param tableInfo the table information + */ + public PartInfo(HCatSchema partitionSchema, HCatStorageHandler storageHandler, + String location, Properties hcatProperties, + Map jobProperties, HCatTableInfo tableInfo) { + this.partitionSchema = partitionSchema; + this.location = location; + this.hcatProperties = hcatProperties; + this.jobProperties = jobProperties; + this.tableInfo = tableInfo; + + this.storageHandlerClassName = storageHandler.getClass().getName(); + this.inputFormatClassName = storageHandler.getInputFormatClass().getName(); + this.serdeClassName = storageHandler.getSerDeClass().getName(); + this.outputFormatClassName = storageHandler.getOutputFormatClass().getName(); + } + + /** + * Gets the value of partitionSchema. + * @return the partitionSchema + */ + public HCatSchema getPartitionSchema() { + return partitionSchema; + } + + /** + * @return the storage handler class name + */ + public String getStorageHandlerClassName() { + return storageHandlerClassName; + } + + /** + * @return the inputFormatClassName + */ + public String getInputFormatClassName() { + return inputFormatClassName; + } + + /** + * @return the outputFormatClassName + */ + public String getOutputFormatClassName() { + return outputFormatClassName; + } + + /** + * @return the serdeClassName + */ + public String getSerdeClassName() { + return serdeClassName; + } + + /** + * Gets the input storage handler properties. + * @return HCat-specific properties set at the partition + */ + public Properties getInputStorageHandlerProperties() { + return hcatProperties; + } + + /** + * Gets the value of location. + * @return the location + */ + public String getLocation() { + return location; + } + + /** + * Sets the partition values. + * @param partitionValues the new partition values + */ + public void setPartitionValues(Map partitionValues) { + this.partitionValues = partitionValues; + } + + /** + * Gets the partition values. + * @return the partition values + */ + public Map getPartitionValues() { + return partitionValues; + } + + /** + * Gets the job properties. 
+ * @return a map of the job properties + */ + public Map getJobProperties() { + return jobProperties; + } + + /** + * Gets the HCatalog table information. + * @return the table information + */ + public HCatTableInfo getTableInfo() { + return tableInfo; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/ProgressReporter.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/ProgressReporter.java index a95f997..615f1e8 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/ProgressReporter.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/ProgressReporter.java @@ -31,65 +31,65 @@ */ class ProgressReporter extends StatusReporter implements Reporter { - private TaskInputOutputContext context = null; - private TaskAttemptContext taskAttemptContext = null; + private TaskInputOutputContext context = null; + private TaskAttemptContext taskAttemptContext = null; - public ProgressReporter(TaskAttemptContext context) { - if (context instanceof TaskInputOutputContext) { - this.context = (TaskInputOutputContext) context; - } else { - taskAttemptContext = context; - } + public ProgressReporter(TaskAttemptContext context) { + if (context instanceof TaskInputOutputContext) { + this.context = (TaskInputOutputContext) context; + } else { + taskAttemptContext = context; } + } - @Override - public void setStatus(String status) { - if (context != null) { - context.setStatus(status); - } + @Override + public void setStatus(String status) { + if (context != null) { + context.setStatus(status); } + } - @Override - public Counters.Counter getCounter(Enum name) { - return (context != null) ? (Counters.Counter) context.getCounter(name) : null; - } + @Override + public Counters.Counter getCounter(Enum name) { + return (context != null) ? (Counters.Counter) context.getCounter(name) : null; + } - @Override - public Counters.Counter getCounter(String group, String name) { - return (context != null) ? (Counters.Counter) context.getCounter(group, name) : null; - } + @Override + public Counters.Counter getCounter(String group, String name) { + return (context != null) ? (Counters.Counter) context.getCounter(group, name) : null; + } - @Override - public void incrCounter(Enum key, long amount) { - if (context != null) { - context.getCounter(key).increment(amount); - } + @Override + public void incrCounter(Enum key, long amount) { + if (context != null) { + context.getCounter(key).increment(amount); } + } - @Override - public void incrCounter(String group, String counter, long amount) { - if (context != null) { - context.getCounter(group, counter).increment(amount); - } + @Override + public void incrCounter(String group, String counter, long amount) { + if (context != null) { + context.getCounter(group, counter).increment(amount); } + } - @Override - public InputSplit getInputSplit() throws UnsupportedOperationException { - return null; - } + @Override + public InputSplit getInputSplit() throws UnsupportedOperationException { + return null; + } - public float getProgress() { - /* Required to build against 0.23 Reporter and StatusReporter. */ - /* TODO: determine the progress. */ - return 0.0f; - } + public float getProgress() { + /* Required to build against 0.23 Reporter and StatusReporter. */ + /* TODO: determine the progress. 
*/ + return 0.0f; + } - @Override - public void progress() { - if (context != null) { - context.progress(); - } else { - taskAttemptContext.progress(); - } + @Override + public void progress() { + if (context != null) { + context.progress(); + } else { + taskAttemptContext.progress(); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/RecordWriterContainer.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/RecordWriterContainer.java index 69d3abc..f37faa1 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/RecordWriterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/RecordWriterContainer.java @@ -33,22 +33,22 @@ */ abstract class RecordWriterContainer extends RecordWriter, HCatRecord> { - private final org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter; - - /** - * @param context current JobContext - * @param baseRecordWriter RecordWriter that this instance will contain - */ - public RecordWriterContainer(TaskAttemptContext context, - org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter) { - this.baseRecordWriter = baseRecordWriter; - } - - /** - * @return underlying RecordWriter - */ - public org.apache.hadoop.mapred.RecordWriter getBaseRecordWriter() { - return baseRecordWriter; - } + private final org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter; + + /** + * @param context current JobContext + * @param baseRecordWriter RecordWriter that this instance will contain + */ + public RecordWriterContainer(TaskAttemptContext context, + org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter) { + this.baseRecordWriter = baseRecordWriter; + } + + /** + * @return underlying RecordWriter + */ + public org.apache.hadoop.mapred.RecordWriter getBaseRecordWriter() { + return baseRecordWriter; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/Security.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/Security.java index 6221a04..1fb1252 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/Security.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/Security.java @@ -49,146 +49,146 @@ */ final class Security { - private static final Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); - - // making sure this is not initialized unless needed - private static final class LazyHolder { - public static final Security INSTANCE = new Security(); + private static final Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); + + // making sure this is not initialized unless needed + private static final class LazyHolder { + public static final Security INSTANCE = new Security(); + } + + public static Security getInstance() { + return LazyHolder.INSTANCE; + } + + boolean isSecurityEnabled() { + try { + Method m = UserGroupInformation.class.getMethod("isSecurityEnabled"); + return (Boolean) m.invoke(null, (Object[]) null); + } catch (NoSuchMethodException e) { + LOG.info("Security is not supported by this version of hadoop.", e); + } catch (InvocationTargetException e) { + String msg = "Failed to call isSecurityEnabled()"; + LOG.info(msg, e); + throw new IllegalStateException(msg, e); + } catch (IllegalAccessException e) { + String msg = "Failed to call isSecurityEnabled()"; + LOG.info(msg, e); + throw new IllegalStateException(msg, e); } - - public static Security getInstance() { - return LazyHolder.INSTANCE; + return false; + } + + // a signature 
string to associate with a HCatTableInfo - essentially + // a concatenation of dbname, tablename and partition keyvalues. + String getTokenSignature(OutputJobInfo outputJobInfo) { + StringBuilder result = new StringBuilder(""); + String dbName = outputJobInfo.getDatabaseName(); + if (dbName != null) { + result.append(dbName); } - - boolean isSecurityEnabled() { - try { - Method m = UserGroupInformation.class.getMethod("isSecurityEnabled"); - return (Boolean) m.invoke(null, (Object[]) null); - } catch (NoSuchMethodException e) { - LOG.info("Security is not supported by this version of hadoop.", e); - } catch (InvocationTargetException e) { - String msg = "Failed to call isSecurityEnabled()"; - LOG.info(msg, e); - throw new IllegalStateException(msg, e); - } catch (IllegalAccessException e) { - String msg = "Failed to call isSecurityEnabled()"; - LOG.info(msg, e); - throw new IllegalStateException(msg, e); - } - return false; + String tableName = outputJobInfo.getTableName(); + if (tableName != null) { + result.append("." + tableName); } + Map partValues = outputJobInfo.getPartitionValues(); + if (partValues != null) { + for (Entry entry : partValues.entrySet()) { + result.append("/"); + result.append(entry.getKey()); + result.append("="); + result.append(entry.getValue()); + } - // a signature string to associate with a HCatTableInfo - essentially - // a concatenation of dbname, tablename and partition keyvalues. - String getTokenSignature(OutputJobInfo outputJobInfo) { - StringBuilder result = new StringBuilder(""); - String dbName = outputJobInfo.getDatabaseName(); - if (dbName != null) { - result.append(dbName); - } - String tableName = outputJobInfo.getTableName(); - if (tableName != null) { - result.append("." + tableName); - } - Map partValues = outputJobInfo.getPartitionValues(); - if (partValues != null) { - for (Entry entry : partValues.entrySet()) { - result.append("/"); - result.append(entry.getKey()); - result.append("="); - result.append(entry.getValue()); - } - - } - return result.toString(); } - - void handleSecurity( - Credentials credentials, - OutputJobInfo outputJobInfo, - HiveMetaStoreClient client, - Configuration conf, - boolean harRequested) - throws IOException, MetaException, TException, Exception { - if (UserGroupInformation.isSecurityEnabled()) { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - // check if oozie has set up a hcat deleg. token - if so use it - TokenSelector hiveTokenSelector = new DelegationTokenSelector(); - //Oozie does not change the service field of the token - //hence by default token generation will have a value of "new Text("")" - //HiveClient will look for a use TokenSelector.selectToken() with service - //set to empty "Text" if hive.metastore.token.signature property is set to null - Token hiveToken = hiveTokenSelector.selectToken( - new Text(), ugi.getTokens()); - if (hiveToken == null) { - // we did not get token set up by oozie, let's get them ourselves here. 
- // we essentially get a token per unique Output HCatTableInfo - this is - // done because through Pig, setOutput() method is called multiple times - // We want to only get the token once per unique output HCatTableInfo - - // we cannot just get one token since in multi-query case (> 1 store in 1 job) - // or the case when a single pig script results in > 1 jobs, the single - // token will get cancelled by the output committer and the subsequent - // stores will fail - by tying the token with the concatenation of - // dbname, tablename and partition keyvalues of the output - // TableInfo, we can have as many tokens as there are stores and the TokenSelector - // will correctly pick the right tokens which the committer will use and - // cancel. - String tokenSignature = getTokenSignature(outputJobInfo); - // get delegation tokens from hcat server and store them into the "job" - // These will be used in to publish partitions to - // hcat normally in OutputCommitter.commitJob() - // when the JobTracker in Hadoop MapReduce starts supporting renewal of - // arbitrary tokens, the renewer should be the principal of the JobTracker - hiveToken = HCatUtil.extractThriftToken(client.getDelegationToken(ugi.getUserName()), tokenSignature); - - if (harRequested) { - TokenSelector jtTokenSelector = - new org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenSelector(); - Token jtToken = jtTokenSelector.selectToken(org.apache.hadoop.security.SecurityUtil.buildTokenService( - ShimLoader.getHadoopShims().getHCatShim().getResourceManagerAddress(conf)), ugi.getTokens()); - if (jtToken == null) { - //we don't need to cancel this token as the TokenRenewer for JT tokens - //takes care of cancelling them - credentials.addToken( - new Text("hcat jt token"), - HCatUtil.getJobTrackerDelegationToken(conf, ugi.getUserName()) - ); - } - } - - credentials.addToken(new Text(ugi.getUserName() + "_" + tokenSignature), hiveToken); - // this will be used by the outputcommitter to pass on to the metastore client - // which in turn will pass on to the TokenSelector so that it can select - // the right token. - conf.set(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE, tokenSignature); - } + return result.toString(); + } + + void handleSecurity( + Credentials credentials, + OutputJobInfo outputJobInfo, + HiveMetaStoreClient client, + Configuration conf, + boolean harRequested) + throws IOException, MetaException, TException, Exception { + if (UserGroupInformation.isSecurityEnabled()) { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + // check if oozie has set up a hcat deleg. token - if so use it + TokenSelector hiveTokenSelector = new DelegationTokenSelector(); + //Oozie does not change the service field of the token + //hence by default token generation will have a value of "new Text("")" + //HiveClient will look for a use TokenSelector.selectToken() with service + //set to empty "Text" if hive.metastore.token.signature property is set to null + Token hiveToken = hiveTokenSelector.selectToken( + new Text(), ugi.getTokens()); + if (hiveToken == null) { + // we did not get token set up by oozie, let's get them ourselves here. 
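
Concretely, getTokenSignature above produces nothing more than the database name, "." plus the table name, and "/key=value" for each partition column, so every distinct store gets its own delegation token. A short illustration with invented values (the class here is only an example, not part of the patch):

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hcatalog.mapreduce.OutputJobInfo;

class TokenSignatureIllustration {
  static OutputJobInfo example() {
    Map<String, String> parts = new LinkedHashMap<String, String>();  // iteration order = insertion order
    parts.put("ds", "2013-01-01");
    parts.put("region", "us");
    OutputJobInfo info = OutputJobInfo.create("mydb", "clicks", parts);
    // For this info, getTokenSignature would build "mydb.clicks/ds=2013-01-01/region=us",
    // so the committer for one store cannot cancel a token another store still needs.
    return info;
  }
}
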
+ // we essentially get a token per unique Output HCatTableInfo - this is + // done because through Pig, setOutput() method is called multiple times + // We want to only get the token once per unique output HCatTableInfo - + // we cannot just get one token since in multi-query case (> 1 store in 1 job) + // or the case when a single pig script results in > 1 jobs, the single + // token will get cancelled by the output committer and the subsequent + // stores will fail - by tying the token with the concatenation of + // dbname, tablename and partition keyvalues of the output + // TableInfo, we can have as many tokens as there are stores and the TokenSelector + // will correctly pick the right tokens which the committer will use and + // cancel. + String tokenSignature = getTokenSignature(outputJobInfo); + // get delegation tokens from hcat server and store them into the "job" + // These will be used in to publish partitions to + // hcat normally in OutputCommitter.commitJob() + // when the JobTracker in Hadoop MapReduce starts supporting renewal of + // arbitrary tokens, the renewer should be the principal of the JobTracker + hiveToken = HCatUtil.extractThriftToken(client.getDelegationToken(ugi.getUserName()), tokenSignature); + + if (harRequested) { + TokenSelector jtTokenSelector = + new org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenSelector(); + Token jtToken = jtTokenSelector.selectToken(org.apache.hadoop.security.SecurityUtil.buildTokenService( + ShimLoader.getHadoopShims().getHCatShim().getResourceManagerAddress(conf)), ugi.getTokens()); + if (jtToken == null) { + //we don't need to cancel this token as the TokenRenewer for JT tokens + //takes care of cancelling them + credentials.addToken( + new Text("hcat jt token"), + HCatUtil.getJobTrackerDelegationToken(conf, ugi.getUserName()) + ); + } } - } - void handleSecurity( - Job job, - OutputJobInfo outputJobInfo, - HiveMetaStoreClient client, - Configuration conf, - boolean harRequested) - throws IOException, MetaException, TException, Exception { - handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested); + credentials.addToken(new Text(ugi.getUserName() + "_" + tokenSignature), hiveToken); + // this will be used by the outputcommitter to pass on to the metastore client + // which in turn will pass on to the TokenSelector so that it can select + // the right token. + conf.set(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE, tokenSignature); + } } - - // we should cancel hcat token if it was acquired by hcat - // and not if it was supplied (ie Oozie). In the latter - // case the HCAT_KEY_TOKEN_SIGNATURE property in the conf will not be set - void cancelToken(HiveMetaStoreClient client, JobContext context) throws IOException, MetaException { - String tokenStrForm = client.getTokenStrForm(); - if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - try { - client.cancelDelegationToken(tokenStrForm); - } catch (TException e) { - String msg = "Failed to cancel delegation token"; - LOG.error(msg, e); - throw new IOException(msg, e); - } - } + } + + void handleSecurity( + Job job, + OutputJobInfo outputJobInfo, + HiveMetaStoreClient client, + Configuration conf, + boolean harRequested) + throws IOException, MetaException, TException, Exception { + handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested); + } + + // we should cancel hcat token if it was acquired by hcat + // and not if it was supplied (ie Oozie). 
In the latter + // case the HCAT_KEY_TOKEN_SIGNATURE property in the conf will not be set + void cancelToken(HiveMetaStoreClient client, JobContext context) throws IOException, MetaException { + String tokenStrForm = client.getTokenStrForm(); + if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + try { + client.cancelDelegationToken(tokenStrForm); + } catch (TException e) { + String msg = "Failed to cancel delegation token"; + LOG.error(msg, e); + throw new IOException(msg, e); + } } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/StorerInfo.java b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/StorerInfo.java index 446f181..bb48490 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/StorerInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/StorerInfo.java @@ -28,85 +28,85 @@ */ public class StorerInfo implements Serializable { - /** The serialization version */ - private static final long serialVersionUID = 1L; - - /** The properties for the storage handler */ - private Properties properties; - - private String ofClass; - - private String ifClass; - - private String serdeClass; - - private String storageHandlerClass; - - /** - * Initialize the storer information. - * @param ifClass the input format class - * @param ofClass the output format class - * @param serdeClass the SerDe class - * @param storageHandlerClass the storage handler class - * @param properties the properties for the storage handler - */ - public StorerInfo(String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) { - super(); - this.ifClass = ifClass; - this.ofClass = ofClass; - this.serdeClass = serdeClass; - this.storageHandlerClass = storageHandlerClass; - this.properties = properties; - } - - /** - * @return the input format class - */ - public String getIfClass() { - return ifClass; - } - - /** - * @param ifClass the input format class - */ - public void setIfClass(String ifClass) { - this.ifClass = ifClass; - } - - /** - * @return the output format class - */ - public String getOfClass() { - return ofClass; - } - - /** - * @return the serdeClass - */ - public String getSerdeClass() { - return serdeClass; - } - - /** - * @return the storageHandlerClass - */ - public String getStorageHandlerClass() { - return storageHandlerClass; - } - - /** - * @return the storer properties - */ - public Properties getProperties() { - return properties; - } - - /** - * @param properties the storer properties to set - */ - public void setProperties(Properties properties) { - this.properties = properties; - } + /** The serialization version */ + private static final long serialVersionUID = 1L; + + /** The properties for the storage handler */ + private Properties properties; + + private String ofClass; + + private String ifClass; + + private String serdeClass; + + private String storageHandlerClass; + + /** + * Initialize the storer information. 
+ * @param ifClass the input format class + * @param ofClass the output format class + * @param serdeClass the SerDe class + * @param storageHandlerClass the storage handler class + * @param properties the properties for the storage handler + */ + public StorerInfo(String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) { + super(); + this.ifClass = ifClass; + this.ofClass = ofClass; + this.serdeClass = serdeClass; + this.storageHandlerClass = storageHandlerClass; + this.properties = properties; + } + + /** + * @return the input format class + */ + public String getIfClass() { + return ifClass; + } + + /** + * @param ifClass the input format class + */ + public void setIfClass(String ifClass) { + this.ifClass = ifClass; + } + + /** + * @return the output format class + */ + public String getOfClass() { + return ofClass; + } + + /** + * @return the serdeClass + */ + public String getSerdeClass() { + return serdeClass; + } + + /** + * @return the storageHandlerClass + */ + public String getStorageHandlerClass() { + return storageHandlerClass; + } + + /** + * @return the storer properties + */ + public Properties getProperties() { + return properties; + } + + /** + * @param properties the storer properties to set + */ + public void setProperties(Properties properties) { + this.properties = properties; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/oozie/JavaAction.java b/hcatalog/core/src/main/java/org/apache/hcatalog/oozie/JavaAction.java index dcc7fed..eb82c2c 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/oozie/JavaAction.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/oozie/JavaAction.java @@ -31,14 +31,14 @@ */ public class JavaAction { - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { - HiveConf conf = new HiveConf(); - conf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml"))); - conf.setVar(ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); - conf.setBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL, true); - SessionState.start(new CliSessionState(conf)); - new CliDriver().processLine(args[0]); - } + HiveConf conf = new HiveConf(); + conf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml"))); + conf.setVar(ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); + conf.setBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL, true); + SessionState.start(new CliSessionState(conf)); + new CliDriver().processLine(args[0]); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceInputFormat.java b/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceInputFormat.java index a61d07a..de9178b 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceInputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceInputFormat.java @@ -34,20 +34,20 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.rcfile.RCFileMapReduceInputFormat} instead */ public class RCFileMapReduceInputFormat - extends FileInputFormat { + extends FileInputFormat { - @Override - public RecordReader createRecordReader(InputSplit split, - TaskAttemptContext context) throws IOException, InterruptedException { + @Override + public RecordReader createRecordReader(InputSplit split, + TaskAttemptContext context) throws IOException, InterruptedException { - context.setStatus(split.toString()); - 
return new RCFileMapReduceRecordReader(); - } + context.setStatus(split.toString()); + return new RCFileMapReduceRecordReader(); + } - @Override - public List getSplits(JobContext job) throws IOException { + @Override + public List getSplits(JobContext job) throws IOException { - job.getConfiguration().setLong("mapred.min.split.size", SequenceFile.SYNC_INTERVAL); - return super.getSplits(job); - } + job.getConfiguration().setLong("mapred.min.split.size", SequenceFile.SYNC_INTERVAL); + return super.getSplits(job); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceOutputFormat.java index a0775dc..000a723 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceOutputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceOutputFormat.java @@ -39,68 +39,68 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.rcfile.RCFileMapReduceOutputFormat} instead */ public class RCFileMapReduceOutputFormat extends - FileOutputFormat, BytesRefArrayWritable> { + FileOutputFormat, BytesRefArrayWritable> { - /** - * Set number of columns into the given configuration. - * @param conf - * configuration instance which need to set the column number - * @param columnNum - * column number for RCFile's Writer - * - */ - public static void setColumnNumber(Configuration conf, int columnNum) { - assert columnNum > 0; - conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNum); - } + /** + * Set number of columns into the given configuration. + * @param conf + * configuration instance which need to set the column number + * @param columnNum + * column number for RCFile's Writer + * + */ + public static void setColumnNumber(Configuration conf, int columnNum) { + assert columnNum > 0; + conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNum); + } - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public org.apache.hadoop.mapreduce.RecordWriter, BytesRefArrayWritable> getRecordWriter( - TaskAttemptContext task) throws IOException, InterruptedException { + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext) + */ + @Override + public org.apache.hadoop.mapreduce.RecordWriter, BytesRefArrayWritable> getRecordWriter( + TaskAttemptContext task) throws IOException, InterruptedException { - //FileOutputFormat.getWorkOutputPath takes TaskInputOutputContext instead of - //TaskAttemptContext, so can't use that here - FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(task); - Path outputPath = committer.getWorkPath(); + //FileOutputFormat.getWorkOutputPath takes TaskInputOutputContext instead of + //TaskAttemptContext, so can't use that here + FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(task); + Path outputPath = committer.getWorkPath(); - FileSystem fs = outputPath.getFileSystem(task.getConfiguration()); + FileSystem fs = outputPath.getFileSystem(task.getConfiguration()); - if (!fs.exists(outputPath)) { - fs.mkdirs(outputPath); - } + if (!fs.exists(outputPath)) { + fs.mkdirs(outputPath); + } - Path file = getDefaultWorkFile(task, ""); + Path file = getDefaultWorkFile(task, ""); - CompressionCodec codec = null; - if (getCompressOutput(task)) { - Class codecClass = 
getOutputCompressorClass(task, DefaultCodec.class); - codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration()); - } + CompressionCodec codec = null; + if (getCompressOutput(task)) { + Class codecClass = getOutputCompressorClass(task, DefaultCodec.class); + codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration()); + } - final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec); + final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec); - return new RecordWriter, BytesRefArrayWritable>() { + return new RecordWriter, BytesRefArrayWritable>() { - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object) - */ - @Override - public void write(WritableComparable key, BytesRefArrayWritable value) - throws IOException { - out.append(value); - } + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object) + */ + @Override + public void write(WritableComparable key, BytesRefArrayWritable value) + throws IOException { + out.append(value); + } - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public void close(TaskAttemptContext task) throws IOException, InterruptedException { - out.close(); - } - }; - } + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext) + */ + @Override + public void close(TaskAttemptContext task) throws IOException, InterruptedException { + out.close(); + } + }; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceRecordReader.java b/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceRecordReader.java index 1f82501..2f80ab8 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceRecordReader.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceRecordReader.java @@ -34,90 +34,90 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.rcfile.RCFileMapReduceRecordReader} instead */ public class RCFileMapReduceRecordReader - extends RecordReader { - - private Reader in; - private long start; - private long end; - private boolean more = true; - - // key and value objects are created once in initialize() and then reused - // for every getCurrentKey() and getCurrentValue() call. This is important - // since RCFile makes an assumption of this fact. - - private LongWritable key; - private BytesRefArrayWritable value; - - @Override - public void close() throws IOException { - in.close(); - } - - @Override - public LongWritable getCurrentKey() throws IOException, InterruptedException { - return key; + extends RecordReader { + + private Reader in; + private long start; + private long end; + private boolean more = true; + + // key and value objects are created once in initialize() and then reused + // for every getCurrentKey() and getCurrentValue() call. This is important + // since RCFile makes an assumption of this fact. 
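  // [Editor's illustrative sketch -- not part of the original patch.] The reuse contract
  // described in the comment above means callers must treat the objects returned by
  // getCurrentKey()/getCurrentValue() as valid only until the next nextKeyValue() call.
  // A minimal read loop under that assumption (the split and context variables are assumed
  // to be supplied by the MapReduce framework or a test harness):
  //
  //   RCFileMapReduceRecordReader reader = new RCFileMapReduceRecordReader();
  //   reader.initialize(split, context);
  //   while (reader.nextKeyValue()) {
  //     long rowKey = reader.getCurrentKey().get();      // primitive copy, safe to keep
  //     int columns = reader.getCurrentValue().size();   // inspect in place only
  //     // deep-copy the value if a row must outlive this iteration; the same
  //     // BytesRefArrayWritable instance is reused for every row
  //   }
  //   reader.close();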
+ + private LongWritable key; + private BytesRefArrayWritable value; + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public LongWritable getCurrentKey() throws IOException, InterruptedException { + return key; + } + + @Override + public BytesRefArrayWritable getCurrentValue() throws IOException, InterruptedException { + return value; + } + + @Override + public float getProgress() throws IOException, InterruptedException { + if (end == start) { + return 0.0f; + } else { + return Math.min(1.0f, (in.getPosition() - start) / (float) (end - start)); } + } - @Override - public BytesRefArrayWritable getCurrentValue() throws IOException, InterruptedException { - return value; - } + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { - @Override - public float getProgress() throws IOException, InterruptedException { - if (end == start) { - return 0.0f; - } else { - return Math.min(1.0f, (in.getPosition() - start) / (float) (end - start)); - } + more = next(key); + if (more) { + in.getCurrentRow(value); } - @Override - public boolean nextKeyValue() throws IOException, InterruptedException { + return more; + } - more = next(key); - if (more) { - in.getCurrentRow(value); - } + private boolean next(LongWritable key) throws IOException { + if (!more) { + return false; + } - return more; + more = in.next(key); + if (!more) { + return false; } - private boolean next(LongWritable key) throws IOException { - if (!more) { - return false; - } - - more = in.next(key); - if (!more) { - return false; - } - - if (in.lastSeenSyncPos() >= end) { - more = false; - return more; - } - return more; + if (in.lastSeenSyncPos() >= end) { + more = false; + return more; } + return more; + } - @Override - public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, - InterruptedException { + @Override + public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, + InterruptedException { - FileSplit fSplit = (FileSplit) split; - Path path = fSplit.getPath(); - Configuration conf = context.getConfiguration(); - this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf); - this.end = fSplit.getStart() + fSplit.getLength(); + FileSplit fSplit = (FileSplit) split; + Path path = fSplit.getPath(); + Configuration conf = context.getConfiguration(); + this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf); + this.end = fSplit.getStart() + fSplit.getLength(); - if (fSplit.getStart() > in.getPosition()) { - in.sync(fSplit.getStart()); - } + if (fSplit.getStart() > in.getPosition()) { + in.sync(fSplit.getStart()); + } - this.start = in.getPosition(); - more = start < end; + this.start = in.getPosition(); + more = start < end; - key = new LongWritable(); - value = new BytesRefArrayWritable(); - } + key = new LongWritable(); + value = new BytesRefArrayWritable(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/security/HdfsAuthorizationProvider.java b/hcatalog/core/src/main/java/org/apache/hcatalog/security/HdfsAuthorizationProvider.java index 080e154..c939f10 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/security/HdfsAuthorizationProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/security/HdfsAuthorizationProvider.java @@ -60,279 +60,279 @@ */ public class HdfsAuthorizationProvider extends HiveAuthorizationProviderBase { - protected Warehouse wh; - - //Config variables : create an enum to store them if we have more - private static final 
String PROXY_USER_NAME = "proxy.user.name"; - - public HdfsAuthorizationProvider() { - super(); + protected Warehouse wh; + + //Config variables : create an enum to store them if we have more + private static final String PROXY_USER_NAME = "proxy.user.name"; + + public HdfsAuthorizationProvider() { + super(); + } + + public HdfsAuthorizationProvider(Configuration conf) { + super(); + setConf(conf); + } + + @Override + public void init(Configuration conf) throws HiveException { + hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); + } + + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + try { + this.wh = new Warehouse(conf); + } catch (MetaException ex) { + throw new RuntimeException(ex); } - - public HdfsAuthorizationProvider(Configuration conf) { - super(); - setConf(conf); + } + + protected FsAction getFsAction(Privilege priv, Path path) { + + switch (priv.getPriv()) { + case ALL: + throw new AuthorizationException("no matching Action for Privilege.All"); + case ALTER_DATA: + return FsAction.WRITE; + case ALTER_METADATA: + return FsAction.WRITE; + case CREATE: + return FsAction.WRITE; + case DROP: + return FsAction.WRITE; + case INDEX: + return FsAction.WRITE; + case LOCK: + return FsAction.WRITE; + case SELECT: + return FsAction.READ; + case SHOW_DATABASE: + return FsAction.READ; + case UNKNOWN: + default: + throw new AuthorizationException("Unknown privilege"); } + } - @Override - public void init(Configuration conf) throws HiveException { - hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); - } + protected EnumSet getFsActions(Privilege[] privs, Path path) { + EnumSet actions = EnumSet.noneOf(FsAction.class); - @Override - public void setConf(Configuration conf) { - super.setConf(conf); - try { - this.wh = new Warehouse(conf); - } catch (MetaException ex) { - throw new RuntimeException(ex); - } + if (privs == null) { + return actions; } - protected FsAction getFsAction(Privilege priv, Path path) { - - switch (priv.getPriv()) { - case ALL: - throw new AuthorizationException("no matching Action for Privilege.All"); - case ALTER_DATA: - return FsAction.WRITE; - case ALTER_METADATA: - return FsAction.WRITE; - case CREATE: - return FsAction.WRITE; - case DROP: - return FsAction.WRITE; - case INDEX: - return FsAction.WRITE; - case LOCK: - return FsAction.WRITE; - case SELECT: - return FsAction.READ; - case SHOW_DATABASE: - return FsAction.READ; - case UNKNOWN: - default: - throw new AuthorizationException("Unknown privilege"); - } + for (Privilege priv : privs) { + actions.add(getFsAction(priv, path)); } - protected EnumSet getFsActions(Privilege[] privs, Path path) { - EnumSet actions = EnumSet.noneOf(FsAction.class); - - if (privs == null) { - return actions; - } + return actions; + } - for (Privilege priv : privs) { - actions.add(getFsAction(priv, path)); - } + private static final String DATABASE_WAREHOUSE_SUFFIX = ".db"; - return actions; + private Path getDefaultDatabasePath(String dbName) throws MetaException { + if (dbName.equalsIgnoreCase(DEFAULT_DATABASE_NAME)) { + return wh.getWhRoot(); } - - private static final String DATABASE_WAREHOUSE_SUFFIX = ".db"; - - private Path getDefaultDatabasePath(String dbName) throws MetaException { - if (dbName.equalsIgnoreCase(DEFAULT_DATABASE_NAME)) { - return wh.getWhRoot(); - } - return new Path(wh.getWhRoot(), dbName.toLowerCase() + DATABASE_WAREHOUSE_SUFFIX); + return new Path(wh.getWhRoot(), dbName.toLowerCase() + 
DATABASE_WAREHOUSE_SUFFIX); + } + + protected Path getDbLocation(Database db) throws HiveException { + try { + String location = db.getLocationUri(); + if (location == null) { + return getDefaultDatabasePath(db.getName()); + } else { + return wh.getDnsPath(wh.getDatabasePath(db)); + } + } catch (MetaException ex) { + throw new HiveException(ex.getMessage()); } - - protected Path getDbLocation(Database db) throws HiveException { - try { - String location = db.getLocationUri(); - if (location == null) { - return getDefaultDatabasePath(db.getName()); - } else { - return wh.getDnsPath(wh.getDatabasePath(db)); - } - } catch (MetaException ex) { - throw new HiveException(ex.getMessage()); - } + } + + @Override + public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + //Authorize for global level permissions at the warehouse dir + Path root; + try { + root = wh.getWhRoot(); + authorize(root, readRequiredPriv, writeRequiredPriv); + } catch (MetaException ex) { + throw new HiveException(ex); } + } - @Override - public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - //Authorize for global level permissions at the warehouse dir - Path root; - try { - root = wh.getWhRoot(); - authorize(root, readRequiredPriv, writeRequiredPriv); - } catch (MetaException ex) { - throw new HiveException(ex); - } + @Override + public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + if (db == null) { + return; } - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - if (db == null) { - return; - } + Path path = getDbLocation(db); - Path path = getDbLocation(db); + authorize(path, readRequiredPriv, writeRequiredPriv); + } - authorize(path, readRequiredPriv, writeRequiredPriv); + @Override + public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + if (table == null) { + return; } - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - if (table == null) { - return; - } - - //unlike Hive's model, this can be called at CREATE TABLE as well, since we should authorize - //against the table's declared location - Path path = null; - try { - if (table.getTTable().getSd().getLocation() == null - || table.getTTable().getSd().getLocation().isEmpty()) { - path = wh.getTablePath(hive_db.getDatabase(table.getDbName()), table.getTableName()); - } else { - path = table.getPath(); - } - } catch (MetaException ex) { - throw new HiveException(ex); - } - - authorize(path, readRequiredPriv, writeRequiredPriv); + //unlike Hive's model, this can be called at CREATE TABLE as well, since we should authorize + //against the table's declared location + Path path = null; + try { + if (table.getTTable().getSd().getLocation() == null + || table.getTTable().getSd().getLocation().isEmpty()) { + path = wh.getTablePath(hive_db.getDatabase(table.getDbName()), table.getTableName()); + } else { + path = table.getPath(); + } + } catch (MetaException ex) { + throw new HiveException(ex); } - //TODO: HiveAuthorizationProvider should expose this interface instead of #authorize(Partition, Privilege[], Privilege[]) - public 
void authorize(Table table, Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { + authorize(path, readRequiredPriv, writeRequiredPriv); + } - if (part == null || part.getLocation() == null) { - authorize(table, readRequiredPriv, writeRequiredPriv); - } else { - authorize(part.getPartitionPath(), readRequiredPriv, writeRequiredPriv); - } - } + //TODO: HiveAuthorizationProvider should expose this interface instead of #authorize(Partition, Privilege[], Privilege[]) + public void authorize(Table table, Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - if (part == null) { - return; - } - authorize(part.getTable(), part, readRequiredPriv, writeRequiredPriv); + if (part == null || part.getLocation() == null) { + authorize(table, readRequiredPriv, writeRequiredPriv); + } else { + authorize(part.getPartitionPath(), readRequiredPriv, writeRequiredPriv); } + } - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - //columns cannot live in different files, just check for partition level permissions - authorize(table, part, readRequiredPriv, writeRequiredPriv); + @Override + public void authorize(Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + if (part == null) { + return; } - - /** - * Authorization privileges against a path. - * @param path a filesystem path - * @param readRequiredPriv a list of privileges needed for inputs. - * @param writeRequiredPriv a list of privileges needed for outputs. - */ - public void authorize(Path path, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - try { - EnumSet actions = getFsActions(readRequiredPriv, path); - actions.addAll(getFsActions(writeRequiredPriv, path)); - if (actions.isEmpty()) { - return; - } - - checkPermissions(getConf(), path, actions); - - } catch (AccessControlException ex) { - throw new AuthorizationException(ex); - } catch (LoginException ex) { - throw new AuthorizationException(ex); - } catch (IOException ex) { - throw new HiveException(ex); - } + authorize(part.getTable(), part, readRequiredPriv, writeRequiredPriv); + } + + @Override + public void authorize(Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + //columns cannot live in different files, just check for partition level permissions + authorize(table, part, readRequiredPriv, writeRequiredPriv); + } + + /** + * Authorization privileges against a path. + * @param path a filesystem path + * @param readRequiredPriv a list of privileges needed for inputs. + * @param writeRequiredPriv a list of privileges needed for outputs. 
+ */ + public void authorize(Path path, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + try { + EnumSet actions = getFsActions(readRequiredPriv, path); + actions.addAll(getFsActions(writeRequiredPriv, path)); + if (actions.isEmpty()) { + return; + } + + checkPermissions(getConf(), path, actions); + + } catch (AccessControlException ex) { + throw new AuthorizationException(ex); + } catch (LoginException ex) { + throw new AuthorizationException(ex); + } catch (IOException ex) { + throw new HiveException(ex); } + } - /** - * Checks the permissions for the given path and current user on Hadoop FS. If the given path - * does not exists, it checks for it's parent folder. - */ - protected static void checkPermissions(final Configuration conf, final Path path, - final EnumSet actions) throws IOException, LoginException { + /** + * Checks the permissions for the given path and current user on Hadoop FS. If the given path + * does not exists, it checks for it's parent folder. + */ + protected static void checkPermissions(final Configuration conf, final Path path, + final EnumSet actions) throws IOException, LoginException { - if (path == null) { - throw new IllegalArgumentException("path is null"); - } + if (path == null) { + throw new IllegalArgumentException("path is null"); + } - HadoopShims shims = ShimLoader.getHadoopShims(); - final UserGroupInformation ugi; - if (conf.get(PROXY_USER_NAME) != null) { - ugi = UserGroupInformation.createRemoteUser(conf.get(PROXY_USER_NAME)); - } else { - ugi = shims.getUGIForConf(conf); - } - final String user = shims.getShortUserName(ugi); - - final FileSystem fs = path.getFileSystem(conf); - - if (fs.exists(path)) { - checkPermissions(fs, path, actions, user, ugi.getGroupNames()); - } else if (path.getParent() != null) { - // find the ancestor which exists to check it's permissions - Path par = path.getParent(); - while (par != null) { - if (fs.exists(par)) { - break; - } - par = par.getParent(); - } - - checkPermissions(fs, par, actions, user, ugi.getGroupNames()); + HadoopShims shims = ShimLoader.getHadoopShims(); + final UserGroupInformation ugi; + if (conf.get(PROXY_USER_NAME) != null) { + ugi = UserGroupInformation.createRemoteUser(conf.get(PROXY_USER_NAME)); + } else { + ugi = shims.getUGIForConf(conf); + } + final String user = shims.getShortUserName(ugi); + + final FileSystem fs = path.getFileSystem(conf); + + if (fs.exists(path)) { + checkPermissions(fs, path, actions, user, ugi.getGroupNames()); + } else if (path.getParent() != null) { + // find the ancestor which exists to check it's permissions + Path par = path.getParent(); + while (par != null) { + if (fs.exists(par)) { + break; } + par = par.getParent(); + } + + checkPermissions(fs, par, actions, user, ugi.getGroupNames()); + } + } + + /** + * Checks the permissions for the given path and current user on Hadoop FS. If the given path + * does not exists, it returns. + */ + @SuppressWarnings("deprecation") + protected static void checkPermissions(final FileSystem fs, final Path path, + final EnumSet actions, String user, String[] groups) throws IOException, + AccessControlException { + + final FileStatus stat; + + try { + stat = fs.getFileStatus(path); + } catch (FileNotFoundException fnfe) { + // File named by path doesn't exist; nothing to validate. + return; + } catch (org.apache.hadoop.fs.permission.AccessControlException ace) { + // Older hadoop version will throw this @deprecated Exception. 
+ throw new AccessControlException(ace.getMessage()); } - /** - * Checks the permissions for the given path and current user on Hadoop FS. If the given path - * does not exists, it returns. - */ - @SuppressWarnings("deprecation") - protected static void checkPermissions(final FileSystem fs, final Path path, - final EnumSet actions, String user, String[] groups) throws IOException, - AccessControlException { - - final FileStatus stat; - - try { - stat = fs.getFileStatus(path); - } catch (FileNotFoundException fnfe) { - // File named by path doesn't exist; nothing to validate. - return; - } catch (org.apache.hadoop.fs.permission.AccessControlException ace) { - // Older hadoop version will throw this @deprecated Exception. - throw new AccessControlException(ace.getMessage()); - } + final FsPermission dirPerms = stat.getPermission(); + final String grp = stat.getGroup(); - final FsPermission dirPerms = stat.getPermission(); - final String grp = stat.getGroup(); - - for (FsAction action : actions) { - if (user.equals(stat.getOwner())) { - if (dirPerms.getUserAction().implies(action)) { - continue; - } - } - if (ArrayUtils.contains(groups, grp)) { - if (dirPerms.getGroupAction().implies(action)) { - continue; - } - } - if (dirPerms.getOtherAction().implies(action)) { - continue; - } - throw new AccessControlException("action " + action + " not permitted on path " - + path + " for user " + user); + for (FsAction action : actions) { + if (user.equals(stat.getOwner())) { + if (dirPerms.getUserAction().implies(action)) { + continue; + } + } + if (ArrayUtils.contains(groups, grp)) { + if (dirPerms.getGroupAction().implies(action)) { + continue; } + } + if (dirPerms.getOtherAction().implies(action)) { + continue; + } + throw new AccessControlException("action " + action + " not permitted on path " + + path + " for user " + user); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/security/StorageDelegationAuthorizationProvider.java b/hcatalog/core/src/main/java/org/apache/hcatalog/security/StorageDelegationAuthorizationProvider.java index ee75767..711c471 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/security/StorageDelegationAuthorizationProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/security/StorageDelegationAuthorizationProvider.java @@ -46,98 +46,98 @@ */ public class StorageDelegationAuthorizationProvider extends HiveAuthorizationProviderBase { - protected HiveAuthorizationProvider hdfsAuthorizer = new HdfsAuthorizationProvider(); - - protected static Map authProviders = new HashMap(); - - @Override - public void setConf(Configuration conf) { - super.setConf(conf); - hdfsAuthorizer.setConf(conf); - } - - @Override - public void init(Configuration conf) throws HiveException { - hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); - } - - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - super.setAuthenticator(authenticator); - hdfsAuthorizer.setAuthenticator(authenticator); - } - - static { - registerAuthProvider("org.apache.hadoop.hive.hbase.HBaseStorageHandler", - "org.apache.hcatalog.hbase.HBaseAuthorizationProvider"); - registerAuthProvider("org.apache.hcatalog.hbase.HBaseHCatStorageHandler", - "org.apache.hcatalog.hbase.HBaseAuthorizationProvider"); - } - - //workaround until Hive adds StorageHandler.getAuthorizationProvider(). 
Remove these parts afterwards - public static void registerAuthProvider(String storageHandlerClass, - String authProviderClass) { - authProviders.put(storageHandlerClass, authProviderClass); - } - - /** Returns the StorageHandler of the Table obtained from the HCatStorageHandler */ - protected HiveAuthorizationProvider getDelegate(Table table) throws HiveException { - HiveStorageHandler handler = table.getStorageHandler(); - - if (handler != null) { - if (handler instanceof HCatStorageHandler) { - return ((HCatStorageHandler) handler).getAuthorizationProvider(); - } else { - String authProviderClass = authProviders.get(handler.getClass().getCanonicalName()); - - if (authProviderClass != null) { - try { - ReflectionUtils.newInstance(getConf().getClassByName(authProviderClass), getConf()); - } catch (ClassNotFoundException ex) { - throw new HiveException("Cannot instantiate delegation AuthotizationProvider"); - } - } - - //else we do not have anything to delegate to - throw new HiveException(String.format("Storage Handler for table:%s is not an instance " + - "of HCatStorageHandler", table.getTableName())); - } - } else { - //return an authorizer for HDFS - return hdfsAuthorizer; + protected HiveAuthorizationProvider hdfsAuthorizer = new HdfsAuthorizationProvider(); + + protected static Map authProviders = new HashMap(); + + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + hdfsAuthorizer.setConf(conf); + } + + @Override + public void init(Configuration conf) throws HiveException { + hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); + } + + @Override + public void setAuthenticator(HiveAuthenticationProvider authenticator) { + super.setAuthenticator(authenticator); + hdfsAuthorizer.setAuthenticator(authenticator); + } + + static { + registerAuthProvider("org.apache.hadoop.hive.hbase.HBaseStorageHandler", + "org.apache.hcatalog.hbase.HBaseAuthorizationProvider"); + registerAuthProvider("org.apache.hcatalog.hbase.HBaseHCatStorageHandler", + "org.apache.hcatalog.hbase.HBaseAuthorizationProvider"); + } + + //workaround until Hive adds StorageHandler.getAuthorizationProvider(). 
Remove these parts afterwards + public static void registerAuthProvider(String storageHandlerClass, + String authProviderClass) { + authProviders.put(storageHandlerClass, authProviderClass); + } + + /** Returns the StorageHandler of the Table obtained from the HCatStorageHandler */ + protected HiveAuthorizationProvider getDelegate(Table table) throws HiveException { + HiveStorageHandler handler = table.getStorageHandler(); + + if (handler != null) { + if (handler instanceof HCatStorageHandler) { + return ((HCatStorageHandler) handler).getAuthorizationProvider(); + } else { + String authProviderClass = authProviders.get(handler.getClass().getCanonicalName()); + + if (authProviderClass != null) { + try { + ReflectionUtils.newInstance(getConf().getClassByName(authProviderClass), getConf()); + } catch (ClassNotFoundException ex) { + throw new HiveException("Cannot instantiate delegation AuthotizationProvider"); + } } - } - - @Override - public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - //global authorizations against warehouse hdfs directory - hdfsAuthorizer.authorize(readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - //db's are tied to a hdfs location - hdfsAuthorizer.authorize(db, readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - getDelegate(table).authorize(table, readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { - getDelegate(part.getTable()).authorize(part, readRequiredPriv, writeRequiredPriv); - } - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - getDelegate(table).authorize(table, part, columns, readRequiredPriv, writeRequiredPriv); + //else we do not have anything to delegate to + throw new HiveException(String.format("Storage Handler for table:%s is not an instance " + + "of HCatStorageHandler", table.getTableName())); + } + } else { + //return an authorizer for HDFS + return hdfsAuthorizer; } + } + + @Override + public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + //global authorizations against warehouse hdfs directory + hdfsAuthorizer.authorize(readRequiredPriv, writeRequiredPriv); + } + + @Override + public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + //db's are tied to a hdfs location + hdfsAuthorizer.authorize(db, readRequiredPriv, writeRequiredPriv); + } + + @Override + public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + getDelegate(table).authorize(table, readRequiredPriv, writeRequiredPriv); + } + + @Override + public void authorize(Partition part, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { + getDelegate(part.getTable()).authorize(part, readRequiredPriv, 
writeRequiredPriv); + } + + @Override + public void authorize(Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + getDelegate(table).authorize(table, part, columns, readRequiredPriv, writeRequiredPriv); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hcatalog/storagehandler/DummyHCatAuthProvider.java b/hcatalog/core/src/main/java/org/apache/hcatalog/storagehandler/DummyHCatAuthProvider.java index b778292..955e57f 100644 --- a/hcatalog/core/src/main/java/org/apache/hcatalog/storagehandler/DummyHCatAuthProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hcatalog/storagehandler/DummyHCatAuthProvider.java @@ -38,108 +38,108 @@ */ class DummyHCatAuthProvider implements HiveAuthorizationProvider { - @Override - public Configuration getConf() { - return null; - } + @Override + public Configuration getConf() { + return null; + } - @Override - public void setConf(Configuration conf) { - } + @Override + public void setConf(Configuration conf) { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #init(org.apache.hadoop.conf.Configuration) - */ - @Override - public void init(Configuration conf) throws HiveException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #init(org.apache.hadoop.conf.Configuration) + */ + @Override + public void init(Configuration conf) throws HiveException { + } - @Override - public HiveAuthenticationProvider getAuthenticator() { - return null; - } + @Override + public HiveAuthenticationProvider getAuthenticator() { + return null; + } - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - } + @Override + public void setAuthenticator(HiveAuthenticationProvider authenticator) { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.metastore.api.Database, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.metastore.api.Database, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + 
@Override + public void authorize(Database db, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Table, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Table table, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Partition, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Partition, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Partition part, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Table, + * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatCli.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatCli.java index 12eec18..c1fdf6e 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatCli.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatCli.java @@ 
-54,278 +54,278 @@ public class HCatCli { - @SuppressWarnings("static-access") - public static void main(String[] args) { + @SuppressWarnings("static-access") + public static void main(String[] args) { - try { - LogUtils.initHiveLog4j(); - } catch (LogInitializationException e) { + try { + LogUtils.initHiveLog4j(); + } catch (LogInitializationException e) { - } - - CliSessionState ss = new CliSessionState(new HiveConf(SessionState.class)); - ss.in = System.in; - try { - ss.out = new PrintStream(System.out, true, "UTF-8"); - ss.err = new PrintStream(System.err, true, "UTF-8"); - } catch (UnsupportedEncodingException e) { - System.exit(1); - } - - HiveConf conf = ss.getConf(); - - HiveConf.setVar(conf, ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); - - SessionState.start(ss); - - Options options = new Options(); - - // -e 'quoted-query-string' - options.addOption(OptionBuilder - .hasArg() - .withArgName("exec") - .withDescription("hcat command given from command line") - .create('e')); - - // -f - options.addOption(OptionBuilder - .hasArg() - .withArgName("file") - .withDescription("hcat commands in file") - .create('f')); - - // -g - options.addOption(OptionBuilder - .hasArg(). - withArgName("group"). - withDescription("group for the db/table specified in CREATE statement"). - create('g')); - - // -p - options.addOption(OptionBuilder - .hasArg() - .withArgName("perms") - .withDescription("permissions for the db/table specified in CREATE statement") - .create('p')); - - // -D - options.addOption(OptionBuilder - .hasArgs(2) - .withArgName("property=value") - .withValueSeparator() - .withDescription("use hadoop value for given property") - .create('D')); - - // [-h|--help] - options.addOption(new Option("h", "help", false, "Print help information")); - - Parser parser = new GnuParser(); - CommandLine cmdLine = null; - - try { - cmdLine = parser.parse(options, args); - - } catch (ParseException e) { - printUsage(options, ss.err); - System.exit(1); - } - // -e - String execString = (String) cmdLine.getOptionValue('e'); - // -f - String fileName = (String) cmdLine.getOptionValue('f'); - // -h - if (cmdLine.hasOption('h')) { - printUsage(options, ss.out); - System.exit(0); - } - - if (execString != null && fileName != null) { - ss.err.println("The '-e' and '-f' options cannot be specified simultaneously"); - printUsage(options, ss.err); - System.exit(1); - } - - // -p - String perms = (String) cmdLine.getOptionValue('p'); - if (perms != null) { - validatePermissions(ss, conf, perms); - } - - // -g - String grp = (String) cmdLine.getOptionValue('g'); - if (grp != null) { - conf.set(HCatConstants.HCAT_GROUP, grp); - } - - // -D - setConfProperties(conf, cmdLine.getOptionProperties("D")); - - if (execString != null) { - System.exit(processLine(execString)); - } + } - try { - if (fileName != null) { - System.exit(processFile(fileName)); - } - } catch (FileNotFoundException e) { - ss.err.println("Input file not found. (" + e.getMessage() + ")"); - System.exit(1); - } catch (IOException e) { - ss.err.println("Could not open input file for reading. 
(" + e.getMessage() + ")"); - System.exit(1); - } + CliSessionState ss = new CliSessionState(new HiveConf(SessionState.class)); + ss.in = System.in; + try { + ss.out = new PrintStream(System.out, true, "UTF-8"); + ss.err = new PrintStream(System.err, true, "UTF-8"); + } catch (UnsupportedEncodingException e) { + System.exit(1); + } - // -h - printUsage(options, ss.err); - System.exit(1); + HiveConf conf = ss.getConf(); + + HiveConf.setVar(conf, ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); + + SessionState.start(ss); + + Options options = new Options(); + + // -e 'quoted-query-string' + options.addOption(OptionBuilder + .hasArg() + .withArgName("exec") + .withDescription("hcat command given from command line") + .create('e')); + + // -f + options.addOption(OptionBuilder + .hasArg() + .withArgName("file") + .withDescription("hcat commands in file") + .create('f')); + + // -g + options.addOption(OptionBuilder + .hasArg(). + withArgName("group"). + withDescription("group for the db/table specified in CREATE statement"). + create('g')); + + // -p + options.addOption(OptionBuilder + .hasArg() + .withArgName("perms") + .withDescription("permissions for the db/table specified in CREATE statement") + .create('p')); + + // -D + options.addOption(OptionBuilder + .hasArgs(2) + .withArgName("property=value") + .withValueSeparator() + .withDescription("use hadoop value for given property") + .create('D')); + + // [-h|--help] + options.addOption(new Option("h", "help", false, "Print help information")); + + Parser parser = new GnuParser(); + CommandLine cmdLine = null; + + try { + cmdLine = parser.parse(options, args); + + } catch (ParseException e) { + printUsage(options, ss.err); + System.exit(1); + } + // -e + String execString = (String) cmdLine.getOptionValue('e'); + // -f + String fileName = (String) cmdLine.getOptionValue('f'); + // -h + if (cmdLine.hasOption('h')) { + printUsage(options, ss.out); + System.exit(0); } - private static void setConfProperties(HiveConf conf, Properties props) { - for (java.util.Map.Entry e : props.entrySet()) - conf.set((String) e.getKey(), (String) e.getValue()); + if (execString != null && fileName != null) { + ss.err.println("The '-e' and '-f' options cannot be specified simultaneously"); + printUsage(options, ss.err); + System.exit(1); } - private static int processLine(String line) { - int ret = 0; + // -p + String perms = (String) cmdLine.getOptionValue('p'); + if (perms != null) { + validatePermissions(ss, conf, perms); + } - String command = ""; - for (String oneCmd : line.split(";")) { + // -g + String grp = (String) cmdLine.getOptionValue('g'); + if (grp != null) { + conf.set(HCatConstants.HCAT_GROUP, grp); + } - if (StringUtils.endsWith(oneCmd, "\\")) { - command += StringUtils.chop(oneCmd) + ";"; - continue; - } else { - command += oneCmd; - } - if (StringUtils.isBlank(command)) { - continue; - } + // -D + setConfProperties(conf, cmdLine.getOptionProperties("D")); - ret = processCmd(command); - command = ""; - } - return ret; + if (execString != null) { + System.exit(processLine(execString)); } - private static int processFile(String fileName) throws IOException { - FileReader fileReader = null; - BufferedReader reader = null; - try { - fileReader = new FileReader(fileName); - reader = new BufferedReader(fileReader); - String line; - StringBuilder qsb = new StringBuilder(); - - while ((line = reader.readLine()) != null) { - qsb.append(line + "\n"); - } - - return (processLine(qsb.toString())); - } finally { - if (fileReader != 
null) { - fileReader.close(); - } - if (reader != null) { - reader.close(); - } - } + try { + if (fileName != null) { + System.exit(processFile(fileName)); + } + } catch (FileNotFoundException e) { + ss.err.println("Input file not found. (" + e.getMessage() + ")"); + System.exit(1); + } catch (IOException e) { + ss.err.println("Could not open input file for reading. (" + e.getMessage() + ")"); + System.exit(1); } - private static int processCmd(String cmd) { - - SessionState ss = SessionState.get(); - long start = System.currentTimeMillis(); + // -h + printUsage(options, ss.err); + System.exit(1); + } + + private static void setConfProperties(HiveConf conf, Properties props) { + for (java.util.Map.Entry e : props.entrySet()) + conf.set((String) e.getKey(), (String) e.getValue()); + } + + private static int processLine(String line) { + int ret = 0; + + String command = ""; + for (String oneCmd : line.split(";")) { + + if (StringUtils.endsWith(oneCmd, "\\")) { + command += StringUtils.chop(oneCmd) + ";"; + continue; + } else { + command += oneCmd; + } + if (StringUtils.isBlank(command)) { + continue; + } + + ret = processCmd(command); + command = ""; + } + return ret; + } + + private static int processFile(String fileName) throws IOException { + FileReader fileReader = null; + BufferedReader reader = null; + try { + fileReader = new FileReader(fileName); + reader = new BufferedReader(fileReader); + String line; + StringBuilder qsb = new StringBuilder(); + + while ((line = reader.readLine()) != null) { + qsb.append(line + "\n"); + } + + return (processLine(qsb.toString())); + } finally { + if (fileReader != null) { + fileReader.close(); + } + if (reader != null) { + reader.close(); + } + } + } - cmd = cmd.trim(); - String firstToken = cmd.split("\\s+")[0].trim(); + private static int processCmd(String cmd) { - if (firstToken.equalsIgnoreCase("set")) { - return new SetProcessor().run(cmd.substring(firstToken.length()).trim()).getResponseCode(); - } else if (firstToken.equalsIgnoreCase("dfs")) { - return new DfsProcessor(ss.getConf()).run(cmd.substring(firstToken.length()).trim()).getResponseCode(); - } + SessionState ss = SessionState.get(); + long start = System.currentTimeMillis(); - HCatDriver driver = new HCatDriver(); + cmd = cmd.trim(); + String firstToken = cmd.split("\\s+")[0].trim(); - int ret = driver.run(cmd).getResponseCode(); + if (firstToken.equalsIgnoreCase("set")) { + return new SetProcessor().run(cmd.substring(firstToken.length()).trim()).getResponseCode(); + } else if (firstToken.equalsIgnoreCase("dfs")) { + return new DfsProcessor(ss.getConf()).run(cmd.substring(firstToken.length()).trim()).getResponseCode(); + } - if (ret != 0) { - driver.close(); - System.exit(ret); - } + HCatDriver driver = new HCatDriver(); - ArrayList res = new ArrayList(); - try { - while (driver.getResults(res)) { - for (String r : res) { - ss.out.println(r); - } - res.clear(); - } - } catch (IOException e) { - ss.err.println("Failed with exception " + e.getClass().getName() + ":" - + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); - ret = 1; - } catch (CommandNeedRetryException e) { - ss.err.println("Failed with exception " + e.getClass().getName() + ":" - + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); - ret = 1; - } + int ret = driver.run(cmd).getResponseCode(); - int cret = driver.close(); - if (ret == 0) { - ret = cret; - } + if (ret != 0) { + driver.close(); + System.exit(ret); + } - long end = System.currentTimeMillis(); - 
if (end > start) { - double timeTaken = (end - start) / 1000.0; - ss.err.println("Time taken: " + timeTaken + " seconds"); + ArrayList res = new ArrayList(); + try { + while (driver.getResults(res)) { + for (String r : res) { + ss.out.println(r); } - return ret; + res.clear(); + } + } catch (IOException e) { + ss.err.println("Failed with exception " + e.getClass().getName() + ":" + + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); + ret = 1; + } catch (CommandNeedRetryException e) { + ss.err.println("Failed with exception " + e.getClass().getName() + ":" + + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); + ret = 1; } - private static void printUsage(Options options, OutputStream os) { - PrintWriter pw = new PrintWriter(os); - new HelpFormatter().printHelp(pw, 2 * HelpFormatter.DEFAULT_WIDTH, - "hcat { -e \"\" | -f \"\" } [ -g \"\" ] [ -p \"\" ] [ -D\"=\" ]", - null, options, HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, - null, false); - pw.flush(); + int cret = driver.close(); + if (ret == 0) { + ret = cret; } - private static void validatePermissions(CliSessionState ss, HiveConf conf, String perms) { - perms = perms.trim(); - FsPermission fp = null; - - if (perms.matches("^\\s*([r,w,x,-]{9})\\s*$")) { - fp = FsPermission.valueOf("d" + perms); - } else if (perms.matches("^\\s*([0-7]{3})\\s*$")) { - fp = new FsPermission(Short.decode("0" + perms)); - } else { - ss.err.println("Invalid permission specification: " + perms); - System.exit(1); - } + long end = System.currentTimeMillis(); + if (end > start) { + double timeTaken = (end - start) / 1000.0; + ss.err.println("Time taken: " + timeTaken + " seconds"); + } + return ret; + } + + private static void printUsage(Options options, OutputStream os) { + PrintWriter pw = new PrintWriter(os); + new HelpFormatter().printHelp(pw, 2 * HelpFormatter.DEFAULT_WIDTH, + "hcat { -e \"\" | -f \"\" } [ -g \"\" ] [ -p \"\" ] [ -D\"=\" ]", + null, options, HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, + null, false); + pw.flush(); + } + + private static void validatePermissions(CliSessionState ss, HiveConf conf, String perms) { + perms = perms.trim(); + FsPermission fp = null; + + if (perms.matches("^\\s*([r,w,x,-]{9})\\s*$")) { + fp = FsPermission.valueOf("d" + perms); + } else if (perms.matches("^\\s*([0-7]{3})\\s*$")) { + fp = new FsPermission(Short.decode("0" + perms)); + } else { + ss.err.println("Invalid permission specification: " + perms); + System.exit(1); + } - if (!HCatUtil.validateMorePermissive(fp.getUserAction(), fp.getGroupAction())) { - ss.err.println("Invalid permission specification: " + perms + " : user permissions must be more permissive than group permission "); - System.exit(1); - } - if (!HCatUtil.validateMorePermissive(fp.getGroupAction(), fp.getOtherAction())) { - ss.err.println("Invalid permission specification: " + perms + " : group permissions must be more permissive than other permission "); - System.exit(1); - } - if ((!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getUserAction())) || - (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getGroupAction())) || - (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getOtherAction()))) { - ss.err.println("Invalid permission specification: " + perms + " : permissions must have execute permissions if read or write permissions are specified "); - System.exit(1); - } + if (!HCatUtil.validateMorePermissive(fp.getUserAction(), fp.getGroupAction())) { + ss.err.println("Invalid 
permission specification: " + perms + " : user permissions must be more permissive than group permission "); + System.exit(1); + } + if (!HCatUtil.validateMorePermissive(fp.getGroupAction(), fp.getOtherAction())) { + ss.err.println("Invalid permission specification: " + perms + " : group permissions must be more permissive than other permission "); + System.exit(1); + } + if ((!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getUserAction())) || + (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getGroupAction())) || + (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getOtherAction()))) { + ss.err.println("Invalid permission specification: " + perms + " : permissions must have execute permissions if read or write permissions are specified "); + System.exit(1); + } - conf.set(HCatConstants.HCAT_PERMS, "d" + fp.toString()); + conf.set(HCatConstants.HCAT_PERMS, "d" + fp.toString()); - } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatDriver.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatDriver.java index c98ed37..07abd42 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatDriver.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatDriver.java @@ -35,109 +35,109 @@ public class HCatDriver extends Driver { - @Override - public CommandProcessorResponse run(String command) { + @Override + public CommandProcessorResponse run(String command) { + + CommandProcessorResponse cpr = null; + try { + cpr = super.run(command); + } catch (CommandNeedRetryException e) { + return new CommandProcessorResponse(-1, e.toString(), ""); + } - CommandProcessorResponse cpr = null; - try { - cpr = super.run(command); - } catch (CommandNeedRetryException e) { - return new CommandProcessorResponse(-1, e.toString(), ""); - } + SessionState ss = SessionState.get(); + + if (cpr.getResponseCode() == 0) { + // Only attempt to do this, if cmd was successful. + int rc = setFSPermsNGrp(ss); + cpr = new CommandProcessorResponse(rc); + } + // reset conf vars + ss.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, ""); + ss.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, ""); - SessionState ss = SessionState.get(); + return cpr; + } - if (cpr.getResponseCode() == 0) { - // Only attempt to do this, if cmd was successful. - int rc = setFSPermsNGrp(ss); - cpr = new CommandProcessorResponse(rc); - } - // reset conf vars - ss.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, ""); - ss.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, ""); + private int setFSPermsNGrp(SessionState ss) { - return cpr; + Configuration conf = ss.getConf(); + + String tblName = conf.get(HCatConstants.HCAT_CREATE_TBL_NAME, ""); + if (tblName.isEmpty()) { + tblName = conf.get("import.destination.table", ""); + conf.set("import.destination.table", ""); } + String dbName = conf.get(HCatConstants.HCAT_CREATE_DB_NAME, ""); + String grp = conf.get(HCatConstants.HCAT_GROUP, null); + String permsStr = conf.get(HCatConstants.HCAT_PERMS, null); - private int setFSPermsNGrp(SessionState ss) { + if (tblName.isEmpty() && dbName.isEmpty()) { + // it wasn't create db/table + return 0; + } - Configuration conf = ss.getConf(); + if (null == grp && null == permsStr) { + // there were no grp and perms to begin with. 
+ return 0; + } - String tblName = conf.get(HCatConstants.HCAT_CREATE_TBL_NAME, ""); - if (tblName.isEmpty()) { - tblName = conf.get("import.destination.table", ""); - conf.set("import.destination.table", ""); - } - String dbName = conf.get(HCatConstants.HCAT_CREATE_DB_NAME, ""); - String grp = conf.get(HCatConstants.HCAT_GROUP, null); - String permsStr = conf.get(HCatConstants.HCAT_PERMS, null); + FsPermission perms = FsPermission.valueOf(permsStr); - if (tblName.isEmpty() && dbName.isEmpty()) { - // it wasn't create db/table - return 0; - } + if (!tblName.isEmpty()) { + Hive db = null; + try { + db = Hive.get(); + Table tbl = db.getTable(tblName); + Path tblPath = tbl.getPath(); - if (null == grp && null == permsStr) { - // there were no grp and perms to begin with. - return 0; + FileSystem fs = tblPath.getFileSystem(conf); + if (null != perms) { + fs.setPermission(tblPath, perms); } - - FsPermission perms = FsPermission.valueOf(permsStr); - - if (!tblName.isEmpty()) { - Hive db = null; - try { - db = Hive.get(); - Table tbl = db.getTable(tblName); - Path tblPath = tbl.getPath(); - - FileSystem fs = tblPath.getFileSystem(conf); - if (null != perms) { - fs.setPermission(tblPath, perms); - } - if (null != grp) { - fs.setOwner(tblPath, null, grp); - } - return 0; - - } catch (Exception e) { - ss.err.println(String.format("Failed to set permissions/groups on TABLE: <%s> %s", tblName, e.getMessage())); - try { // We need to drop the table. - if (null != db) { - db.dropTable(tblName); - } - } catch (HiveException he) { - ss.err.println(String.format("Failed to drop TABLE <%s> after failing to set permissions/groups on it. %s", tblName, e.getMessage())); - } - return 1; - } - } else { - // looks like a db operation - if (dbName.isEmpty() || dbName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { - // We dont set perms or groups for default dir. - return 0; - } else { - try { - Hive db = Hive.get(); - Path dbPath = new Warehouse(conf).getDatabasePath(db.getDatabase(dbName)); - FileSystem fs = dbPath.getFileSystem(conf); - if (perms != null) { - fs.setPermission(dbPath, perms); - } - if (null != grp) { - fs.setOwner(dbPath, null, grp); - } - return 0; - } catch (Exception e) { - ss.err.println(String.format("Failed to set permissions and/or group on DB: <%s> %s", dbName, e.getMessage())); - try { - Hive.get().dropDatabase(dbName); - } catch (Exception e1) { - ss.err.println(String.format("Failed to drop DB <%s> after failing to set permissions/group on it. %s", dbName, e1.getMessage())); - } - return 1; - } - } + if (null != grp) { + fs.setOwner(tblPath, null, grp); + } + return 0; + + } catch (Exception e) { + ss.err.println(String.format("Failed to set permissions/groups on TABLE: <%s> %s", tblName, e.getMessage())); + try { // We need to drop the table. + if (null != db) { + db.dropTable(tblName); + } + } catch (HiveException he) { + ss.err.println(String.format("Failed to drop TABLE <%s> after failing to set permissions/groups on it. %s", tblName, e.getMessage())); + } + return 1; + } + } else { + // looks like a db operation + if (dbName.isEmpty() || dbName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { + // We dont set perms or groups for default dir. 
+ return 0; + } else { + try { + Hive db = Hive.get(); + Path dbPath = new Warehouse(conf).getDatabasePath(db.getDatabase(dbName)); + FileSystem fs = dbPath.getFileSystem(conf); + if (perms != null) { + fs.setPermission(dbPath, perms); + } + if (null != grp) { + fs.setOwner(dbPath, null, grp); + } + return 0; + } catch (Exception e) { + ss.err.println(String.format("Failed to set permissions and/or group on DB: <%s> %s", dbName, e.getMessage())); + try { + Hive.get().dropDatabase(dbName); + } catch (Exception e1) { + ss.err.println(String.format("Failed to drop DB <%s> after failing to set permissions/group on it. %s", dbName, e1.getMessage())); + } + return 1; } + } } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java index 0720f0a..887a64d 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java @@ -37,60 +37,60 @@ final class CreateDatabaseHook extends HCatSemanticAnalyzerBase { - String databaseName; + String databaseName; - @Override - public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) - throws SemanticException { + @Override + public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) + throws SemanticException { - Hive db; - try { - db = context.getHive(); - } catch (HiveException e) { - throw new SemanticException("Couldn't get Hive DB instance in semantic analysis phase.", e); - } + Hive db; + try { + db = context.getHive(); + } catch (HiveException e) { + throw new SemanticException("Couldn't get Hive DB instance in semantic analysis phase.", e); + } - // Analyze and create tbl properties object - int numCh = ast.getChildCount(); + // Analyze and create tbl properties object + int numCh = ast.getChildCount(); - databaseName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0)); + databaseName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0)); - for (int num = 1; num < numCh; num++) { - ASTNode child = (ASTNode) ast.getChild(num); + for (int num = 1; num < numCh; num++) { + ASTNode child = (ASTNode) ast.getChild(num); - switch (child.getToken().getType()) { + switch (child.getToken().getType()) { - case HiveParser.TOK_IFNOTEXISTS: - try { - List dbs = db.getDatabasesByPattern(databaseName); - if (dbs != null && dbs.size() > 0) { // db exists - return ast; - } - } catch (HiveException e) { - throw new SemanticException(e); - } - break; - } + case HiveParser.TOK_IFNOTEXISTS: + try { + List dbs = db.getDatabasesByPattern(databaseName); + if (dbs != null && dbs.size() > 0) { // db exists + return ast; + } + } catch (HiveException e) { + throw new SemanticException(e); } - - return ast; + break; + } } - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - context.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, databaseName); - super.postAnalyze(context, rootTasks); - } + return ast; + } - @Override - protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, - Hive hive, DDLWork work) throws HiveException { - CreateDatabaseDesc createDb = work.getCreateDatabaseDesc(); - if (createDb != null) { - Database db = new Database(createDb.getName(), createDb.getComment(), - createDb.getLocationUri(), 
createDb.getDatabaseProperties()); - authorize(db, Privilege.CREATE); - } + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + context.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, databaseName); + super.postAnalyze(context, rootTasks); + } + + @Override + protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, + Hive hive, DDLWork work) throws HiveException { + CreateDatabaseDesc createDb = work.getCreateDatabaseDesc(); + if (createDb != null) { + Database db = new Database(createDb.getName(), createDb.getComment(), + createDb.getLocationUri(), createDb.getDatabaseProperties()); + authorize(db, Privilege.CREATE); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java index 496ace6..b28835c 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java @@ -48,198 +48,198 @@ final class CreateTableHook extends HCatSemanticAnalyzerBase { - private String tableName; + private String tableName; + + @Override + public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, + ASTNode ast) throws SemanticException { + + Hive db; + try { + db = context.getHive(); + } catch (HiveException e) { + throw new SemanticException( + "Couldn't get Hive DB instance in semantic analysis phase.", + e); + } + + // Analyze and create tbl properties object + int numCh = ast.getChildCount(); + + String inputFormat = null, outputFormat = null; + tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast + .getChild(0)); + boolean likeTable = false; + + for (int num = 1; num < numCh; num++) { + ASTNode child = (ASTNode) ast.getChild(num); + + switch (child.getToken().getType()) { + + case HiveParser.TOK_QUERY: // CTAS + throw new SemanticException( + "Operation not supported. 
Create table as " + + "Select is not a valid operation."); + + case HiveParser.TOK_TABLEBUCKETS: + break; + + case HiveParser.TOK_TBLSEQUENCEFILE: + inputFormat = HCatConstants.SEQUENCEFILE_INPUT; + outputFormat = HCatConstants.SEQUENCEFILE_OUTPUT; + break; - @Override - public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, - ASTNode ast) throws SemanticException { + case HiveParser.TOK_TBLTEXTFILE: + inputFormat = org.apache.hadoop.mapred.TextInputFormat.class.getName(); + outputFormat = org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat.class.getName(); - Hive db; + break; + + case HiveParser.TOK_LIKETABLE: + likeTable = true; + break; + + case HiveParser.TOK_IFNOTEXISTS: try { - db = context.getHive(); + List tables = db.getTablesByPattern(tableName); + if (tables != null && tables.size() > 0) { // table + // exists + return ast; + } } catch (HiveException e) { - throw new SemanticException( - "Couldn't get Hive DB instance in semantic analysis phase.", - e); + throw new SemanticException(e); } + break; - // Analyze and create tbl properties object - int numCh = ast.getChildCount(); - - String inputFormat = null, outputFormat = null; - tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast - .getChild(0)); - boolean likeTable = false; - - for (int num = 1; num < numCh; num++) { - ASTNode child = (ASTNode) ast.getChild(num); - - switch (child.getToken().getType()) { - - case HiveParser.TOK_QUERY: // CTAS - throw new SemanticException( - "Operation not supported. Create table as " + - "Select is not a valid operation."); - - case HiveParser.TOK_TABLEBUCKETS: - break; - - case HiveParser.TOK_TBLSEQUENCEFILE: - inputFormat = HCatConstants.SEQUENCEFILE_INPUT; - outputFormat = HCatConstants.SEQUENCEFILE_OUTPUT; - break; - - case HiveParser.TOK_TBLTEXTFILE: - inputFormat = org.apache.hadoop.mapred.TextInputFormat.class.getName(); - outputFormat = org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat.class.getName(); - - break; - - case HiveParser.TOK_LIKETABLE: - likeTable = true; - break; - - case HiveParser.TOK_IFNOTEXISTS: - try { - List tables = db.getTablesByPattern(tableName); - if (tables != null && tables.size() > 0) { // table - // exists - return ast; - } - } catch (HiveException e) { - throw new SemanticException(e); - } - break; - - case HiveParser.TOK_TABLEPARTCOLS: - List partCols = BaseSemanticAnalyzer - .getColumns((ASTNode) child.getChild(0), false); - for (FieldSchema fs : partCols) { - if (!fs.getType().equalsIgnoreCase("string")) { - throw new SemanticException( - "Operation not supported. HCatalog only " + - "supports partition columns of type string. " - + "For column: " - + fs.getName() - + " Found type: " + fs.getType()); - } - } - break; - - case HiveParser.TOK_STORAGEHANDLER: - String storageHandler = BaseSemanticAnalyzer - .unescapeSQLString(child.getChild(0).getText()); - if (org.apache.commons.lang.StringUtils - .isNotEmpty(storageHandler)) { - return ast; - } - - break; - - case HiveParser.TOK_TABLEFILEFORMAT: - if (child.getChildCount() < 2) { - throw new SemanticException( - "Incomplete specification of File Format. 
" + - "You must provide InputFormat, OutputFormat."); - } - inputFormat = BaseSemanticAnalyzer.unescapeSQLString(child - .getChild(0).getText()); - outputFormat = BaseSemanticAnalyzer.unescapeSQLString(child - .getChild(1).getText()); - break; - - case HiveParser.TOK_TBLRCFILE: - inputFormat = RCFileInputFormat.class.getName(); - outputFormat = RCFileOutputFormat.class.getName(); - break; - - } + case HiveParser.TOK_TABLEPARTCOLS: + List partCols = BaseSemanticAnalyzer + .getColumns((ASTNode) child.getChild(0), false); + for (FieldSchema fs : partCols) { + if (!fs.getType().equalsIgnoreCase("string")) { + throw new SemanticException( + "Operation not supported. HCatalog only " + + "supports partition columns of type string. " + + "For column: " + + fs.getName() + + " Found type: " + fs.getType()); + } + } + break; + + case HiveParser.TOK_STORAGEHANDLER: + String storageHandler = BaseSemanticAnalyzer + .unescapeSQLString(child.getChild(0).getText()); + if (org.apache.commons.lang.StringUtils + .isNotEmpty(storageHandler)) { + return ast; } - if (!likeTable && (inputFormat == null || outputFormat == null)) { - throw new SemanticException( - "STORED AS specification is either incomplete or incorrect."); + break; + + case HiveParser.TOK_TABLEFILEFORMAT: + if (child.getChildCount() < 2) { + throw new SemanticException( + "Incomplete specification of File Format. " + + "You must provide InputFormat, OutputFormat."); } + inputFormat = BaseSemanticAnalyzer.unescapeSQLString(child + .getChild(0).getText()); + outputFormat = BaseSemanticAnalyzer.unescapeSQLString(child + .getChild(1).getText()); + break; + + case HiveParser.TOK_TBLRCFILE: + inputFormat = RCFileInputFormat.class.getName(); + outputFormat = RCFileOutputFormat.class.getName(); + break; + + } + } + + if (!likeTable && (inputFormat == null || outputFormat == null)) { + throw new SemanticException( + "STORED AS specification is either incomplete or incorrect."); + } + + return ast; + } + + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) + throws SemanticException { + + if (rootTasks.size() == 0) { + // There will be no DDL task created in case if its CREATE TABLE IF + // NOT EXISTS + return; + } + CreateTableDesc desc = ((DDLTask) rootTasks.get(rootTasks.size() - 1)) + .getWork().getCreateTblDesc(); + if (desc == null) { + // Desc will be null if its CREATE TABLE LIKE. Desc will be + // contained in CreateTableLikeDesc. Currently, HCat disallows CTLT in + // pre-hook. So, desc can never be null. + return; + } + Map tblProps = desc.getTblProps(); + if (tblProps == null) { + // tblProps will be null if user didnt use tblprops in his CREATE + // TABLE cmd. + tblProps = new HashMap(); - return ast; } - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) - throws SemanticException { + // first check if we will allow the user to create table. + String storageHandler = desc.getStorageHandler(); + if (StringUtils.isEmpty(storageHandler)) { + } else { + try { + HCatStorageHandler storageHandlerInst = HCatUtil + .getStorageHandler(context.getConf(), + desc.getStorageHandler(), + desc.getSerName(), + desc.getInputFormat(), + desc.getOutputFormat()); + //Authorization checks are performed by the storageHandler.getAuthorizationProvider(), if + //StorageDelegationAuthorizationProvider is used. 
+ } catch (IOException e) { + throw new SemanticException(e); + } + } - if (rootTasks.size() == 0) { - // There will be no DDL task created in case if its CREATE TABLE IF - // NOT EXISTS - return; + if (desc != null) { + try { + Table table = context.getHive().newTable(desc.getTableName()); + if (desc.getLocation() != null) { + table.setDataLocation(new Path(desc.getLocation()).toUri()); } - CreateTableDesc desc = ((DDLTask) rootTasks.get(rootTasks.size() - 1)) - .getWork().getCreateTblDesc(); - if (desc == null) { - // Desc will be null if its CREATE TABLE LIKE. Desc will be - // contained in CreateTableLikeDesc. Currently, HCat disallows CTLT in - // pre-hook. So, desc can never be null. - return; + if (desc.getStorageHandler() != null) { + table.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, + desc.getStorageHandler()); } - Map tblProps = desc.getTblProps(); - if (tblProps == null) { - // tblProps will be null if user didnt use tblprops in his CREATE - // TABLE cmd. - tblProps = new HashMap(); - + for (Map.Entry prop : tblProps.entrySet()) { + table.setProperty(prop.getKey(), prop.getValue()); } - - // first check if we will allow the user to create table. - String storageHandler = desc.getStorageHandler(); - if (StringUtils.isEmpty(storageHandler)) { - } else { - try { - HCatStorageHandler storageHandlerInst = HCatUtil - .getStorageHandler(context.getConf(), - desc.getStorageHandler(), - desc.getSerName(), - desc.getInputFormat(), - desc.getOutputFormat()); - //Authorization checks are performed by the storageHandler.getAuthorizationProvider(), if - //StorageDelegationAuthorizationProvider is used. - } catch (IOException e) { - throw new SemanticException(e); - } + for (Map.Entry prop : desc.getSerdeProps().entrySet()) { + table.setSerdeParam(prop.getKey(), prop.getValue()); } + //TODO: set other Table properties as needed - if (desc != null) { - try { - Table table = context.getHive().newTable(desc.getTableName()); - if (desc.getLocation() != null) { - table.setDataLocation(new Path(desc.getLocation()).toUri()); - } - if (desc.getStorageHandler() != null) { - table.setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, - desc.getStorageHandler()); - } - for (Map.Entry prop : tblProps.entrySet()) { - table.setProperty(prop.getKey(), prop.getValue()); - } - for (Map.Entry prop : desc.getSerdeProps().entrySet()) { - table.setSerdeParam(prop.getKey(), prop.getValue()); - } - //TODO: set other Table properties as needed - - //authorize against the table operation so that location permissions can be checked if any - - if (HiveConf.getBoolVar(context.getConf(), - HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { - authorize(table, Privilege.CREATE); - } - } catch (HiveException ex) { - throw new SemanticException(ex); - } - } + //authorize against the table operation so that location permissions can be checked if any - desc.setTblProps(tblProps); - context.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, tableName); + if (HiveConf.getBoolVar(context.getConf(), + HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { + authorize(table, Privilege.CREATE); + } + } catch (HiveException ex) { + throw new SemanticException(ex); + } } + + desc.setTblProps(tblProps); + context.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, tableName); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java 
b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java index 58caed4..75837ef 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java @@ -51,325 +51,325 @@ public class HCatSemanticAnalyzer extends HCatSemanticAnalyzerBase { - private AbstractSemanticAnalyzerHook hook; - private ASTNode ast; - - - @Override - public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) - throws SemanticException { - - this.ast = ast; - switch (ast.getToken().getType()) { - - // HCat wants to intercept following tokens and special-handle them. - case HiveParser.TOK_CREATETABLE: - hook = new CreateTableHook(); - return hook.preAnalyze(context, ast); - - case HiveParser.TOK_CREATEDATABASE: - hook = new CreateDatabaseHook(); - return hook.preAnalyze(context, ast); - - case HiveParser.TOK_ALTERTABLE_PARTITION: - if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_FILEFORMAT) { - return ast; - } else if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_ALTERPARTS_MERGEFILES) { - // unsupported - throw new SemanticException("Operation not supported."); - } else { - return ast; - } - - // HCat will allow these operations to be performed. - // Database DDL - case HiveParser.TOK_SHOWDATABASES: - case HiveParser.TOK_DROPDATABASE: - case HiveParser.TOK_SWITCHDATABASE: - case HiveParser.TOK_DESCDATABASE: - case HiveParser.TOK_ALTERDATABASE_PROPERTIES: - - // Index DDL - case HiveParser.TOK_ALTERINDEX_PROPERTIES: - case HiveParser.TOK_CREATEINDEX: - case HiveParser.TOK_DROPINDEX: - case HiveParser.TOK_SHOWINDEXES: - - // View DDL - // "alter view add partition" does not work because of the nature of implementation - // of the DDL in hive. Hive will internally invoke another Driver on the select statement, - // and HCat does not let "select" statement through. I cannot find a way to get around it - // without modifying hive code. So just leave it unsupported. 
- //case HiveParser.TOK_ALTERVIEW_ADDPARTS: - case HiveParser.TOK_ALTERVIEW_DROPPARTS: - case HiveParser.TOK_ALTERVIEW_PROPERTIES: - case HiveParser.TOK_ALTERVIEW_RENAME: - case HiveParser.TOK_CREATEVIEW: - case HiveParser.TOK_DROPVIEW: - - // Authorization DDL - case HiveParser.TOK_CREATEROLE: - case HiveParser.TOK_DROPROLE: - case HiveParser.TOK_GRANT_ROLE: - case HiveParser.TOK_GRANT_WITH_OPTION: - case HiveParser.TOK_GRANT: - case HiveParser.TOK_REVOKE_ROLE: - case HiveParser.TOK_REVOKE: - case HiveParser.TOK_SHOW_GRANT: - case HiveParser.TOK_SHOW_ROLE_GRANT: - - // Misc DDL - case HiveParser.TOK_LOCKTABLE: - case HiveParser.TOK_UNLOCKTABLE: - case HiveParser.TOK_SHOWLOCKS: - case HiveParser.TOK_DESCFUNCTION: - case HiveParser.TOK_SHOWFUNCTIONS: - case HiveParser.TOK_EXPLAIN: - - // Table DDL - case HiveParser.TOK_ALTERTABLE_ADDPARTS: - case HiveParser.TOK_ALTERTABLE_ADDCOLS: - case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: - case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: - case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: - case HiveParser.TOK_ALTERTABLE_DROPPARTS: - case HiveParser.TOK_ALTERTABLE_PROPERTIES: - case HiveParser.TOK_ALTERTABLE_RENAME: - case HiveParser.TOK_ALTERTABLE_RENAMECOL: - case HiveParser.TOK_ALTERTABLE_REPLACECOLS: - case HiveParser.TOK_ALTERTABLE_SERIALIZER: - case HiveParser.TOK_ALTERTABLE_TOUCH: - case HiveParser.TOK_DESCTABLE: - case HiveParser.TOK_DROPTABLE: - case HiveParser.TOK_SHOW_TABLESTATUS: - case HiveParser.TOK_SHOWPARTITIONS: - case HiveParser.TOK_SHOWTABLES: - return ast; - - // In all other cases, throw an exception. Its a white-list of allowed operations. - default: - throw new SemanticException("Operation not supported."); + private AbstractSemanticAnalyzerHook hook; + private ASTNode ast; + + + @Override + public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) + throws SemanticException { + + this.ast = ast; + switch (ast.getToken().getType()) { + + // HCat wants to intercept following tokens and special-handle them. + case HiveParser.TOK_CREATETABLE: + hook = new CreateTableHook(); + return hook.preAnalyze(context, ast); + + case HiveParser.TOK_CREATEDATABASE: + hook = new CreateDatabaseHook(); + return hook.preAnalyze(context, ast); + + case HiveParser.TOK_ALTERTABLE_PARTITION: + if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_FILEFORMAT) { + return ast; + } else if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_ALTERPARTS_MERGEFILES) { + // unsupported + throw new SemanticException("Operation not supported."); + } else { + return ast; + } + + // HCat will allow these operations to be performed. + // Database DDL + case HiveParser.TOK_SHOWDATABASES: + case HiveParser.TOK_DROPDATABASE: + case HiveParser.TOK_SWITCHDATABASE: + case HiveParser.TOK_DESCDATABASE: + case HiveParser.TOK_ALTERDATABASE_PROPERTIES: + + // Index DDL + case HiveParser.TOK_ALTERINDEX_PROPERTIES: + case HiveParser.TOK_CREATEINDEX: + case HiveParser.TOK_DROPINDEX: + case HiveParser.TOK_SHOWINDEXES: + + // View DDL + // "alter view add partition" does not work because of the nature of implementation + // of the DDL in hive. Hive will internally invoke another Driver on the select statement, + // and HCat does not let "select" statement through. I cannot find a way to get around it + // without modifying hive code. So just leave it unsupported. 
+ //case HiveParser.TOK_ALTERVIEW_ADDPARTS: + case HiveParser.TOK_ALTERVIEW_DROPPARTS: + case HiveParser.TOK_ALTERVIEW_PROPERTIES: + case HiveParser.TOK_ALTERVIEW_RENAME: + case HiveParser.TOK_CREATEVIEW: + case HiveParser.TOK_DROPVIEW: + + // Authorization DDL + case HiveParser.TOK_CREATEROLE: + case HiveParser.TOK_DROPROLE: + case HiveParser.TOK_GRANT_ROLE: + case HiveParser.TOK_GRANT_WITH_OPTION: + case HiveParser.TOK_GRANT: + case HiveParser.TOK_REVOKE_ROLE: + case HiveParser.TOK_REVOKE: + case HiveParser.TOK_SHOW_GRANT: + case HiveParser.TOK_SHOW_ROLE_GRANT: + + // Misc DDL + case HiveParser.TOK_LOCKTABLE: + case HiveParser.TOK_UNLOCKTABLE: + case HiveParser.TOK_SHOWLOCKS: + case HiveParser.TOK_DESCFUNCTION: + case HiveParser.TOK_SHOWFUNCTIONS: + case HiveParser.TOK_EXPLAIN: + + // Table DDL + case HiveParser.TOK_ALTERTABLE_ADDPARTS: + case HiveParser.TOK_ALTERTABLE_ADDCOLS: + case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: + case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: + case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: + case HiveParser.TOK_ALTERTABLE_DROPPARTS: + case HiveParser.TOK_ALTERTABLE_PROPERTIES: + case HiveParser.TOK_ALTERTABLE_RENAME: + case HiveParser.TOK_ALTERTABLE_RENAMECOL: + case HiveParser.TOK_ALTERTABLE_REPLACECOLS: + case HiveParser.TOK_ALTERTABLE_SERIALIZER: + case HiveParser.TOK_ALTERTABLE_TOUCH: + case HiveParser.TOK_DESCTABLE: + case HiveParser.TOK_DROPTABLE: + case HiveParser.TOK_SHOW_TABLESTATUS: + case HiveParser.TOK_SHOWPARTITIONS: + case HiveParser.TOK_SHOWTABLES: + return ast; + + // In all other cases, throw an exception. Its a white-list of allowed operations. + default: + throw new SemanticException("Operation not supported."); - } } - - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - - try { - - switch (ast.getToken().getType()) { - - case HiveParser.TOK_CREATETABLE: - case HiveParser.TOK_CREATEDATABASE: - case HiveParser.TOK_ALTERTABLE_PARTITION: - - // HCat will allow these operations to be performed. 
- // Database DDL - case HiveParser.TOK_SHOWDATABASES: - case HiveParser.TOK_DROPDATABASE: - case HiveParser.TOK_SWITCHDATABASE: - case HiveParser.TOK_DESCDATABASE: - case HiveParser.TOK_ALTERDATABASE_PROPERTIES: - - // Index DDL - case HiveParser.TOK_ALTERINDEX_PROPERTIES: - case HiveParser.TOK_CREATEINDEX: - case HiveParser.TOK_DROPINDEX: - case HiveParser.TOK_SHOWINDEXES: - - // View DDL - //case HiveParser.TOK_ALTERVIEW_ADDPARTS: - case HiveParser.TOK_ALTERVIEW_DROPPARTS: - case HiveParser.TOK_ALTERVIEW_PROPERTIES: - case HiveParser.TOK_ALTERVIEW_RENAME: - case HiveParser.TOK_CREATEVIEW: - case HiveParser.TOK_DROPVIEW: - - // Authorization DDL - case HiveParser.TOK_CREATEROLE: - case HiveParser.TOK_DROPROLE: - case HiveParser.TOK_GRANT_ROLE: - case HiveParser.TOK_GRANT_WITH_OPTION: - case HiveParser.TOK_GRANT: - case HiveParser.TOK_REVOKE_ROLE: - case HiveParser.TOK_REVOKE: - case HiveParser.TOK_SHOW_GRANT: - case HiveParser.TOK_SHOW_ROLE_GRANT: - - // Misc DDL - case HiveParser.TOK_LOCKTABLE: - case HiveParser.TOK_UNLOCKTABLE: - case HiveParser.TOK_SHOWLOCKS: - case HiveParser.TOK_DESCFUNCTION: - case HiveParser.TOK_SHOWFUNCTIONS: - case HiveParser.TOK_EXPLAIN: - - // Table DDL - case HiveParser.TOK_ALTERTABLE_ADDPARTS: - case HiveParser.TOK_ALTERTABLE_ADDCOLS: - case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: - case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: - case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: - case HiveParser.TOK_ALTERTABLE_DROPPARTS: - case HiveParser.TOK_ALTERTABLE_PROPERTIES: - case HiveParser.TOK_ALTERTABLE_RENAME: - case HiveParser.TOK_ALTERTABLE_RENAMECOL: - case HiveParser.TOK_ALTERTABLE_REPLACECOLS: - case HiveParser.TOK_ALTERTABLE_SERIALIZER: - case HiveParser.TOK_ALTERTABLE_TOUCH: - case HiveParser.TOK_DESCTABLE: - case HiveParser.TOK_DROPTABLE: - case HiveParser.TOK_SHOW_TABLESTATUS: - case HiveParser.TOK_SHOWPARTITIONS: - case HiveParser.TOK_SHOWTABLES: - break; - - default: - throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, "Unexpected token: " + ast.getToken()); - } - - authorizeDDL(context, rootTasks); - - } catch (HCatException e) { - throw new SemanticException(e); - } catch (HiveException e) { - throw new SemanticException(e); - } - - if (hook != null) { - hook.postAnalyze(context, rootTasks); - } + } + + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + + try { + + switch (ast.getToken().getType()) { + + case HiveParser.TOK_CREATETABLE: + case HiveParser.TOK_CREATEDATABASE: + case HiveParser.TOK_ALTERTABLE_PARTITION: + + // HCat will allow these operations to be performed. 
+ // Database DDL + case HiveParser.TOK_SHOWDATABASES: + case HiveParser.TOK_DROPDATABASE: + case HiveParser.TOK_SWITCHDATABASE: + case HiveParser.TOK_DESCDATABASE: + case HiveParser.TOK_ALTERDATABASE_PROPERTIES: + + // Index DDL + case HiveParser.TOK_ALTERINDEX_PROPERTIES: + case HiveParser.TOK_CREATEINDEX: + case HiveParser.TOK_DROPINDEX: + case HiveParser.TOK_SHOWINDEXES: + + // View DDL + //case HiveParser.TOK_ALTERVIEW_ADDPARTS: + case HiveParser.TOK_ALTERVIEW_DROPPARTS: + case HiveParser.TOK_ALTERVIEW_PROPERTIES: + case HiveParser.TOK_ALTERVIEW_RENAME: + case HiveParser.TOK_CREATEVIEW: + case HiveParser.TOK_DROPVIEW: + + // Authorization DDL + case HiveParser.TOK_CREATEROLE: + case HiveParser.TOK_DROPROLE: + case HiveParser.TOK_GRANT_ROLE: + case HiveParser.TOK_GRANT_WITH_OPTION: + case HiveParser.TOK_GRANT: + case HiveParser.TOK_REVOKE_ROLE: + case HiveParser.TOK_REVOKE: + case HiveParser.TOK_SHOW_GRANT: + case HiveParser.TOK_SHOW_ROLE_GRANT: + + // Misc DDL + case HiveParser.TOK_LOCKTABLE: + case HiveParser.TOK_UNLOCKTABLE: + case HiveParser.TOK_SHOWLOCKS: + case HiveParser.TOK_DESCFUNCTION: + case HiveParser.TOK_SHOWFUNCTIONS: + case HiveParser.TOK_EXPLAIN: + + // Table DDL + case HiveParser.TOK_ALTERTABLE_ADDPARTS: + case HiveParser.TOK_ALTERTABLE_ADDCOLS: + case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: + case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: + case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: + case HiveParser.TOK_ALTERTABLE_DROPPARTS: + case HiveParser.TOK_ALTERTABLE_PROPERTIES: + case HiveParser.TOK_ALTERTABLE_RENAME: + case HiveParser.TOK_ALTERTABLE_RENAMECOL: + case HiveParser.TOK_ALTERTABLE_REPLACECOLS: + case HiveParser.TOK_ALTERTABLE_SERIALIZER: + case HiveParser.TOK_ALTERTABLE_TOUCH: + case HiveParser.TOK_DESCTABLE: + case HiveParser.TOK_DROPTABLE: + case HiveParser.TOK_SHOW_TABLESTATUS: + case HiveParser.TOK_SHOWPARTITIONS: + case HiveParser.TOK_SHOWTABLES: + break; + + default: + throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, "Unexpected token: " + ast.getToken()); + } + + authorizeDDL(context, rootTasks); + + } catch (HCatException e) { + throw new SemanticException(e); + } catch (HiveException e) { + throw new SemanticException(e); } - private String extractTableName(String compoundName) { - /* - * the table name can potentially be a dot-format one with column names - * specified as part of the table name. e.g. a.b.c where b is a column in - * a and c is a field of the object/column b etc. For authorization - * purposes, we should use only the first part of the dotted name format. - * - */ - - String[] words = compoundName.split("\\."); - return words[0]; + if (hook != null) { + hook.postAnalyze(context, rootTasks); + } + } + + private String extractTableName(String compoundName) { + /* + * the table name can potentially be a dot-format one with column names + * specified as part of the table name. e.g. a.b.c where b is a column in + * a and c is a field of the object/column b etc. For authorization + * purposes, we should use only the first part of the dotted name format. + * + */ + + String[] words = compoundName.split("\\."); + return words[0]; + } + + @Override + protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work) + throws HiveException { + // DB opereations, none of them are enforced by Hive right now. 
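extractTableName, defined just above, strips a dotted column reference down to its table component before any table-level authorization is attempted. A small sketch of that behaviour, using a hypothetical dotted name and helper name:

  // Only the first component of a dotted name is authorized;
  // "web_logs.referrer.host" is treated as table "web_logs".
  static String tableForAuth(String compoundName) {
    return compoundName.split("\\.")[0];
  }

DESCRIBE and SHOW PARTITIONS requests therefore authorize SELECT on the table itself, even when the statement names a nested field.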
+ + ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc(); + if (showDatabases != null) { + authorize(HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(), + HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges()); } - @Override - protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work) - throws HiveException { - // DB opereations, none of them are enforced by Hive right now. - - ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc(); - if (showDatabases != null) { - authorize(HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(), - HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges()); - } - - DropDatabaseDesc dropDb = work.getDropDatabaseDesc(); - if (dropDb != null) { - Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName()); - authorize(db, Privilege.DROP); - } + DropDatabaseDesc dropDb = work.getDropDatabaseDesc(); + if (dropDb != null) { + Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName()); + authorize(db, Privilege.DROP); + } - DescDatabaseDesc descDb = work.getDescDatabaseDesc(); - if (descDb != null) { - Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName()); - authorize(db, Privilege.SELECT); - } + DescDatabaseDesc descDb = work.getDescDatabaseDesc(); + if (descDb != null) { + Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName()); + authorize(db, Privilege.SELECT); + } - SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc(); - if (switchDb != null) { - Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName()); - authorize(db, Privilege.SELECT); - } + SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc(); + if (switchDb != null) { + Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName()); + authorize(db, Privilege.SELECT); + } - ShowTablesDesc showTables = work.getShowTblsDesc(); - if (showTables != null) { - String dbName = showTables.getDbName() == null ? cntxt.getHive().getCurrentDatabase() - : showTables.getDbName(); - authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); - } + ShowTablesDesc showTables = work.getShowTblsDesc(); + if (showTables != null) { + String dbName = showTables.getDbName() == null ? cntxt.getHive().getCurrentDatabase() + : showTables.getDbName(); + authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); + } - ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc(); - if (showTableStatus != null) { - String dbName = showTableStatus.getDbName() == null ? cntxt.getHive().getCurrentDatabase() - : showTableStatus.getDbName(); - authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); - } + ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc(); + if (showTableStatus != null) { + String dbName = showTableStatus.getDbName() == null ? cntxt.getHive().getCurrentDatabase() + : showTableStatus.getDbName(); + authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); + } - // TODO: add alter database support in HCat - - // Table operations. - - DropTableDesc dropTable = work.getDropTblDesc(); - if (dropTable != null) { - if (dropTable.getPartSpecs() == null) { - // drop table is already enforced by Hive. We only check for table level location even if the - // table is partitioned. 
- } else { - //this is actually a ALTER TABLE DROP PARITITION statement - for (PartitionSpec partSpec : dropTable.getPartSpecs()) { - // partitions are not added as write entries in drop partitions in Hive - Table table = hive.getTable(hive.getCurrentDatabase(), dropTable.getTableName()); - List partitions = null; - try { - partitions = hive.getPartitionsByFilter(table, partSpec.toString()); - } catch (Exception e) { - throw new HiveException(e); - } - - for (Partition part : partitions) { - authorize(part, Privilege.DROP); - } - } - } + // TODO: add alter database support in HCat + + // Table operations. + + DropTableDesc dropTable = work.getDropTblDesc(); + if (dropTable != null) { + if (dropTable.getPartSpecs() == null) { + // drop table is already enforced by Hive. We only check for table level location even if the + // table is partitioned. + } else { + //this is actually a ALTER TABLE DROP PARITITION statement + for (PartitionSpec partSpec : dropTable.getPartSpecs()) { + // partitions are not added as write entries in drop partitions in Hive + Table table = hive.getTable(hive.getCurrentDatabase(), dropTable.getTableName()); + List partitions = null; + try { + partitions = hive.getPartitionsByFilter(table, partSpec.toString()); + } catch (Exception e) { + throw new HiveException(e); + } + + for (Partition part : partitions) { + authorize(part, Privilege.DROP); + } } + } + } - AlterTableDesc alterTable = work.getAlterTblDesc(); - if (alterTable != null) { - Table table = hive.getTable(hive.getCurrentDatabase(), alterTable.getOldName(), false); - - Partition part = null; - if (alterTable.getPartSpec() != null) { - part = hive.getPartition(table, alterTable.getPartSpec(), false); - } - - String newLocation = alterTable.getNewLocation(); - - /* Hcat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements - * for the old table/partition location and the new location. - */ - if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { - if (part != null) { - authorize(part, Privilege.ALTER_DATA); // authorize for the old - // location, and new location - part.setLocation(newLocation); - authorize(part, Privilege.ALTER_DATA); - } else { - authorize(table, Privilege.ALTER_DATA); // authorize for the old - // location, and new location - table.getTTable().getSd().setLocation(newLocation); - authorize(table, Privilege.ALTER_DATA); - } - } - //other alter operations are already supported by Hive + AlterTableDesc alterTable = work.getAlterTblDesc(); + if (alterTable != null) { + Table table = hive.getTable(hive.getCurrentDatabase(), alterTable.getOldName(), false); + + Partition part = null; + if (alterTable.getPartSpec() != null) { + part = hive.getPartition(table, alterTable.getPartSpec(), false); + } + + String newLocation = alterTable.getNewLocation(); + + /* Hcat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements + * for the old table/partition location and the new location. 
+ */ + if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { + if (part != null) { + authorize(part, Privilege.ALTER_DATA); // authorize for the old + // location, and new location + part.setLocation(newLocation); + authorize(part, Privilege.ALTER_DATA); + } else { + authorize(table, Privilege.ALTER_DATA); // authorize for the old + // location, and new location + table.getTTable().getSd().setLocation(newLocation); + authorize(table, Privilege.ALTER_DATA); } + } + //other alter operations are already supported by Hive + } - // we should be careful when authorizing table based on just the - // table name. If columns have separate authorization domain, it - // must be honored - DescTableDesc descTable = work.getDescTblDesc(); - if (descTable != null) { - String tableName = extractTableName(descTable.getTableName()); - authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); - } + // we should be careful when authorizing table based on just the + // table name. If columns have separate authorization domain, it + // must be honored + DescTableDesc descTable = work.getDescTblDesc(); + if (descTable != null) { + String tableName = extractTableName(descTable.getTableName()); + authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); + } - ShowPartitionsDesc showParts = work.getShowPartsDesc(); - if (showParts != null) { - String tableName = extractTableName(showParts.getTabName()); - authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); - } + ShowPartitionsDesc showParts = work.getShowPartsDesc(); + if (showParts != null) { + String tableName = extractTableName(showParts.getTabName()); + authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java index 14b9578..c144135 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java @@ -44,136 +44,136 @@ */ public class HCatSemanticAnalyzerBase extends AbstractSemanticAnalyzerHook { - private HiveAuthorizationProvider authProvider; + private HiveAuthorizationProvider authProvider; - protected String getDbName(Hive hive, String dbName) { - return dbName == null ? hive.getCurrentDatabase() : dbName; - } - - public HiveAuthorizationProvider getAuthProvider() { - if (authProvider == null) { - authProvider = SessionState.get().getAuthorizer(); - } + protected String getDbName(Hive hive, String dbName) { + return dbName == null ? hive.getCurrentDatabase() : dbName; + } - return authProvider; + public HiveAuthorizationProvider getAuthProvider() { + if (authProvider == null) { + authProvider = SessionState.get().getAuthorizer(); } - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - super.postAnalyze(context, rootTasks); - - //Authorize the operation. - authorizeDDL(context, rootTasks); + return authProvider; + } + + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + super.postAnalyze(context, rootTasks); + + //Authorize the operation. + authorizeDDL(context, rootTasks); + } + + /** + * Checks for the given rootTasks, and calls authorizeDDLWork() for each DDLWork to + * be authorized. 
The hooks should override this, or authorizeDDLWork to perform the + * actual authorization. + */ + /* + * Impl note: Hive provides authorization with it's own model, and calls the defined + * HiveAuthorizationProvider from Driver.doAuthorization(). However, HCat has to + * do additional calls to the auth provider to implement expected behavior for + * StorageDelegationAuthorizationProvider. This means, that the defined auth provider + * is called by both Hive and HCat. The following are missing from Hive's implementation, + * and when they are fixed in Hive, we can remove the HCat-specific auth checks. + * 1. CREATE DATABASE/TABLE, ADD PARTITION statements does not call + * HiveAuthorizationProvider.authorize() with the candidate objects, which means that + * we cannot do checks against defined LOCATION. + * 2. HiveOperation does not define sufficient Privileges for most of the operations, + * especially database operations. + * 3. For some of the operations, Hive SemanticAnalyzer does not add the changed + * object as a WriteEntity or ReadEntity. + * + * @see https://issues.apache.org/jira/browse/HCATALOG-244 + * @see https://issues.apache.org/jira/browse/HCATALOG-245 + */ + protected void authorizeDDL(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + + if (!HiveConf.getBoolVar(context.getConf(), + HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { + return; } - /** - * Checks for the given rootTasks, and calls authorizeDDLWork() for each DDLWork to - * be authorized. The hooks should override this, or authorizeDDLWork to perform the - * actual authorization. - */ - /* - * Impl note: Hive provides authorization with it's own model, and calls the defined - * HiveAuthorizationProvider from Driver.doAuthorization(). However, HCat has to - * do additional calls to the auth provider to implement expected behavior for - * StorageDelegationAuthorizationProvider. This means, that the defined auth provider - * is called by both Hive and HCat. The following are missing from Hive's implementation, - * and when they are fixed in Hive, we can remove the HCat-specific auth checks. - * 1. CREATE DATABASE/TABLE, ADD PARTITION statements does not call - * HiveAuthorizationProvider.authorize() with the candidate objects, which means that - * we cannot do checks against defined LOCATION. - * 2. HiveOperation does not define sufficient Privileges for most of the operations, - * especially database operations. - * 3. For some of the operations, Hive SemanticAnalyzer does not add the changed - * object as a WriteEntity or ReadEntity. 
- * - * @see https://issues.apache.org/jira/browse/HCATALOG-244 - * @see https://issues.apache.org/jira/browse/HCATALOG-245 - */ - protected void authorizeDDL(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - - if (!HiveConf.getBoolVar(context.getConf(), - HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { - return; - } + Hive hive; + try { + hive = context.getHive(); - Hive hive; - try { - hive = context.getHive(); - - for (Task task : rootTasks) { - if (task.getWork() instanceof DDLWork) { - DDLWork work = (DDLWork) task.getWork(); - if (work != null) { - authorizeDDLWork(context, hive, work); - } - } - } - } catch (SemanticException ex) { - throw ex; - } catch (AuthorizationException ex) { - throw ex; - } catch (Exception ex) { - throw new SemanticException(ex); + for (Task task : rootTasks) { + if (task.getWork() instanceof DDLWork) { + DDLWork work = (DDLWork) task.getWork(); + if (work != null) { + authorizeDDLWork(context, hive, work); + } } + } + } catch (SemanticException ex) { + throw ex; + } catch (AuthorizationException ex) { + throw ex; + } catch (Exception ex) { + throw new SemanticException(ex); } - - /** - * Authorized the given DDLWork. Does nothing by default. Override this - * and delegate to the relevant method in HiveAuthorizationProvider obtained by - * getAuthProvider(). - */ - protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, - Hive hive, DDLWork work) throws HiveException { + } + + /** + * Authorized the given DDLWork. Does nothing by default. Override this + * and delegate to the relevant method in HiveAuthorizationProvider obtained by + * getAuthProvider(). + */ + protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, + Hive hive, DDLWork work) throws HiveException { + } + + protected void authorize(Privilege[] inputPrivs, Privilege[] outputPrivs) + throws AuthorizationException, SemanticException { + try { + getAuthProvider().authorize(inputPrivs, outputPrivs); + } catch (HiveException ex) { + throw new SemanticException(ex); } - - protected void authorize(Privilege[] inputPrivs, Privilege[] outputPrivs) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(inputPrivs, outputPrivs); - } catch (HiveException ex) { - throw new SemanticException(ex); - } + } + + protected void authorize(Database db, Privilege priv) + throws AuthorizationException, SemanticException { + try { + getAuthProvider().authorize(db, null, new Privilege[]{priv}); + } catch (HiveException ex) { + throw new SemanticException(ex); } - - protected void authorize(Database db, Privilege priv) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(db, null, new Privilege[]{priv}); - } catch (HiveException ex) { - throw new SemanticException(ex); - } + } + + protected void authorizeTable(Hive hive, String tableName, Privilege priv) + throws AuthorizationException, HiveException { + Table table; + try { + table = hive.getTable(tableName); + } catch (InvalidTableException ite) { + // Table itself doesn't exist in metastore, nothing to validate. + return; } - protected void authorizeTable(Hive hive, String tableName, Privilege priv) - throws AuthorizationException, HiveException { - Table table; - try { - table = hive.getTable(tableName); - } catch (InvalidTableException ite) { - // Table itself doesn't exist in metastore, nothing to validate. 
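As the class comment above notes, authorizeDDL() walks the root tasks and hands each DDLWork to authorizeDDLWork(), which is a no-op by default; concrete hooks are expected to override it and delegate to the provider returned by getAuthProvider(). A hypothetical subclass sketch, limited to DROP TABLE and reusing only helpers defined in this class and descriptors already used elsewhere in this patch:

  import org.apache.hadoop.hive.ql.metadata.Hive;
  import org.apache.hadoop.hive.ql.metadata.HiveException;
  import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
  import org.apache.hadoop.hive.ql.plan.DDLWork;
  import org.apache.hadoop.hive.ql.security.authorization.Privilege;

  class DropTableAuthHook extends HCatSemanticAnalyzerBase {
    @Override
    protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context,
        Hive hive, DDLWork work) throws HiveException {
      if (work.getDropTblDesc() != null) {
        // authorizeTable() silently skips tables missing from the metastore.
        authorizeTable(hive, work.getDropTblDesc().getTableName(), Privilege.DROP);
      }
    }
  }

The check only runs when hive.security.authorization.enabled is set, since authorizeDDL() returns early otherwise.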
- return; - } - - authorize(table, priv); - } + authorize(table, priv); + } - protected void authorize(Table table, Privilege priv) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(table, new Privilege[]{priv}, null); - } catch (HiveException ex) { - throw new SemanticException(ex); - } + protected void authorize(Table table, Privilege priv) + throws AuthorizationException, SemanticException { + try { + getAuthProvider().authorize(table, new Privilege[]{priv}, null); + } catch (HiveException ex) { + throw new SemanticException(ex); } - - protected void authorize(Partition part, Privilege priv) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(part, new Privilege[]{priv}, null); - } catch (HiveException ex) { - throw new SemanticException(ex); - } + } + + protected void authorize(Partition part, Privilege priv) + throws AuthorizationException, SemanticException { + try { + getAuthProvider().authorize(part, new Privilege[]{priv}, null); + } catch (HiveException ex) { + throw new SemanticException(ex); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/ErrorType.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/ErrorType.java index ecc1d1b..63a4d62 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/ErrorType.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/ErrorType.java @@ -23,115 +23,115 @@ */ public enum ErrorType { - /* HCat Input Format related errors 1000 - 1999 */ - ERROR_DB_INIT (1000, "Error initializing database session"), - ERROR_EXCEED_MAXPART (1001, "Query result exceeded maximum number of partitions allowed"), - - ERROR_SET_INPUT (1002, "Error setting input information"), - - /* HCat Output Format related errors 2000 - 2999 */ - ERROR_INVALID_TABLE (2000, "Table specified does not exist"), - ERROR_SET_OUTPUT (2001, "Error setting output information"), - ERROR_DUPLICATE_PARTITION (2002, "Partition already present with given partition key values"), - ERROR_NON_EMPTY_TABLE (2003, "Non-partitioned table already contains data"), - ERROR_NOT_INITIALIZED (2004, "HCatOutputFormat not initialized, setOutput has to be called"), - ERROR_INIT_STORAGE_HANDLER (2005, "Error initializing storage handler instance"), - ERROR_PUBLISHING_PARTITION (2006, "Error adding partition to metastore"), - ERROR_SCHEMA_COLUMN_MISMATCH (2007, "Invalid column position in partition schema"), - ERROR_SCHEMA_PARTITION_KEY (2008, "Partition key cannot be present in the partition data"), - ERROR_SCHEMA_TYPE_MISMATCH (2009, "Invalid column type in partition schema"), - ERROR_INVALID_PARTITION_VALUES (2010, "Invalid partition values specified"), - ERROR_MISSING_PARTITION_KEY (2011, "Partition key value not provided for publish"), - ERROR_MOVE_FAILED (2012, "Moving of data failed during commit"), - ERROR_TOO_MANY_DYNAMIC_PTNS (2013, "Attempt to create too many dynamic partitions"), - ERROR_INIT_LOADER (2014, "Error initializing Pig loader"), - ERROR_INIT_STORER (2015, "Error initializing Pig storer"), - ERROR_NOT_SUPPORTED (2016, "Error operation not supported"), - - /* Authorization Errors 3000 - 3999 */ - ERROR_ACCESS_CONTROL (3000, "Permission denied"), - - /* Miscellaneous errors, range 9000 - 9998 */ - ERROR_UNIMPLEMENTED (9000, "Functionality currently unimplemented"), - ERROR_INTERNAL_EXCEPTION (9001, "Exception occurred while processing HCat request"); - - /** The error code. */ - private int errorCode; - - /** The error message. 
*/ - private String errorMessage; - - /** Should the causal exception message be appended to the error message, yes by default*/ - private boolean appendCauseMessage = true; - - /** Is this a retriable error, no by default. */ - private boolean isRetriable = false; - - /** - * Instantiates a new error type. - * @param errorCode the error code - * @param errorMessage the error message - */ - private ErrorType(int errorCode, String errorMessage) { - this.errorCode = errorCode; - this.errorMessage = errorMessage; - } - - /** - * Instantiates a new error type. - * @param errorCode the error code - * @param errorMessage the error message - * @param appendCauseMessage should causal exception message be appended to error message - */ - private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage) { - this.errorCode = errorCode; - this.errorMessage = errorMessage; - this.appendCauseMessage = appendCauseMessage; - } - - /** - * Instantiates a new error type. - * @param errorCode the error code - * @param errorMessage the error message - * @param appendCauseMessage should causal exception message be appended to error message - * @param isRetriable is this a retriable error - */ - private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage, boolean isRetriable) { - this.errorCode = errorCode; - this.errorMessage = errorMessage; - this.appendCauseMessage = appendCauseMessage; - this.isRetriable = isRetriable; - } - - /** - * Gets the error code. - * @return the error code - */ - public int getErrorCode() { - return errorCode; - } - - /** - * Gets the error message. - * @return the error message - */ - public String getErrorMessage() { - return errorMessage; - } - - /** - * Checks if this is a retriable error. - * @return true, if is a retriable error, false otherwise - */ - public boolean isRetriable() { - return isRetriable; - } - - /** - * Whether the cause of the exception should be added to the error message. 
- * @return true, if the cause should be added to the message, false otherwise - */ - public boolean appendCauseMessage() { - return appendCauseMessage; - } + /* HCat Input Format related errors 1000 - 1999 */ + ERROR_DB_INIT (1000, "Error initializing database session"), + ERROR_EXCEED_MAXPART (1001, "Query result exceeded maximum number of partitions allowed"), + + ERROR_SET_INPUT (1002, "Error setting input information"), + + /* HCat Output Format related errors 2000 - 2999 */ + ERROR_INVALID_TABLE (2000, "Table specified does not exist"), + ERROR_SET_OUTPUT (2001, "Error setting output information"), + ERROR_DUPLICATE_PARTITION (2002, "Partition already present with given partition key values"), + ERROR_NON_EMPTY_TABLE (2003, "Non-partitioned table already contains data"), + ERROR_NOT_INITIALIZED (2004, "HCatOutputFormat not initialized, setOutput has to be called"), + ERROR_INIT_STORAGE_HANDLER (2005, "Error initializing storage handler instance"), + ERROR_PUBLISHING_PARTITION (2006, "Error adding partition to metastore"), + ERROR_SCHEMA_COLUMN_MISMATCH (2007, "Invalid column position in partition schema"), + ERROR_SCHEMA_PARTITION_KEY (2008, "Partition key cannot be present in the partition data"), + ERROR_SCHEMA_TYPE_MISMATCH (2009, "Invalid column type in partition schema"), + ERROR_INVALID_PARTITION_VALUES (2010, "Invalid partition values specified"), + ERROR_MISSING_PARTITION_KEY (2011, "Partition key value not provided for publish"), + ERROR_MOVE_FAILED (2012, "Moving of data failed during commit"), + ERROR_TOO_MANY_DYNAMIC_PTNS (2013, "Attempt to create too many dynamic partitions"), + ERROR_INIT_LOADER (2014, "Error initializing Pig loader"), + ERROR_INIT_STORER (2015, "Error initializing Pig storer"), + ERROR_NOT_SUPPORTED (2016, "Error operation not supported"), + + /* Authorization Errors 3000 - 3999 */ + ERROR_ACCESS_CONTROL (3000, "Permission denied"), + + /* Miscellaneous errors, range 9000 - 9998 */ + ERROR_UNIMPLEMENTED (9000, "Functionality currently unimplemented"), + ERROR_INTERNAL_EXCEPTION (9001, "Exception occurred while processing HCat request"); + + /** The error code. */ + private int errorCode; + + /** The error message. */ + private String errorMessage; + + /** Should the causal exception message be appended to the error message, yes by default*/ + private boolean appendCauseMessage = true; + + /** Is this a retriable error, no by default. */ + private boolean isRetriable = false; + + /** + * Instantiates a new error type. + * @param errorCode the error code + * @param errorMessage the error message + */ + private ErrorType(int errorCode, String errorMessage) { + this.errorCode = errorCode; + this.errorMessage = errorMessage; + } + + /** + * Instantiates a new error type. + * @param errorCode the error code + * @param errorMessage the error message + * @param appendCauseMessage should causal exception message be appended to error message + */ + private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage) { + this.errorCode = errorCode; + this.errorMessage = errorMessage; + this.appendCauseMessage = appendCauseMessage; + } + + /** + * Instantiates a new error type. 
+ * @param errorCode the error code + * @param errorMessage the error message + * @param appendCauseMessage should causal exception message be appended to error message + * @param isRetriable is this a retriable error + */ + private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage, boolean isRetriable) { + this.errorCode = errorCode; + this.errorMessage = errorMessage; + this.appendCauseMessage = appendCauseMessage; + this.isRetriable = isRetriable; + } + + /** + * Gets the error code. + * @return the error code + */ + public int getErrorCode() { + return errorCode; + } + + /** + * Gets the error message. + * @return the error message + */ + public String getErrorMessage() { + return errorMessage; + } + + /** + * Checks if this is a retriable error. + * @return true, if is a retriable error, false otherwise + */ + public boolean isRetriable() { + return isRetriable; + } + + /** + * Whether the cause of the exception should be added to the error message. + * @return true, if the cause should be added to the message, false otherwise + */ + public boolean appendCauseMessage() { + return appendCauseMessage; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java index 56e8c05..85a9565 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java @@ -24,163 +24,163 @@ public final class HCatConstants { - public static final String HIVE_RCFILE_IF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileInputFormat"; - public static final String HIVE_RCFILE_OF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileOutputFormat"; - - public static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName(); - public static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName(); - - public static final String HCAT_PIG_STORAGE_CLASS = "org.apache.pig.builtin.PigStorage"; - public static final String HCAT_PIG_LOADER = "hcat.pig.loader"; - public static final String HCAT_PIG_LOADER_LOCATION_SET = HCAT_PIG_LOADER + ".location.set"; - public static final String HCAT_PIG_LOADER_ARGS = "hcat.pig.loader.args"; - public static final String HCAT_PIG_STORER = "hcat.pig.storer"; - public static final String HCAT_PIG_STORER_ARGS = "hcat.pig.storer.args"; - public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter"; - public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ","; - public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER + ".location.set"; - public static final String HCAT_PIG_INNER_TUPLE_NAME = "hcat.pig.inner.tuple.name"; - public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple"; - public static final String HCAT_PIG_INNER_FIELD_NAME = "hcat.pig.inner.field.name"; - public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield"; - - /** - * {@value} (default: null) - * When the property is set in the UDFContext of the org.apache.hive.hcatalog.pig.HCatStorer, HCatStorer writes - * to the location it specifies instead of the default HCatalog location format. An example can be found - * in org.apache.hive.hcatalog.pig.HCatStorerWrapper. 
- */ - public static final String HCAT_PIG_STORER_EXTERNAL_LOCATION = HCAT_PIG_STORER + ".external.location"; - - //The keys used to store info into the job Configuration - public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat"; - - public static final String HCAT_KEY_OUTPUT_SCHEMA = HCAT_KEY_BASE + ".output.schema"; - - public static final String HCAT_KEY_JOB_INFO = HCAT_KEY_BASE + ".job.info"; - - // hcatalog specific configurations, that can be put in hive-site.xml - public static final String HCAT_HIVE_CLIENT_EXPIRY_TIME = "hcatalog.hive.client.cache.expiry.time"; - - private HCatConstants() { // restrict instantiation - } - - public static final String HCAT_TABLE_SCHEMA = "hcat.table.schema"; - - public static final String HCAT_METASTORE_URI = HiveConf.ConfVars.METASTOREURIS.varname; - - public static final String HCAT_PERMS = "hcat.perms"; - - public static final String HCAT_GROUP = "hcat.group"; - - public static final String HCAT_CREATE_TBL_NAME = "hcat.create.tbl.name"; - - public static final String HCAT_CREATE_DB_NAME = "hcat.create.db.name"; - - public static final String HCAT_METASTORE_PRINCIPAL - = HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname; - - /** - * The desired number of input splits produced for each partition. When the - * input files are large and few, we want to split them into many splits, - * so as to increase the parallelizm of loading the splits. Try also two - * other parameters, mapred.min.split.size and mapred.max.split.size, to - * control the number of input splits. - */ - public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS = - "hcat.desired.partition.num.splits"; - - // IMPORTANT IMPORTANT IMPORTANT!!!!! - //The keys used to store info into the job Configuration. - //If any new keys are added, the HCatStorer needs to be updated. The HCatStorer - //updates the job configuration in the backend to insert these keys to avoid - //having to call setOutput from the backend (which would cause a metastore call - //from the map jobs) - public static final String HCAT_KEY_OUTPUT_BASE = "mapreduce.lib.hcatoutput"; - public static final String HCAT_KEY_OUTPUT_INFO = HCAT_KEY_OUTPUT_BASE + ".info"; - public static final String HCAT_KEY_HIVE_CONF = HCAT_KEY_OUTPUT_BASE + ".hive.conf"; - public static final String HCAT_KEY_TOKEN_SIGNATURE = HCAT_KEY_OUTPUT_BASE + ".token.sig"; - - public static final String[] OUTPUT_CONFS_TO_SAVE = { - HCAT_KEY_OUTPUT_INFO, - HCAT_KEY_HIVE_CONF, - HCAT_KEY_TOKEN_SIGNATURE - }; - - - public static final String HCAT_MSG_CLEAN_FREQ = "hcat.msg.clean.freq"; - public static final String HCAT_MSG_EXPIRY_DURATION = "hcat.msg.expiry.duration"; - - public static final String HCAT_MSGBUS_TOPIC_NAME = "hcat.msgbus.topic.name"; - public static final String HCAT_MSGBUS_TOPIC_NAMING_POLICY = "hcat.msgbus.topic.naming.policy"; - public static final String HCAT_MSGBUS_TOPIC_PREFIX = "hcat.msgbus.topic.prefix"; - - public static final String HCAT_DYNAMIC_PTN_JOBID = HCAT_KEY_OUTPUT_BASE + "dynamic.jobid"; - public static final boolean HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED = false; - - // Message Bus related properties. 
- public static final String HCAT_DEFAULT_TOPIC_PREFIX = "hcat"; - public static final String HCAT_EVENT = "HCAT_EVENT"; - public static final String HCAT_ADD_PARTITION_EVENT = "ADD_PARTITION"; - public static final String HCAT_DROP_PARTITION_EVENT = "DROP_PARTITION"; - public static final String HCAT_PARTITION_DONE_EVENT = "PARTITION_DONE"; - public static final String HCAT_CREATE_TABLE_EVENT = "CREATE_TABLE"; - public static final String HCAT_DROP_TABLE_EVENT = "DROP_TABLE"; - public static final String HCAT_CREATE_DATABASE_EVENT = "CREATE_DATABASE"; - public static final String HCAT_DROP_DATABASE_EVENT = "DROP_DATABASE"; - public static final String HCAT_MESSAGE_VERSION = "HCAT_MESSAGE_VERSION"; - public static final String HCAT_MESSAGE_FORMAT = "HCAT_MESSAGE_FORMAT"; - public static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; - public static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; - public static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hive.hcatalog.messaging.json.JSONMessageFactory"; - - // System environment variables - public static final String SYSENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION"; - - // Hadoop Conf Var Names - public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary"; - - //*************************************************************************** - // Data-related configuration properties. - //*************************************************************************** - - /** - * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}). - * Pig < 0.10.0 does not have boolean support, and scripts written for pre-boolean Pig versions - * will not expect boolean values when upgrading Pig. For integration the option is offered to - * convert boolean fields to integers by setting this Hadoop configuration key. - */ - public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER = - "hcat.data.convert.boolean.to.integer"; - public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false; - - /** - * {@value} (default: {@value #HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT}). - * Hive tables support tinyint and smallint columns, while not all processing frameworks support - * these types (Pig only has integer for example). Enable this property to promote tinyint and - * smallint columns to integer at runtime. Note that writes to tinyint and smallint columns - * enforce bounds checking and jobs will fail if attempting to write values outside the column - * bounds. - */ - public static final String HCAT_DATA_TINY_SMALL_INT_PROMOTION = - "hcat.data.tiny.small.int.promotion"; - public static final boolean HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT = false; - - /** - * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT}). - * Threshold for the ratio of bad records that will be silently skipped without causing a task - * failure. This is useful when processing large data sets with corrupt records, when its - * acceptable to skip some bad records. - */ - public static final String HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY = "hcat.input.bad.record.threshold"; - public static final float HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT = 0.0001f; - - /** - * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_MIN_DEFAULT}). - * Number of bad records that will be accepted before applying - * {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}. 
This is necessary to prevent an initial bad - * record from causing a task failure. - */ - public static final String HCAT_INPUT_BAD_RECORD_MIN_KEY = "hcat.input.bad.record.min"; - public static final int HCAT_INPUT_BAD_RECORD_MIN_DEFAULT = 2; + public static final String HIVE_RCFILE_IF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileInputFormat"; + public static final String HIVE_RCFILE_OF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileOutputFormat"; + + public static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName(); + public static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName(); + + public static final String HCAT_PIG_STORAGE_CLASS = "org.apache.pig.builtin.PigStorage"; + public static final String HCAT_PIG_LOADER = "hcat.pig.loader"; + public static final String HCAT_PIG_LOADER_LOCATION_SET = HCAT_PIG_LOADER + ".location.set"; + public static final String HCAT_PIG_LOADER_ARGS = "hcat.pig.loader.args"; + public static final String HCAT_PIG_STORER = "hcat.pig.storer"; + public static final String HCAT_PIG_STORER_ARGS = "hcat.pig.storer.args"; + public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter"; + public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ","; + public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER + ".location.set"; + public static final String HCAT_PIG_INNER_TUPLE_NAME = "hcat.pig.inner.tuple.name"; + public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple"; + public static final String HCAT_PIG_INNER_FIELD_NAME = "hcat.pig.inner.field.name"; + public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield"; + + /** + * {@value} (default: null) + * When the property is set in the UDFContext of the org.apache.hive.hcatalog.pig.HCatStorer, HCatStorer writes + * to the location it specifies instead of the default HCatalog location format. An example can be found + * in org.apache.hive.hcatalog.pig.HCatStorerWrapper. + */ + public static final String HCAT_PIG_STORER_EXTERNAL_LOCATION = HCAT_PIG_STORER + ".external.location"; + + //The keys used to store info into the job Configuration + public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat"; + + public static final String HCAT_KEY_OUTPUT_SCHEMA = HCAT_KEY_BASE + ".output.schema"; + + public static final String HCAT_KEY_JOB_INFO = HCAT_KEY_BASE + ".job.info"; + + // hcatalog specific configurations, that can be put in hive-site.xml + public static final String HCAT_HIVE_CLIENT_EXPIRY_TIME = "hcatalog.hive.client.cache.expiry.time"; + + private HCatConstants() { // restrict instantiation + } + + public static final String HCAT_TABLE_SCHEMA = "hcat.table.schema"; + + public static final String HCAT_METASTORE_URI = HiveConf.ConfVars.METASTOREURIS.varname; + + public static final String HCAT_PERMS = "hcat.perms"; + + public static final String HCAT_GROUP = "hcat.group"; + + public static final String HCAT_CREATE_TBL_NAME = "hcat.create.tbl.name"; + + public static final String HCAT_CREATE_DB_NAME = "hcat.create.db.name"; + + public static final String HCAT_METASTORE_PRINCIPAL + = HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname; + + /** + * The desired number of input splits produced for each partition. When the + * input files are large and few, we want to split them into many splits, + * so as to increase the parallelizm of loading the splits. Try also two + * other parameters, mapred.min.split.size and mapred.max.split.size, to + * control the number of input splits. 
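The split-tuning keys described just above are ordinary Hadoop configuration properties. A minimal sketch of setting them before submitting a job (the wrapper class exists only for the example, the property names come from the constants in this file, the numeric values are purely illustrative, and it is assumed rather than guaranteed that the HCatalog release in use consults hcat.desired.partition.num.splits when computing splits):

  import org.apache.hadoop.conf.Configuration;

  public class SplitTuningSketch {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      // Ask for roughly four input splits per partition (HCAT_DESIRED_PARTITION_NUM_SPLITS).
      conf.setInt("hcat.desired.partition.num.splits", 4);
      // The two classic split-size bounds mentioned in the comment above; values are arbitrary.
      conf.setLong("mapred.min.split.size", 16L * 1024 * 1024);
      conf.setLong("mapred.max.split.size", 256L * 1024 * 1024);
      System.out.println("desired splits per partition: "
          + conf.get("hcat.desired.partition.num.splits"));
    }
  }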
+ */ + public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS = + "hcat.desired.partition.num.splits"; + + // IMPORTANT IMPORTANT IMPORTANT!!!!! + //The keys used to store info into the job Configuration. + //If any new keys are added, the HCatStorer needs to be updated. The HCatStorer + //updates the job configuration in the backend to insert these keys to avoid + //having to call setOutput from the backend (which would cause a metastore call + //from the map jobs) + public static final String HCAT_KEY_OUTPUT_BASE = "mapreduce.lib.hcatoutput"; + public static final String HCAT_KEY_OUTPUT_INFO = HCAT_KEY_OUTPUT_BASE + ".info"; + public static final String HCAT_KEY_HIVE_CONF = HCAT_KEY_OUTPUT_BASE + ".hive.conf"; + public static final String HCAT_KEY_TOKEN_SIGNATURE = HCAT_KEY_OUTPUT_BASE + ".token.sig"; + + public static final String[] OUTPUT_CONFS_TO_SAVE = { + HCAT_KEY_OUTPUT_INFO, + HCAT_KEY_HIVE_CONF, + HCAT_KEY_TOKEN_SIGNATURE + }; + + + public static final String HCAT_MSG_CLEAN_FREQ = "hcat.msg.clean.freq"; + public static final String HCAT_MSG_EXPIRY_DURATION = "hcat.msg.expiry.duration"; + + public static final String HCAT_MSGBUS_TOPIC_NAME = "hcat.msgbus.topic.name"; + public static final String HCAT_MSGBUS_TOPIC_NAMING_POLICY = "hcat.msgbus.topic.naming.policy"; + public static final String HCAT_MSGBUS_TOPIC_PREFIX = "hcat.msgbus.topic.prefix"; + + public static final String HCAT_DYNAMIC_PTN_JOBID = HCAT_KEY_OUTPUT_BASE + "dynamic.jobid"; + public static final boolean HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED = false; + + // Message Bus related properties. + public static final String HCAT_DEFAULT_TOPIC_PREFIX = "hcat"; + public static final String HCAT_EVENT = "HCAT_EVENT"; + public static final String HCAT_ADD_PARTITION_EVENT = "ADD_PARTITION"; + public static final String HCAT_DROP_PARTITION_EVENT = "DROP_PARTITION"; + public static final String HCAT_PARTITION_DONE_EVENT = "PARTITION_DONE"; + public static final String HCAT_CREATE_TABLE_EVENT = "CREATE_TABLE"; + public static final String HCAT_DROP_TABLE_EVENT = "DROP_TABLE"; + public static final String HCAT_CREATE_DATABASE_EVENT = "CREATE_DATABASE"; + public static final String HCAT_DROP_DATABASE_EVENT = "DROP_DATABASE"; + public static final String HCAT_MESSAGE_VERSION = "HCAT_MESSAGE_VERSION"; + public static final String HCAT_MESSAGE_FORMAT = "HCAT_MESSAGE_FORMAT"; + public static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; + public static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; + public static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hive.hcatalog.messaging.json.JSONMessageFactory"; + + // System environment variables + public static final String SYSENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION"; + + // Hadoop Conf Var Names + public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary"; + + //*************************************************************************** + // Data-related configuration properties. + //*************************************************************************** + + /** + * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}). + * Pig < 0.10.0 does not have boolean support, and scripts written for pre-boolean Pig versions + * will not expect boolean values when upgrading Pig. For integration the option is offered to + * convert boolean fields to integers by setting this Hadoop configuration key. 
+ */ + public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER = + "hcat.data.convert.boolean.to.integer"; + public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false; + + /** + * {@value} (default: {@value #HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT}). + * Hive tables support tinyint and smallint columns, while not all processing frameworks support + * these types (Pig only has integer for example). Enable this property to promote tinyint and + * smallint columns to integer at runtime. Note that writes to tinyint and smallint columns + * enforce bounds checking and jobs will fail if attempting to write values outside the column + * bounds. + */ + public static final String HCAT_DATA_TINY_SMALL_INT_PROMOTION = + "hcat.data.tiny.small.int.promotion"; + public static final boolean HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT = false; + + /** + * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT}). + * Threshold for the ratio of bad records that will be silently skipped without causing a task + * failure. This is useful when processing large data sets with corrupt records, when its + * acceptable to skip some bad records. + */ + public static final String HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY = "hcat.input.bad.record.threshold"; + public static final float HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT = 0.0001f; + + /** + * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_MIN_DEFAULT}). + * Number of bad records that will be accepted before applying + * {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}. This is necessary to prevent an initial bad + * record from causing a task failure. + */ + public static final String HCAT_INPUT_BAD_RECORD_MIN_KEY = "hcat.input.bad.record.min"; + public static final int HCAT_INPUT_BAD_RECORD_MIN_DEFAULT = 2; } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatContext.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatContext.java index aa3b5fe..6f5d43c 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatContext.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatContext.java @@ -47,41 +47,41 @@ @InterfaceAudience.Public @InterfaceStability.Evolving public enum HCatContext { - INSTANCE; + INSTANCE; - private Configuration conf = null; + private Configuration conf = null; - /** - * Use the given configuration for optional behavior. Keys exclusive to an existing config - * are set in the new conf. The job conf must be used to ensure properties are passed to - * backend MR tasks. - */ - public synchronized HCatContext setConf(Configuration newConf) { - Preconditions.checkNotNull(newConf, "Required parameter 'newConf' must not be null."); + /** + * Use the given configuration for optional behavior. Keys exclusive to an existing config + * are set in the new conf. The job conf must be used to ensure properties are passed to + * backend MR tasks. 
+ */ + public synchronized HCatContext setConf(Configuration newConf) { + Preconditions.checkNotNull(newConf, "Required parameter 'newConf' must not be null."); - if (conf == null) { - conf = newConf; - return this; - } + if (conf == null) { + conf = newConf; + return this; + } - if (conf != newConf) { - for (Map.Entry entry : conf) { - if ((entry.getKey().matches("hcat.*")) && (newConf.get(entry.getKey()) == null)) { - newConf.set(entry.getKey(), entry.getValue()); - } - } - conf = newConf; + if (conf != newConf) { + for (Map.Entry entry : conf) { + if ((entry.getKey().matches("hcat.*")) && (newConf.get(entry.getKey()) == null)) { + newConf.set(entry.getKey(), entry.getValue()); } - return this; + } + conf = newConf; } + return this; + } - /** - * Get the configuration, if there is one. Users are not required to setup HCatContext - * unless they wish to override default behavior, so the configuration may not be present. - * - * @return an Optional that might contain a Configuration - */ - public Optional getConf() { - return Optional.fromNullable(conf); - } + /** + * Get the configuration, if there is one. Users are not required to setup HCatContext + * unless they wish to override default behavior, so the configuration may not be present. + * + * @return an Optional that might contain a Configuration + */ + public Optional getConf() { + return Optional.fromNullable(conf); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatException.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatException.java index 77e8abc..265d08d 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatException.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatException.java @@ -25,135 +25,135 @@ */ public class HCatException extends IOException { - private static final long serialVersionUID = 1L; - - /** The error type enum for this exception. */ - private final ErrorType errorType; - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - */ - public HCatException(ErrorType errorType) { - this(errorType, null, null); - } - - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - * @param cause the cause - */ - public HCatException(ErrorType errorType, Throwable cause) { - this(errorType, null, cause); - } - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - * @param extraMessage extra messages to add to the message string - */ - public HCatException(ErrorType errorType, String extraMessage) { - this(errorType, extraMessage, null); - } - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - * @param extraMessage extra messages to add to the message string - * @param cause the cause - */ - public HCatException(ErrorType errorType, String extraMessage, Throwable cause) { - super(buildErrorMessage( - errorType, - extraMessage, - cause), cause); - this.errorType = errorType; - } - - - //TODO : remove default error type constructors after all exceptions - //are changed to use error types - - /** - * Instantiates a new hcat exception. - * @param message the error message - */ - public HCatException(String message) { - this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, null); - } - - /** - * Instantiates a new hcat exception. 
- * @param message the error message - * @param cause the cause - */ - public HCatException(String message, Throwable cause) { - this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, cause); - } - - - /** - * Builds the error message string. The error type message is appended with the extra message. If appendCause - * is true for the error type, then the message of the cause also is added to the message. - * @param type the error type - * @param extraMessage the extra message string - * @param cause the cause for the exception - * @return the exception message string - */ - public static String buildErrorMessage(ErrorType type, String extraMessage, Throwable cause) { - - //Initial message is just the error type message - StringBuffer message = new StringBuffer(HCatException.class.getName()); - message.append(" : " + type.getErrorCode()); - message.append(" : " + type.getErrorMessage()); - - if (extraMessage != null) { - //Add the extra message value to buffer - message.append(" : " + extraMessage); - } - - if (type.appendCauseMessage()) { - if (cause != null) { - //Add the cause message to buffer - message.append(". Cause : " + cause.toString()); - } - } - - return message.toString(); + private static final long serialVersionUID = 1L; + + /** The error type enum for this exception. */ + private final ErrorType errorType; + + /** + * Instantiates a new hcat exception. + * @param errorType the error type + */ + public HCatException(ErrorType errorType) { + this(errorType, null, null); + } + + + /** + * Instantiates a new hcat exception. + * @param errorType the error type + * @param cause the cause + */ + public HCatException(ErrorType errorType, Throwable cause) { + this(errorType, null, cause); + } + + /** + * Instantiates a new hcat exception. + * @param errorType the error type + * @param extraMessage extra messages to add to the message string + */ + public HCatException(ErrorType errorType, String extraMessage) { + this(errorType, extraMessage, null); + } + + /** + * Instantiates a new hcat exception. + * @param errorType the error type + * @param extraMessage extra messages to add to the message string + * @param cause the cause + */ + public HCatException(ErrorType errorType, String extraMessage, Throwable cause) { + super(buildErrorMessage( + errorType, + extraMessage, + cause), cause); + this.errorType = errorType; + } + + + //TODO : remove default error type constructors after all exceptions + //are changed to use error types + + /** + * Instantiates a new hcat exception. + * @param message the error message + */ + public HCatException(String message) { + this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, null); + } + + /** + * Instantiates a new hcat exception. + * @param message the error message + * @param cause the cause + */ + public HCatException(String message, Throwable cause) { + this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, cause); + } + + + /** + * Builds the error message string. The error type message is appended with the extra message. If appendCause + * is true for the error type, then the message of the cause also is added to the message. 
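For orientation, the string assembled here is the exception class name, the numeric code, the enum message, then the optional extra text and cause. A small sketch of what a caller sees, assuming only the hcatalog-core classes defined in this patch are on the classpath; the class name and sample table are invented for the example and the printed text is approximate:

  import org.apache.hive.hcatalog.common.ErrorType;
  import org.apache.hive.hcatalog.common.HCatException;

  public class HCatExceptionSketch {
    public static void main(String[] args) {
      HCatException e = new HCatException(ErrorType.ERROR_INVALID_TABLE,
          "db=default, table=missing_tbl");
      // Prints roughly: org.apache.hive.hcatalog.common.HCatException : 2000 :
      //                 Table specified does not exist : db=default, table=missing_tbl
      System.out.println(e.getMessage());
      System.out.println("code=" + e.getErrorCode() + ", retriable=" + e.isRetriable());
    }
  }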
+ * @param type the error type + * @param extraMessage the extra message string + * @param cause the cause for the exception + * @return the exception message string + */ + public static String buildErrorMessage(ErrorType type, String extraMessage, Throwable cause) { + + //Initial message is just the error type message + StringBuffer message = new StringBuffer(HCatException.class.getName()); + message.append(" : " + type.getErrorCode()); + message.append(" : " + type.getErrorMessage()); + + if (extraMessage != null) { + //Add the extra message value to buffer + message.append(" : " + extraMessage); } - - /** - * Is this a retriable error. - * @return is it retriable - */ - public boolean isRetriable() { - return errorType.isRetriable(); - } - - /** - * Gets the error type. - * @return the error type enum - */ - public ErrorType getErrorType() { - return errorType; + if (type.appendCauseMessage()) { + if (cause != null) { + //Add the cause message to buffer + message.append(". Cause : " + cause.toString()); + } } - /** - * Gets the error code. - * @return the error code - */ - public int getErrorCode() { - return errorType.getErrorCode(); - } - - /* (non-Javadoc) - * @see java.lang.Throwable#toString() - */ - @Override - public String toString() { - return getMessage(); - } + return message.toString(); + } + + + /** + * Is this a retriable error. + * @return is it retriable + */ + public boolean isRetriable() { + return errorType.isRetriable(); + } + + /** + * Gets the error type. + * @return the error type enum + */ + public ErrorType getErrorType() { + return errorType; + } + + /** + * Gets the error code. + * @return the error code + */ + public int getErrorCode() { + return errorType.getErrorCode(); + } + + /* (non-Javadoc) + * @see java.lang.Throwable#toString() + */ + @Override + public String toString() { + return getMessage(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java index 841857a..ee3b443 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java @@ -76,552 +76,552 @@ public class HCatUtil { - private static final Logger LOG = LoggerFactory.getLogger(HCatUtil.class); - private static volatile HiveClientCache hiveClientCache; - private final static int DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS = 2 * 60; - - public static boolean checkJobContextIfRunningFromBackend(JobContext j) { - if (j.getConfiguration().get("mapred.task.id", "").equals("") && - !("true".equals(j.getConfiguration().get("pig.illustrating")))) { - return false; - } - return true; + private static final Logger LOG = LoggerFactory.getLogger(HCatUtil.class); + private static volatile HiveClientCache hiveClientCache; + private final static int DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS = 2 * 60; + + public static boolean checkJobContextIfRunningFromBackend(JobContext j) { + if (j.getConfiguration().get("mapred.task.id", "").equals("") && + !("true".equals(j.getConfiguration().get("pig.illustrating")))) { + return false; } + return true; + } - public static String serialize(Serializable obj) throws IOException { - if (obj == null) { - return ""; - } - try { - ByteArrayOutputStream serialObj = new ByteArrayOutputStream(); - ObjectOutputStream objStream = new ObjectOutputStream(serialObj); - objStream.writeObject(obj); - objStream.close(); - return encodeBytes(serialObj.toByteArray()); - } catch 
(Exception e) { - throw new IOException("Serialization error: " + e.getMessage(), e); - } + public static String serialize(Serializable obj) throws IOException { + if (obj == null) { + return ""; } - - public static Object deserialize(String str) throws IOException { - if (str == null || str.length() == 0) { - return null; - } - try { - ByteArrayInputStream serialObj = new ByteArrayInputStream( - decodeBytes(str)); - ObjectInputStream objStream = new ObjectInputStream(serialObj); - return objStream.readObject(); - } catch (Exception e) { - throw new IOException("Deserialization error: " + e.getMessage(), e); - } + try { + ByteArrayOutputStream serialObj = new ByteArrayOutputStream(); + ObjectOutputStream objStream = new ObjectOutputStream(serialObj); + objStream.writeObject(obj); + objStream.close(); + return encodeBytes(serialObj.toByteArray()); + } catch (Exception e) { + throw new IOException("Serialization error: " + e.getMessage(), e); } + } - public static String encodeBytes(byte[] bytes) { - StringBuffer strBuf = new StringBuffer(); - - for (int i = 0; i < bytes.length; i++) { - strBuf.append((char) (((bytes[i] >> 4) & 0xF) + ('a'))); - strBuf.append((char) (((bytes[i]) & 0xF) + ('a'))); - } - - return strBuf.toString(); + public static Object deserialize(String str) throws IOException { + if (str == null || str.length() == 0) { + return null; } - - public static byte[] decodeBytes(String str) { - byte[] bytes = new byte[str.length() / 2]; - for (int i = 0; i < str.length(); i += 2) { - char c = str.charAt(i); - bytes[i / 2] = (byte) ((c - 'a') << 4); - c = str.charAt(i + 1); - bytes[i / 2] += (c - 'a'); - } - return bytes; + try { + ByteArrayInputStream serialObj = new ByteArrayInputStream( + decodeBytes(str)); + ObjectInputStream objStream = new ObjectInputStream(serialObj); + return objStream.readObject(); + } catch (Exception e) { + throw new IOException("Deserialization error: " + e.getMessage(), e); } + } - public static List getHCatFieldSchemaList( - FieldSchema... fields) throws HCatException { - List result = new ArrayList( - fields.length); - - for (FieldSchema f : fields) { - result.add(HCatSchemaUtils.getHCatFieldSchema(f)); - } + public static String encodeBytes(byte[] bytes) { + StringBuffer strBuf = new StringBuffer(); - return result; + for (int i = 0; i < bytes.length; i++) { + strBuf.append((char) (((bytes[i] >> 4) & 0xF) + ('a'))); + strBuf.append((char) (((bytes[i]) & 0xF) + ('a'))); } - public static List getHCatFieldSchemaList( - List fields) throws HCatException { - if (fields == null) { - return null; - } else { - List result = new ArrayList(); - for (FieldSchema f : fields) { - result.add(HCatSchemaUtils.getHCatFieldSchema(f)); - } - return result; - } - } + return strBuf.toString(); + } - public static HCatSchema extractSchema(Table table) throws HCatException { - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); + public static byte[] decodeBytes(String str) { + byte[] bytes = new byte[str.length() / 2]; + for (int i = 0; i < str.length(); i += 2) { + char c = str.charAt(i); + bytes[i / 2] = (byte) ((c - 'a') << 4); + c = str.charAt(i + 1); + bytes[i / 2] += (c - 'a'); } + return bytes; + } - public static HCatSchema extractSchema(Partition partition) throws HCatException { - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(partition.getCols())); - } + public static List getHCatFieldSchemaList( + FieldSchema... 
fields) throws HCatException { + List result = new ArrayList( + fields.length); - public static List getFieldSchemaList( - List hcatFields) { - if (hcatFields == null) { - return null; - } else { - List result = new ArrayList(); - for (HCatFieldSchema f : hcatFields) { - result.add(HCatSchemaUtils.getFieldSchema(f)); - } - return result; - } + for (FieldSchema f : fields) { + result.add(HCatSchemaUtils.getHCatFieldSchema(f)); } - public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName) - throws NoSuchObjectException, TException, MetaException { - return new Table(client.getTable(dbName, tableName)); + return result; + } + + public static List getHCatFieldSchemaList( + List fields) throws HCatException { + if (fields == null) { + return null; + } else { + List result = new ArrayList(); + for (FieldSchema f : fields) { + result.add(HCatSchemaUtils.getHCatFieldSchema(f)); + } + return result; + } + } + + public static HCatSchema extractSchema(Table table) throws HCatException { + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); + } + + public static HCatSchema extractSchema(Partition partition) throws HCatException { + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(partition.getCols())); + } + + public static List getFieldSchemaList( + List hcatFields) { + if (hcatFields == null) { + return null; + } else { + List result = new ArrayList(); + for (HCatFieldSchema f : hcatFields) { + result.add(HCatSchemaUtils.getFieldSchema(f)); + } + return result; } + } - public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException { - HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); + public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName) + throws NoSuchObjectException, TException, MetaException { + return new Table(client.getTable(dbName, tableName)); + } - if (table.getPartitionKeys().size() != 0) { + public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException { + HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); - // add partition keys to table schema - // NOTE : this assumes that we do not ever have ptn keys as columns - // inside the table schema as well! - for (FieldSchema fs : table.getPartitionKeys()) { - tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - } - return tableSchema; - } + if (table.getPartitionKeys().size() != 0) { - /** - * return the partition columns from a table instance - * - * @param table the instance to extract partition columns from - * @return HCatSchema instance which contains the partition columns - * @throws IOException - */ - public static HCatSchema getPartitionColumns(Table table) throws IOException { - HCatSchema cols = new HCatSchema(new LinkedList()); - if (table.getPartitionKeys().size() != 0) { - for (FieldSchema fs : table.getPartitionKeys()) { - cols.append(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - } - return cols; + // add partition keys to table schema + // NOTE : this assumes that we do not ever have ptn keys as columns + // inside the table schema as well! 
+ for (FieldSchema fs : table.getPartitionKeys()) { + tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + } + return tableSchema; + } + + /** + * return the partition columns from a table instance + * + * @param table the instance to extract partition columns from + * @return HCatSchema instance which contains the partition columns + * @throws IOException + */ + public static HCatSchema getPartitionColumns(Table table) throws IOException { + HCatSchema cols = new HCatSchema(new LinkedList()); + if (table.getPartitionKeys().size() != 0) { + for (FieldSchema fs : table.getPartitionKeys()) { + cols.append(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + } + return cols; + } + + /** + * Validate partition schema, checks if the column types match between the + * partition and the existing table schema. Returns the list of columns + * present in the partition but not in the table. + * + * @param table the table + * @param partitionSchema the partition schema + * @return the list of newly added fields + * @throws IOException Signals that an I/O exception has occurred. + */ + public static List validatePartitionSchema(Table table, + HCatSchema partitionSchema) throws IOException { + Map partitionKeyMap = new HashMap(); + + for (FieldSchema field : table.getPartitionKeys()) { + partitionKeyMap.put(field.getName().toLowerCase(), field); } - /** - * Validate partition schema, checks if the column types match between the - * partition and the existing table schema. Returns the list of columns - * present in the partition but not in the table. - * - * @param table the table - * @param partitionSchema the partition schema - * @return the list of newly added fields - * @throws IOException Signals that an I/O exception has occurred. - */ - public static List validatePartitionSchema(Table table, - HCatSchema partitionSchema) throws IOException { - Map partitionKeyMap = new HashMap(); - - for (FieldSchema field : table.getPartitionKeys()) { - partitionKeyMap.put(field.getName().toLowerCase(), field); - } + List tableCols = table.getCols(); + List newFields = new ArrayList(); - List tableCols = table.getCols(); - List newFields = new ArrayList(); - - for (int i = 0; i < partitionSchema.getFields().size(); i++) { - - FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema - .getFields().get(i)); - - FieldSchema tableField; - if (i < tableCols.size()) { - tableField = tableCols.get(i); - - if (!tableField.getName().equalsIgnoreCase(field.getName())) { - throw new HCatException( - ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, - "Expected column <" + tableField.getName() - + "> at position " + (i + 1) - + ", found column <" + field.getName() - + ">"); - } - } else { - tableField = partitionKeyMap.get(field.getName().toLowerCase()); - - if (tableField != null) { - throw new HCatException( - ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" - + field.getName() + ">"); - } - } - - if (tableField == null) { - // field present in partition but not in table - newFields.add(field); - } else { - // field present in both. 
validate type has not changed - TypeInfo partitionType = TypeInfoUtils - .getTypeInfoFromTypeString(field.getType()); - TypeInfo tableType = TypeInfoUtils - .getTypeInfoFromTypeString(tableField.getType()); - - if (!partitionType.equals(tableType)) { - throw new HCatException( - ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" - + field.getName() + ">, expected <" - + tableType.getTypeName() + ">, got <" - + partitionType.getTypeName() + ">"); - } - } - } + for (int i = 0; i < partitionSchema.getFields().size(); i++) { - return newFields; - } + FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema + .getFields().get(i)); - /** - * Test if the first FsAction is more permissive than the second. This is - * useful in cases where we want to ensure that a file owner has more - * permissions than the group they belong to, for eg. More completely(but - * potentially more cryptically) owner-r >= group-r >= world-r : bitwise - * and-masked with 0444 => 444 >= 440 >= 400 >= 000 owner-w >= group-w >= - * world-w : bitwise and-masked with &0222 => 222 >= 220 >= 200 >= 000 - * owner-x >= group-x >= world-x : bitwise and-masked with &0111 => 111 >= - * 110 >= 100 >= 000 - * - * @return true if first FsAction is more permissive than the second, false - * if not. - */ - public static boolean validateMorePermissive(FsAction first, FsAction second) { - if ((first == FsAction.ALL) || (second == FsAction.NONE) - || (first == second)) { - return true; - } - switch (first) { - case READ_EXECUTE: - return ((second == FsAction.READ) || (second == FsAction.EXECUTE)); - case READ_WRITE: - return ((second == FsAction.READ) || (second == FsAction.WRITE)); - case WRITE_EXECUTE: - return ((second == FsAction.WRITE) || (second == FsAction.EXECUTE)); + FieldSchema tableField; + if (i < tableCols.size()) { + tableField = tableCols.get(i); + + if (!tableField.getName().equalsIgnoreCase(field.getName())) { + throw new HCatException( + ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, + "Expected column <" + tableField.getName() + + "> at position " + (i + 1) + + ", found column <" + field.getName() + + ">"); } - return false; - } + } else { + tableField = partitionKeyMap.get(field.getName().toLowerCase()); - /** - * Ensure that read or write permissions are not granted without also - * granting execute permissions. Essentially, r-- , rw- and -w- are invalid, - * r-x, -wx, rwx, ---, --x are valid - * - * @param perms The FsAction to verify - * @return true if the presence of read or write permission is accompanied - * by execute permissions - */ - public static boolean validateExecuteBitPresentIfReadOrWrite(FsAction perms) { - if ((perms == FsAction.READ) || (perms == FsAction.WRITE) - || (perms == FsAction.READ_WRITE)) { - return false; + if (tableField != null) { + throw new HCatException( + ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + + field.getName() + ">"); + } + } + + if (tableField == null) { + // field present in partition but not in table + newFields.add(field); + } else { + // field present in both. 
validate type has not changed + TypeInfo partitionType = TypeInfoUtils + .getTypeInfoFromTypeString(field.getType()); + TypeInfo tableType = TypeInfoUtils + .getTypeInfoFromTypeString(tableField.getType()); + + if (!partitionType.equals(tableType)) { + throw new HCatException( + ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" + + field.getName() + ">, expected <" + + tableType.getTypeName() + ">, got <" + + partitionType.getTypeName() + ">"); } - return true; + } } - public static Token getJobTrackerDelegationToken( - Configuration conf, String userName) throws Exception { - // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")"); - JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class)); - Token t = jcl - .getDelegationToken(new Text(userName)); - // LOG.info("got "+t); - return t; - - // return null; + return newFields; + } + + /** + * Test if the first FsAction is more permissive than the second. This is + * useful in cases where we want to ensure that a file owner has more + * permissions than the group they belong to, for eg. More completely(but + * potentially more cryptically) owner-r >= group-r >= world-r : bitwise + * and-masked with 0444 => 444 >= 440 >= 400 >= 000 owner-w >= group-w >= + * world-w : bitwise and-masked with &0222 => 222 >= 220 >= 200 >= 000 + * owner-x >= group-x >= world-x : bitwise and-masked with &0111 => 111 >= + * 110 >= 100 >= 000 + * + * @return true if first FsAction is more permissive than the second, false + * if not. + */ + public static boolean validateMorePermissive(FsAction first, FsAction second) { + if ((first == FsAction.ALL) || (second == FsAction.NONE) + || (first == second)) { + return true; } - - public static Token extractThriftToken( - String tokenStrForm, String tokenSignature) throws MetaException, - TException, IOException { - // LOG.info("extractThriftToken("+tokenStrForm+","+tokenSignature+")"); - Token t = new Token(); - t.decodeFromUrlString(tokenStrForm); - t.setService(new Text(tokenSignature)); - // LOG.info("returning "+t); - return t; + switch (first) { + case READ_EXECUTE: + return ((second == FsAction.READ) || (second == FsAction.EXECUTE)); + case READ_WRITE: + return ((second == FsAction.READ) || (second == FsAction.WRITE)); + case WRITE_EXECUTE: + return ((second == FsAction.WRITE) || (second == FsAction.EXECUTE)); } - - /** - * Create an instance of a storage handler defined in storerInfo. If one cannot be found - * then FosterStorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. - * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. - * @param conf job's configuration will be used to configure the Configurable StorageHandler - * @param storerInfo StorerInfo to definining the StorageHandler and InputFormat, OutputFormat and SerDe - * @return storageHandler instance - * @throws IOException - */ - public static HCatStorageHandler getStorageHandler(Configuration conf, StorerInfo storerInfo) throws IOException { - return getStorageHandler(conf, - storerInfo.getStorageHandlerClass(), - storerInfo.getSerdeClass(), - storerInfo.getIfClass(), - storerInfo.getOfClass()); + return false; + } + + /** + * Ensure that read or write permissions are not granted without also + * granting execute permissions. 
Essentially, r-- , rw- and -w- are invalid, + * r-x, -wx, rwx, ---, --x are valid + * + * @param perms The FsAction to verify + * @return true if the presence of read or write permission is accompanied + * by execute permissions + */ + public static boolean validateExecuteBitPresentIfReadOrWrite(FsAction perms) { + if ((perms == FsAction.READ) || (perms == FsAction.WRITE) + || (perms == FsAction.READ_WRITE)) { + return false; } - - public static HCatStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo) throws IOException { - return HCatUtil.getStorageHandler( - conf, - partitionInfo.getStorageHandlerClassName(), - partitionInfo.getSerdeClassName(), - partitionInfo.getInputFormatClassName(), - partitionInfo.getOutputFormatClassName()); + return true; + } + + public static Token getJobTrackerDelegationToken( + Configuration conf, String userName) throws Exception { + // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")"); + JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class)); + Token t = jcl + .getDelegationToken(new Text(userName)); + // LOG.info("got "+t); + return t; + + // return null; + } + + public static Token extractThriftToken( + String tokenStrForm, String tokenSignature) throws MetaException, + TException, IOException { + // LOG.info("extractThriftToken("+tokenStrForm+","+tokenSignature+")"); + Token t = new Token(); + t.decodeFromUrlString(tokenStrForm); + t.setService(new Text(tokenSignature)); + // LOG.info("returning "+t); + return t; + } + + /** + * Create an instance of a storage handler defined in storerInfo. If one cannot be found + * then FosterStorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. + * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. + * @param conf job's configuration will be used to configure the Configurable StorageHandler + * @param storerInfo StorerInfo to definining the StorageHandler and InputFormat, OutputFormat and SerDe + * @return storageHandler instance + * @throws IOException + */ + public static HCatStorageHandler getStorageHandler(Configuration conf, StorerInfo storerInfo) throws IOException { + return getStorageHandler(conf, + storerInfo.getStorageHandlerClass(), + storerInfo.getSerdeClass(), + storerInfo.getIfClass(), + storerInfo.getOfClass()); + } + + public static HCatStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo) throws IOException { + return HCatUtil.getStorageHandler( + conf, + partitionInfo.getStorageHandlerClassName(), + partitionInfo.getSerdeClassName(), + partitionInfo.getInputFormatClassName(), + partitionInfo.getOutputFormatClassName()); + } + + /** + * Create an instance of a storage handler. If storageHandler == null, + * then surrrogate StorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. + * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. 
+ * @param conf job's configuration will be used to configure the Configurable StorageHandler + * @param storageHandler fully qualified class name of the desired StorageHandle instance + * @param serDe fully qualified class name of the desired SerDe instance + * @param inputFormat fully qualified class name of the desired InputFormat instance + * @param outputFormat fully qualified class name of the desired outputFormat instance + * @return storageHandler instance + * @throws IOException + */ + public static HCatStorageHandler getStorageHandler(Configuration conf, + String storageHandler, + String serDe, + String inputFormat, + String outputFormat) + throws IOException { + + if ((storageHandler == null) || (storageHandler.equals(FosterStorageHandler.class.getName()))) { + try { + FosterStorageHandler fosterStorageHandler = + new FosterStorageHandler(inputFormat, outputFormat, serDe); + fosterStorageHandler.setConf(conf); + return fosterStorageHandler; + } catch (ClassNotFoundException e) { + throw new IOException("Failed to load " + + "foster storage handler", e); + } } - /** - * Create an instance of a storage handler. If storageHandler == null, - * then surrrogate StorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. - * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. - * @param conf job's configuration will be used to configure the Configurable StorageHandler - * @param storageHandler fully qualified class name of the desired StorageHandle instance - * @param serDe fully qualified class name of the desired SerDe instance - * @param inputFormat fully qualified class name of the desired InputFormat instance - * @param outputFormat fully qualified class name of the desired outputFormat instance - * @return storageHandler instance - * @throws IOException - */ - public static HCatStorageHandler getStorageHandler(Configuration conf, - String storageHandler, - String serDe, - String inputFormat, - String outputFormat) - throws IOException { - - if ((storageHandler == null) || (storageHandler.equals(FosterStorageHandler.class.getName()))) { - try { - FosterStorageHandler fosterStorageHandler = - new FosterStorageHandler(inputFormat, outputFormat, serDe); - fosterStorageHandler.setConf(conf); - return fosterStorageHandler; - } catch (ClassNotFoundException e) { - throw new IOException("Failed to load " - + "foster storage handler", e); - } - } - - try { - Class handlerClass = - (Class) Class - .forName(storageHandler, true, JavaUtils.getClassLoader()); - return (HCatStorageHandler) ReflectionUtils.newInstance( - handlerClass, conf); - } catch (ClassNotFoundException e) { - throw new IOException("Error in loading storage handler." - + e.getMessage(), e); - } + try { + Class handlerClass = + (Class) Class + .forName(storageHandler, true, JavaUtils.getClassLoader()); + return (HCatStorageHandler) ReflectionUtils.newInstance( + handlerClass, conf); + } catch (ClassNotFoundException e) { + throw new IOException("Error in loading storage handler." + + e.getMessage(), e); } - - public static Pair getDbAndTableName(String tableName) throws IOException { - String[] dbTableNametokens = tableName.split("\\."); - if (dbTableNametokens.length == 1) { - return new Pair(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - } else if (dbTableNametokens.length == 2) { - return new Pair(dbTableNametokens[0], dbTableNametokens[1]); - } else { - throw new IOException("tableName expected in the form " - + ".

<table name> or <table name>
. Got " + tableName); - } + } + + public static Pair getDbAndTableName(String tableName) throws IOException { + String[] dbTableNametokens = tableName.split("\\."); + if (dbTableNametokens.length == 1) { + return new Pair(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + } else if (dbTableNametokens.length == 2) { + return new Pair(dbTableNametokens[0], dbTableNametokens[1]); + } else { + throw new IOException("tableName expected in the form " + + ".
<table name> or <table name>
. Got " + tableName); + } + } + + public static Map + getInputJobProperties(HCatStorageHandler storageHandler, + InputJobInfo inputJobInfo) { + TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), + storageHandler.getInputFormatClass(), + storageHandler.getOutputFormatClass(), + inputJobInfo.getTableInfo().getStorerInfo().getProperties()); + if (tableDesc.getJobProperties() == null) { + tableDesc.setJobProperties(new HashMap()); } - public static Map - getInputJobProperties(HCatStorageHandler storageHandler, - InputJobInfo inputJobInfo) { - TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), - storageHandler.getInputFormatClass(), - storageHandler.getOutputFormatClass(), - inputJobInfo.getTableInfo().getStorerInfo().getProperties()); - if (tableDesc.getJobProperties() == null) { - tableDesc.setJobProperties(new HashMap()); - } - - Map jobProperties = new HashMap(); - try { - tableDesc.getJobProperties().put( - HCatConstants.HCAT_KEY_JOB_INFO, - HCatUtil.serialize(inputJobInfo)); - - storageHandler.configureInputJobProperties(tableDesc, - jobProperties); + Map jobProperties = new HashMap(); + try { + tableDesc.getJobProperties().put( + HCatConstants.HCAT_KEY_JOB_INFO, + HCatUtil.serialize(inputJobInfo)); - } catch (IOException e) { - throw new IllegalStateException( - "Failed to configure StorageHandler", e); - } + storageHandler.configureInputJobProperties(tableDesc, + jobProperties); - return jobProperties; + } catch (IOException e) { + throw new IllegalStateException( + "Failed to configure StorageHandler", e); } - @InterfaceAudience.Private - @InterfaceStability.Evolving - public static void - configureOutputStorageHandler(HCatStorageHandler storageHandler, - Configuration conf, - OutputJobInfo outputJobInfo) { - //TODO replace IgnoreKeyTextOutputFormat with a - //HiveOutputFormatWrapper in StorageHandler - TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), - storageHandler.getInputFormatClass(), - IgnoreKeyTextOutputFormat.class, - outputJobInfo.getTableInfo().getStorerInfo().getProperties()); - if (tableDesc.getJobProperties() == null) - tableDesc.setJobProperties(new HashMap()); - for (Map.Entry el : conf) { - tableDesc.getJobProperties().put(el.getKey(), el.getValue()); - } - - Map jobProperties = new HashMap(); - try { - tableDesc.getJobProperties().put( - HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(outputJobInfo)); - - storageHandler.configureOutputJobProperties(tableDesc, - jobProperties); - - for (Map.Entry el : jobProperties.entrySet()) { - conf.set(el.getKey(), el.getValue()); - } - } catch (IOException e) { - throw new IllegalStateException( - "Failed to configure StorageHandler", e); - } + return jobProperties; + } + + @InterfaceAudience.Private + @InterfaceStability.Evolving + public static void + configureOutputStorageHandler(HCatStorageHandler storageHandler, + Configuration conf, + OutputJobInfo outputJobInfo) { + //TODO replace IgnoreKeyTextOutputFormat with a + //HiveOutputFormatWrapper in StorageHandler + TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), + storageHandler.getInputFormatClass(), + IgnoreKeyTextOutputFormat.class, + outputJobInfo.getTableInfo().getStorerInfo().getProperties()); + if (tableDesc.getJobProperties() == null) + tableDesc.setJobProperties(new HashMap()); + for (Map.Entry el : conf) { + tableDesc.getJobProperties().put(el.getKey(), el.getValue()); } - /** - * Replace the contents of dest with the contents of src - * @param src - * @param dest - */ - public static 
void copyConf(Configuration src, Configuration dest) { - dest.clear(); - for (Map.Entry el : src) { - dest.set(el.getKey(), el.getValue()); - } + Map jobProperties = new HashMap(); + try { + tableDesc.getJobProperties().put( + HCatConstants.HCAT_KEY_OUTPUT_INFO, + HCatUtil.serialize(outputJobInfo)); + + storageHandler.configureOutputJobProperties(tableDesc, + jobProperties); + + for (Map.Entry el : jobProperties.entrySet()) { + conf.set(el.getKey(), el.getValue()); + } + } catch (IOException e) { + throw new IllegalStateException( + "Failed to configure StorageHandler", e); } - - /** - * Get or create a hive client depending on whether it exits in cache or not - * @param hiveConf The hive configuration - * @return the client - * @throws MetaException When HiveMetaStoreClient couldn't be created - * @throws IOException - */ - public static HiveMetaStoreClient getHiveClient(HiveConf hiveConf) - throws MetaException, IOException { - - // Singleton behaviour: create the cache instance if required. The cache needs to be created lazily and - // using the expiry time available in hiveConf. - + } + + /** + * Replace the contents of dest with the contents of src + * @param src + * @param dest + */ + public static void copyConf(Configuration src, Configuration dest) { + dest.clear(); + for (Map.Entry el : src) { + dest.set(el.getKey(), el.getValue()); + } + } + + /** + * Get or create a hive client depending on whether it exits in cache or not + * @param hiveConf The hive configuration + * @return the client + * @throws MetaException When HiveMetaStoreClient couldn't be created + * @throws IOException + */ + public static HiveMetaStoreClient getHiveClient(HiveConf hiveConf) + throws MetaException, IOException { + + // Singleton behaviour: create the cache instance if required. The cache needs to be created lazily and + // using the expiry time available in hiveConf. + + if (hiveClientCache == null) { + synchronized (HiveMetaStoreClient.class) { if (hiveClientCache == null) { - synchronized (HiveMetaStoreClient.class) { - if (hiveClientCache == null) { - hiveClientCache = new HiveClientCache(hiveConf.getInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, - DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS)); - } - } - } - try { - return hiveClientCache.get(hiveConf); - } catch (LoginException e) { - throw new IOException("Couldn't create hiveMetaStoreClient, Error getting UGI for user", e); + hiveClientCache = new HiveClientCache(hiveConf.getInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, + DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS)); } + } } - - public static void closeHiveClientQuietly(HiveMetaStoreClient client) { - try { - if (client != null) - client.close(); - } catch (Exception e) { - LOG.debug("Error closing metastore client. Ignored the error.", e); + try { + return hiveClientCache.get(hiveConf); + } catch (LoginException e) { + throw new IOException("Couldn't create hiveMetaStoreClient, Error getting UGI for user", e); + } + } + + public static void closeHiveClientQuietly(HiveMetaStoreClient client) { + try { + if (client != null) + client.close(); + } catch (Exception e) { + LOG.debug("Error closing metastore client. 
Ignored the error.", e); + } + } + + public static HiveConf getHiveConf(Configuration conf) + throws IOException { + + HiveConf hiveConf = new HiveConf(conf, HCatUtil.class); + + //copy the hive conf into the job conf and restore it + //in the backend context + if (conf.get(HCatConstants.HCAT_KEY_HIVE_CONF) == null) { + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(hiveConf.getAllProperties())); + } else { + //Copy configuration properties into the hive conf + Properties properties = (Properties) HCatUtil.deserialize( + conf.get(HCatConstants.HCAT_KEY_HIVE_CONF)); + + for (Map.Entry prop : properties.entrySet()) { + if (prop.getValue() instanceof String) { + hiveConf.set((String) prop.getKey(), (String) prop.getValue()); + } else if (prop.getValue() instanceof Integer) { + hiveConf.setInt((String) prop.getKey(), + (Integer) prop.getValue()); + } else if (prop.getValue() instanceof Boolean) { + hiveConf.setBoolean((String) prop.getKey(), + (Boolean) prop.getValue()); + } else if (prop.getValue() instanceof Long) { + hiveConf.setLong((String) prop.getKey(), (Long) prop.getValue()); + } else if (prop.getValue() instanceof Float) { + hiveConf.setFloat((String) prop.getKey(), + (Float) prop.getValue()); } + } } - public static HiveConf getHiveConf(Configuration conf) - throws IOException { - - HiveConf hiveConf = new HiveConf(conf, HCatUtil.class); - - //copy the hive conf into the job conf and restore it - //in the backend context - if (conf.get(HCatConstants.HCAT_KEY_HIVE_CONF) == null) { - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(hiveConf.getAllProperties())); - } else { - //Copy configuration properties into the hive conf - Properties properties = (Properties) HCatUtil.deserialize( - conf.get(HCatConstants.HCAT_KEY_HIVE_CONF)); - - for (Map.Entry prop : properties.entrySet()) { - if (prop.getValue() instanceof String) { - hiveConf.set((String) prop.getKey(), (String) prop.getValue()); - } else if (prop.getValue() instanceof Integer) { - hiveConf.setInt((String) prop.getKey(), - (Integer) prop.getValue()); - } else if (prop.getValue() instanceof Boolean) { - hiveConf.setBoolean((String) prop.getKey(), - (Boolean) prop.getValue()); - } else if (prop.getValue() instanceof Long) { - hiveConf.setLong((String) prop.getKey(), (Long) prop.getValue()); - } else if (prop.getValue() instanceof Float) { - hiveConf.setFloat((String) prop.getKey(), - (Float) prop.getValue()); - } - } - } + if (conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + hiveConf.set("hive.metastore.token.signature", + conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE)); + } - if (conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - hiveConf.set("hive.metastore.token.signature", - conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE)); - } + return hiveConf; + } - return hiveConf; - } + public static JobConf getJobConfFromContext(JobContext jobContext) { + JobConf jobConf; + // we need to convert the jobContext into a jobConf + // 0.18 jobConf (Hive) vs 0.20+ jobContext (HCat) + // begin conversion.. + jobConf = new JobConf(jobContext.getConfiguration()); + // ..end of conversion - public static JobConf getJobConfFromContext(JobContext jobContext) { - JobConf jobConf; - // we need to convert the jobContext into a jobConf - // 0.18 jobConf (Hive) vs 0.20+ jobContext (HCat) - // begin conversion.. 
- jobConf = new JobConf(jobContext.getConfiguration()); - // ..end of conversion + return jobConf; + } - return jobConf; + public static void copyJobPropertiesToJobConf( + Map jobProperties, JobConf jobConf) { + for (Map.Entry entry : jobProperties.entrySet()) { + jobConf.set(entry.getKey(), entry.getValue()); } + } - public static void copyJobPropertiesToJobConf( - Map jobProperties, JobConf jobConf) { - for (Map.Entry entry : jobProperties.entrySet()) { - jobConf.set(entry.getKey(), entry.getValue()); - } - } - - public static boolean isHadoop23() { - String version = org.apache.hadoop.util.VersionInfo.getVersion(); - if (version.matches("\\b0\\.23\\..+\\b")||version.matches("\\b2\\..*")) - return true; - return false; - } + public static boolean isHadoop23() { + String version = org.apache.hadoop.util.VersionInfo.getVersion(); + if (version.matches("\\b0\\.23\\..+\\b")||version.matches("\\b2\\..*")) + return true; + return false; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java index 3c9a86b..01a2723 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java @@ -46,292 +46,292 @@ * A thread safe time expired cache for HiveMetaStoreClient */ class HiveClientCache { - final private Cache hiveCache; - private static final Logger LOG = LoggerFactory.getLogger(HiveClientCache.class); - private final int timeout; - // This lock is used to make sure removalListener won't close a client that is being contemplated for returning by get() - private final Object CACHE_TEARDOWN_LOCK = new Object(); + final private Cache hiveCache; + private static final Logger LOG = LoggerFactory.getLogger(HiveClientCache.class); + private final int timeout; + // This lock is used to make sure removalListener won't close a client that is being contemplated for returning by get() + private final Object CACHE_TEARDOWN_LOCK = new Object(); - private static final AtomicInteger nextId = new AtomicInteger(0); + private static final AtomicInteger nextId = new AtomicInteger(0); - // Since HiveMetaStoreClient is not threadsafe, hive clients are not shared across threads. - // Thread local variable containing each thread's unique ID, is used as one of the keys for the cache - // causing each thread to get a different client even if the hiveConf is same. - private static final ThreadLocal threadId = - new ThreadLocal() { - @Override - protected Integer initialValue() { - return nextId.getAndIncrement(); - } - }; - - private int getThreadId() { - return threadId.get(); - } - - /** - * @param timeout the length of time in seconds after a client is created that it should be automatically removed - */ - public HiveClientCache(final int timeout) { - this.timeout = timeout; - RemovalListener removalListener = - new RemovalListener() { - public void onRemoval(RemovalNotification notification) { - CacheableHiveMetaStoreClient hiveMetaStoreClient = notification.getValue(); - if (hiveMetaStoreClient != null) { - synchronized (CACHE_TEARDOWN_LOCK) { - hiveMetaStoreClient.setExpiredFromCache(); - hiveMetaStoreClient.tearDownIfUnused(); - } - } - } - }; - hiveCache = CacheBuilder.newBuilder() - .expireAfterWrite(timeout, TimeUnit.SECONDS) - .removalListener(removalListener) - .build(); + // Since HiveMetaStoreClient is not threadsafe, hive clients are not shared across threads. 
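The per-thread isolation referred to in the comment above comes down to pairing an AtomicInteger with a ThreadLocal; the following is a self-contained sketch of that idea (plain JDK, class and method names illustrative, not part of this patch).

import java.util.concurrent.atomic.AtomicInteger;

public class PerThreadKeySketch {
  private static final AtomicInteger nextId = new AtomicInteger(0);

  // Every thread gets a distinct id the first time it touches the ThreadLocal,
  // so a cache key that includes this id is never shared between threads even
  // when the configuration part of the key is identical.
  private static final ThreadLocal<Integer> threadId = new ThreadLocal<Integer>() {
    @Override
    protected Integer initialValue() {
      return nextId.getAndIncrement();
    }
  };

  static String cacheKey(String metastoreUris) {
    return metastoreUris + "#" + threadId.get();
  }

  public static void main(String[] args) throws InterruptedException {
    Runnable task = new Runnable() {
      public void run() {
        System.out.println(Thread.currentThread().getName() + " -> "
            + cacheKey("thrift://localhost:9083"));
      }
    };
    Thread t1 = new Thread(task);
    Thread t2 = new Thread(task);
    t1.start();
    t2.start();
    t1.join();
    t2.join();
  }
}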
+ // Thread local variable containing each thread's unique ID, is used as one of the keys for the cache + // causing each thread to get a different client even if the hiveConf is same. + private static final ThreadLocal threadId = + new ThreadLocal() { + @Override + protected Integer initialValue() { + return nextId.getAndIncrement(); + } + }; - // Add a shutdown hook for cleanup, if there are elements remaining in the cache which were not cleaned up. - // This is the best effort approach. Ignore any error while doing so. Notice that most of the clients - // would get cleaned up via either the removalListener or the close() call, only the active clients - // that are in the cache or expired but being used in other threads wont get cleaned. The following code will only - // clean the active cache ones. The ones expired from cache but being hold by other threads are in the mercy - // of finalize() being called. - Thread cleanupHiveClientShutdownThread = new Thread() { - @Override - public void run() { - LOG.debug("Cleaning up hive client cache in ShutDown hook"); - closeAllClientsQuietly(); - } - }; - Runtime.getRuntime().addShutdownHook(cleanupHiveClientShutdownThread); - } + private int getThreadId() { + return threadId.get(); + } - /** - * Note: This doesn't check if they are being used or not, meant only to be called during shutdown etc. - */ - void closeAllClientsQuietly() { - try { - ConcurrentMap elements = hiveCache.asMap(); - for (CacheableHiveMetaStoreClient cacheableHiveMetaStoreClient : elements.values()) { - cacheableHiveMetaStoreClient.tearDown(); + /** + * @param timeout the length of time in seconds after a client is created that it should be automatically removed + */ + public HiveClientCache(final int timeout) { + this.timeout = timeout; + RemovalListener removalListener = + new RemovalListener() { + public void onRemoval(RemovalNotification notification) { + CacheableHiveMetaStoreClient hiveMetaStoreClient = notification.getValue(); + if (hiveMetaStoreClient != null) { + synchronized (CACHE_TEARDOWN_LOCK) { + hiveMetaStoreClient.setExpiredFromCache(); + hiveMetaStoreClient.tearDownIfUnused(); } - } catch (Exception e) { - LOG.warn("Clean up of hive clients in the cache failed. Ignored", e); + } } - } + }; + hiveCache = CacheBuilder.newBuilder() + .expireAfterWrite(timeout, TimeUnit.SECONDS) + .removalListener(removalListener) + .build(); + + // Add a shutdown hook for cleanup, if there are elements remaining in the cache which were not cleaned up. + // This is the best effort approach. Ignore any error while doing so. Notice that most of the clients + // would get cleaned up via either the removalListener or the close() call, only the active clients + // that are in the cache or expired but being used in other threads wont get cleaned. The following code will only + // clean the active cache ones. The ones expired from cache but being hold by other threads are in the mercy + // of finalize() being called. + Thread cleanupHiveClientShutdownThread = new Thread() { + @Override + public void run() { + LOG.debug("Cleaning up hive client cache in ShutDown hook"); + closeAllClientsQuietly(); + } + }; + Runtime.getRuntime().addShutdownHook(cleanupHiveClientShutdownThread); + } - public void cleanup() { - hiveCache.cleanUp(); + /** + * Note: This doesn't check if they are being used or not, meant only to be called during shutdown etc. 
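For readers less familiar with the Guava cache configured in the constructor above, here is a minimal stand-alone example of the expire-and-notify pattern it relies on (assumes Guava on the classpath; illustrative only, not HCatalog code).

import java.util.concurrent.TimeUnit;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;

public class ExpiringCacheSketch {
  public static void main(String[] args) throws InterruptedException {
    RemovalListener<String, String> listener =
        new RemovalListener<String, String>() {
          public void onRemoval(RemovalNotification<String, String> n) {
            // In HiveClientCache this is where the evicted client is torn down.
            System.out.println("evicted: " + n.getKey() + " (" + n.getCause() + ")");
          }
        };

    Cache<String, String> cache = CacheBuilder.newBuilder()
        .expireAfterWrite(1, TimeUnit.SECONDS)
        .removalListener(listener)
        .build();

    cache.put("client-1", "connection");
    Thread.sleep(1500);
    cache.cleanUp();   // expired entries are removed and the listener fires
  }
}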
+ */ + void closeAllClientsQuietly() { + try { + ConcurrentMap elements = hiveCache.asMap(); + for (CacheableHiveMetaStoreClient cacheableHiveMetaStoreClient : elements.values()) { + cacheableHiveMetaStoreClient.tearDown(); + } + } catch (Exception e) { + LOG.warn("Clean up of hive clients in the cache failed. Ignored", e); } + } - /** - * Returns a cached client if exists or else creates one, caches and returns it. It also checks that the client is - * healthy and can be reused - * @param hiveConf - * @return the hive client - * @throws MetaException - * @throws IOException - * @throws LoginException - */ - public HiveMetaStoreClient get(final HiveConf hiveConf) throws MetaException, IOException, LoginException { - final HiveClientCacheKey cacheKey = HiveClientCacheKey.fromHiveConf(hiveConf, getThreadId()); - CacheableHiveMetaStoreClient hiveMetaStoreClient = null; - // the hmsc is not shared across threads. So the only way it could get closed while we are doing healthcheck - // is if removalListener closes it. The synchronization takes care that removalListener won't do it - synchronized (CACHE_TEARDOWN_LOCK) { - hiveMetaStoreClient = getOrCreate(cacheKey); - hiveMetaStoreClient.acquire(); - } - if (!hiveMetaStoreClient.isOpen()) { - synchronized (CACHE_TEARDOWN_LOCK) { - hiveCache.invalidate(cacheKey); - hiveMetaStoreClient.close(); - hiveMetaStoreClient = getOrCreate(cacheKey); - hiveMetaStoreClient.acquire(); - } - } - return hiveMetaStoreClient; + public void cleanup() { + hiveCache.cleanUp(); + } + + /** + * Returns a cached client if exists or else creates one, caches and returns it. It also checks that the client is + * healthy and can be reused + * @param hiveConf + * @return the hive client + * @throws MetaException + * @throws IOException + * @throws LoginException + */ + public HiveMetaStoreClient get(final HiveConf hiveConf) throws MetaException, IOException, LoginException { + final HiveClientCacheKey cacheKey = HiveClientCacheKey.fromHiveConf(hiveConf, getThreadId()); + CacheableHiveMetaStoreClient hiveMetaStoreClient = null; + // the hmsc is not shared across threads. So the only way it could get closed while we are doing healthcheck + // is if removalListener closes it. 
The synchronization takes care that removalListener won't do it + synchronized (CACHE_TEARDOWN_LOCK) { + hiveMetaStoreClient = getOrCreate(cacheKey); + hiveMetaStoreClient.acquire(); + } + if (!hiveMetaStoreClient.isOpen()) { + synchronized (CACHE_TEARDOWN_LOCK) { + hiveCache.invalidate(cacheKey); + hiveMetaStoreClient.close(); + hiveMetaStoreClient = getOrCreate(cacheKey); + hiveMetaStoreClient.acquire(); + } } + return hiveMetaStoreClient; + } - /** - * Return from cache if exists else create/cache and return - * @param cacheKey - * @return - * @throws IOException - * @throws MetaException - * @throws LoginException - */ - private CacheableHiveMetaStoreClient getOrCreate(final HiveClientCacheKey cacheKey) throws IOException, MetaException, LoginException { - try { - return hiveCache.get(cacheKey, new Callable() { - @Override - public CacheableHiveMetaStoreClient call() throws MetaException { - return new CacheableHiveMetaStoreClient(cacheKey.getHiveConf(), timeout); - } - }); - } catch (ExecutionException e) { - Throwable t = e.getCause(); - if (t instanceof IOException) { - throw (IOException) t; - } else if (t instanceof MetaException) { - throw (MetaException) t; - } else if (t instanceof LoginException) { - throw (LoginException) t; - } else { - throw new IOException("Error creating hiveMetaStoreClient", t); - } + /** + * Return from cache if exists else create/cache and return + * @param cacheKey + * @return + * @throws IOException + * @throws MetaException + * @throws LoginException + */ + private CacheableHiveMetaStoreClient getOrCreate(final HiveClientCacheKey cacheKey) throws IOException, MetaException, LoginException { + try { + return hiveCache.get(cacheKey, new Callable() { + @Override + public CacheableHiveMetaStoreClient call() throws MetaException { + return new CacheableHiveMetaStoreClient(cacheKey.getHiveConf(), timeout); } + }); + } catch (ExecutionException e) { + Throwable t = e.getCause(); + if (t instanceof IOException) { + throw (IOException) t; + } else if (t instanceof MetaException) { + throw (MetaException) t; + } else if (t instanceof LoginException) { + throw (LoginException) t; + } else { + throw new IOException("Error creating hiveMetaStoreClient", t); + } } + } - /** - * A class to wrap HiveConf and expose equality based only on UserGroupInformation and the metaStoreURIs. - * This becomes the key for the cache and this way the same HiveMetaStoreClient would be returned if - * UserGroupInformation and metaStoreURIs are same. This function can evolve to express - * the cases when HiveConf is different but the same hiveMetaStoreClient can be used - */ - public static class HiveClientCacheKey { - final private String metaStoreURIs; - final private UserGroupInformation ugi; - final private HiveConf hiveConf; - final private int threadId; + /** + * A class to wrap HiveConf and expose equality based only on UserGroupInformation and the metaStoreURIs. + * This becomes the key for the cache and this way the same HiveMetaStoreClient would be returned if + * UserGroupInformation and metaStoreURIs are same. 
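The load-on-miss call used by getOrCreate() above is Guava's Cache.get(key, loader); below is a minimal stand-alone example of that call and of unwrapping the ExecutionException it can throw (assumes Guava on the classpath; illustrative only).

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

public class LoadOnMissSketch {
  public static void main(String[] args) {
    Cache<String, String> cache = CacheBuilder.newBuilder().build();
    try {
      // Returns the cached value if present, otherwise runs the loader once
      // and caches its result (the same call getOrCreate() relies on).
      String value = cache.get("key", new Callable<String>() {
        public String call() {
          return "freshly created";
        }
      });
      System.out.println(value);
    } catch (ExecutionException e) {
      // getOrCreate() unwraps the cause and rethrows it as a more specific type.
      throw new RuntimeException(e.getCause());
    }
  }
}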
This function can evolve to express + * the cases when HiveConf is different but the same hiveMetaStoreClient can be used + */ + public static class HiveClientCacheKey { + final private String metaStoreURIs; + final private UserGroupInformation ugi; + final private HiveConf hiveConf; + final private int threadId; - private HiveClientCacheKey(HiveConf hiveConf, final int threadId) throws IOException, LoginException { - this.metaStoreURIs = hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS); - ugi = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); - this.hiveConf = hiveConf; - this.threadId = threadId; - } + private HiveClientCacheKey(HiveConf hiveConf, final int threadId) throws IOException, LoginException { + this.metaStoreURIs = hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS); + ugi = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); + this.hiveConf = hiveConf; + this.threadId = threadId; + } - public static HiveClientCacheKey fromHiveConf(HiveConf hiveConf, final int threadId) throws IOException, LoginException { - return new HiveClientCacheKey(hiveConf, threadId); - } + public static HiveClientCacheKey fromHiveConf(HiveConf hiveConf, final int threadId) throws IOException, LoginException { + return new HiveClientCacheKey(hiveConf, threadId); + } - public HiveConf getHiveConf() { - return hiveConf; - } + public HiveConf getHiveConf() { + return hiveConf; + } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - HiveClientCacheKey that = (HiveClientCacheKey) o; - return new EqualsBuilder(). - append(this.metaStoreURIs, - that.metaStoreURIs). - append(this.ugi, that.ugi). - append(this.threadId, that.threadId).isEquals(); - } + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + HiveClientCacheKey that = (HiveClientCacheKey) o; + return new EqualsBuilder(). + append(this.metaStoreURIs, + that.metaStoreURIs). + append(this.ugi, that.ugi). + append(this.threadId, that.threadId).isEquals(); + } - @Override - public int hashCode() { - return new HashCodeBuilder(). - append(metaStoreURIs). - append(ugi). - append(threadId).toHashCode(); - } + @Override + public int hashCode() { + return new HashCodeBuilder(). + append(metaStoreURIs). + append(ugi). + append(threadId).toHashCode(); } + } - /** - * Add # of current users on HiveMetaStoreClient, so that the client can be cleaned when no one is using it. - */ - public static class CacheableHiveMetaStoreClient extends HiveMetaStoreClient { - private AtomicInteger users = new AtomicInteger(0); - private volatile boolean expiredFromCache = false; - private boolean isClosed = false; - private final long expiryTime; - private static final int EXPIRY_TIME_EXTENSION_IN_MILLIS = 60 * 1000; + /** + * Add # of current users on HiveMetaStoreClient, so that the client can be cleaned when no one is using it. 
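The reference-counting life cycle described above reduces to the following plain-Java sketch (class and method names are illustrative; the real class also extends HiveMetaStoreClient and tracks an absolute expiry time).

import java.util.concurrent.atomic.AtomicInteger;

public class RefCountedClientSketch {
  private final AtomicInteger users = new AtomicInteger(0);
  private volatile boolean expiredFromCache = false;
  private boolean closed = false;

  // The cache calls acquire() before handing the client to a caller.
  void acquire() {
    users.incrementAndGet();
  }

  // The cache's removal listener flips this flag when the entry expires.
  void setExpiredFromCache() {
    expiredFromCache = true;
  }

  // Every caller must invoke close() when done; the underlying resource is
  // only torn down once the entry has expired AND nobody is still using it.
  public void close() {
    users.decrementAndGet();
    tearDownIfUnused();
  }

  void tearDownIfUnused() {
    if (users.get() == 0 && expiredFromCache) {
      tearDown();
    }
  }

  private synchronized void tearDown() {
    if (!closed) {
      // real cleanup (super.close() in CacheableHiveMetaStoreClient) goes here
      closed = true;
    }
  }
}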
+ */ + public static class CacheableHiveMetaStoreClient extends HiveMetaStoreClient { + private AtomicInteger users = new AtomicInteger(0); + private volatile boolean expiredFromCache = false; + private boolean isClosed = false; + private final long expiryTime; + private static final int EXPIRY_TIME_EXTENSION_IN_MILLIS = 60 * 1000; - public CacheableHiveMetaStoreClient(final HiveConf conf, final int timeout) throws MetaException { - super(conf); - // Extend the expiry time with some extra time on top of guava expiry time to make sure - // that items closed() are for sure expired and would never be returned by guava. - this.expiryTime = System.currentTimeMillis() + timeout * 1000 + EXPIRY_TIME_EXTENSION_IN_MILLIS; - } + public CacheableHiveMetaStoreClient(final HiveConf conf, final int timeout) throws MetaException { + super(conf); + // Extend the expiry time with some extra time on top of guava expiry time to make sure + // that items closed() are for sure expired and would never be returned by guava. + this.expiryTime = System.currentTimeMillis() + timeout * 1000 + EXPIRY_TIME_EXTENSION_IN_MILLIS; + } - private void acquire() { - users.incrementAndGet(); - } + private void acquire() { + users.incrementAndGet(); + } - private void release() { - users.decrementAndGet(); - } + private void release() { + users.decrementAndGet(); + } - public void setExpiredFromCache() { - expiredFromCache = true; - } + public void setExpiredFromCache() { + expiredFromCache = true; + } - public boolean isClosed() { - return isClosed; - } + public boolean isClosed() { + return isClosed; + } - /** - * Make a call to hive meta store and see if the client is still usable. Some calls where the user provides - * invalid data renders the client unusable for future use (example: create a table with very long table name) - * @return - */ - protected boolean isOpen() { - try { - // Look for an unlikely database name and see if either MetaException or TException is thrown - this.getDatabase("NonExistentDatabaseUsedForHealthCheck"); - } catch (NoSuchObjectException e) { - return true; // It is okay if the database doesn't exist - } catch (MetaException e) { - return false; - } catch (TException e) { - return false; - } - return true; - } + /** + * Make a call to hive meta store and see if the client is still usable. Some calls where the user provides + * invalid data renders the client unusable for future use (example: create a table with very long table name) + * @return + */ + protected boolean isOpen() { + try { + // Look for an unlikely database name and see if either MetaException or TException is thrown + this.getDatabase("NonExistentDatabaseUsedForHealthCheck"); + } catch (NoSuchObjectException e) { + return true; // It is okay if the database doesn't exist + } catch (MetaException e) { + return false; + } catch (TException e) { + return false; + } + return true; + } - /** - * Decrement the user count and piggyback this to set expiry flag as well, then teardown(), if conditions are met. - * This *MUST* be called by anyone who uses this client. - */ - @Override - public void close() { - release(); - if (System.currentTimeMillis() >= expiryTime) - setExpiredFromCache(); - tearDownIfUnused(); - } + /** + * Decrement the user count and piggyback this to set expiry flag as well, then teardown(), if conditions are met. + * This *MUST* be called by anyone who uses this client. 
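Since close() must be called by every user of a cached client, here is a minimal sketch of the intended calling pattern using the HCatUtil entry points added earlier in this patch (the HCatUtil package is assumed; both an org.apache.hcatalog.common and an org.apache.hive.hcatalog.common variant exist in this tree).

import java.io.IOException;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hive.hcatalog.common.HCatUtil;   // assumed package, see note above

public class CachedClientUsageSketch {
  public static void main(String[] args) throws MetaException, IOException {
    HiveConf hiveConf = new HiveConf();
    HiveMetaStoreClient client = null;
    try {
      client = HCatUtil.getHiveClient(hiveConf);
      // ... metastore calls go here ...
    } finally {
      // Hands the client back to the cache; it is only really torn down
      // once it has expired from the cache and has no remaining users.
      HCatUtil.closeHiveClientQuietly(client);
    }
  }
}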
+ */ + @Override + public void close() { + release(); + if (System.currentTimeMillis() >= expiryTime) + setExpiredFromCache(); + tearDownIfUnused(); + } - /** - * Tear down only if - * 1. There are no active user - * 2. It has expired from the cache - */ - private void tearDownIfUnused() { - if (users.get() == 0 && expiredFromCache) { - this.tearDown(); - } - } + /** + * Tear down only if + * 1. There are no active user + * 2. It has expired from the cache + */ + private void tearDownIfUnused() { + if (users.get() == 0 && expiredFromCache) { + this.tearDown(); + } + } - /** - * Close if not closed already - */ - protected synchronized void tearDown() { - try { - if (!isClosed) { - super.close(); - } - isClosed = true; - } catch (Exception e) { - LOG.warn("Error closing hive metastore client. Ignored.", e); - } + /** + * Close if not closed already + */ + protected synchronized void tearDown() { + try { + if (!isClosed) { + super.close(); } + isClosed = true; + } catch (Exception e) { + LOG.warn("Error closing hive metastore client. Ignored.", e); + } + } - /** - * Last effort to clean up, may not even get called. - * @throws Throwable - */ - @Override - protected void finalize() throws Throwable { - try { - this.tearDown(); - } finally { - super.finalize(); - } - } + /** + * Last effort to clean up, may not even get called. + * @throws Throwable + */ + @Override + protected void finalize() throws Throwable { + try { + this.tearDown(); + } finally { + super.finalize(); + } } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DataType.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DataType.java index 9b36121..d002bf9 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DataType.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DataType.java @@ -28,180 +28,180 @@ public abstract class DataType { - public static final byte NULL = 1; - public static final byte BOOLEAN = 5; - public static final byte BYTE = 6; - public static final byte INTEGER = 10; - public static final byte SHORT = 11; - public static final byte LONG = 15; - public static final byte FLOAT = 20; - public static final byte DOUBLE = 25; - public static final byte STRING = 55; - public static final byte BINARY = 60; - - public static final byte MAP = 100; - public static final byte STRUCT = 110; - public static final byte LIST = 120; - public static final byte ERROR = -1; - - /** - * Determine the datatype of an object. - * @param o Object to test. - * @return byte code of the type, or ERROR if we don't know. - */ - public static byte findType(Object o) { - if (o == null) { - return NULL; - } + public static final byte NULL = 1; + public static final byte BOOLEAN = 5; + public static final byte BYTE = 6; + public static final byte INTEGER = 10; + public static final byte SHORT = 11; + public static final byte LONG = 15; + public static final byte FLOAT = 20; + public static final byte DOUBLE = 25; + public static final byte STRING = 55; + public static final byte BINARY = 60; + + public static final byte MAP = 100; + public static final byte STRUCT = 110; + public static final byte LIST = 120; + public static final byte ERROR = -1; + + /** + * Determine the datatype of an object. + * @param o Object to test. + * @return byte code of the type, or ERROR if we don't know. 
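A few concrete examples of the type-code mapping described above, using the public constants and findType() from this DataType class (illustrative only).

import java.util.Arrays;

import org.apache.hive.hcatalog.data.DataType;

public class FindTypeSketch {
  public static void main(String[] args) {
    System.out.println(DataType.findType("hello") == DataType.STRING);           // true
    System.out.println(DataType.findType(42) == DataType.INTEGER);               // true (autoboxed)
    System.out.println(DataType.findType(Arrays.asList(1, 2)) == DataType.LIST); // true
    System.out.println(DataType.findType(null) == DataType.NULL);                // true
    System.out.println(DataType.findType(new Object()) == DataType.ERROR);       // unknown classes map to ERROR
  }
}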
+ */ + public static byte findType(Object o) { + if (o == null) { + return NULL; + } - Class clazz = o.getClass(); - - // Try to put the most common first - if (clazz == String.class) { - return STRING; - } else if (clazz == Integer.class) { - return INTEGER; - } else if (clazz == Long.class) { - return LONG; - } else if (clazz == Float.class) { - return FLOAT; - } else if (clazz == Double.class) { - return DOUBLE; - } else if (clazz == Boolean.class) { - return BOOLEAN; - } else if (clazz == Byte.class) { - return BYTE; - } else if (clazz == Short.class) { - return SHORT; - } else if (o instanceof List) { - return LIST; - } else if (o instanceof Map) { - return MAP; - } else if (o instanceof byte[]) { - return BINARY; - } else { - return ERROR; - } + Class clazz = o.getClass(); + + // Try to put the most common first + if (clazz == String.class) { + return STRING; + } else if (clazz == Integer.class) { + return INTEGER; + } else if (clazz == Long.class) { + return LONG; + } else if (clazz == Float.class) { + return FLOAT; + } else if (clazz == Double.class) { + return DOUBLE; + } else if (clazz == Boolean.class) { + return BOOLEAN; + } else if (clazz == Byte.class) { + return BYTE; + } else if (clazz == Short.class) { + return SHORT; + } else if (o instanceof List) { + return LIST; + } else if (o instanceof Map) { + return MAP; + } else if (o instanceof byte[]) { + return BINARY; + } else { + return ERROR; } + } - public static int compare(Object o1, Object o2) { + public static int compare(Object o1, Object o2) { - return compare(o1, o2, findType(o1), findType(o2)); - } + return compare(o1, o2, findType(o1), findType(o2)); + } - public static int compare(Object o1, Object o2, byte dt1, byte dt2) { - if (dt1 == dt2) { - switch (dt1) { - case NULL: - return 0; - - case BOOLEAN: - return ((Boolean) o1).compareTo((Boolean) o2); - - case BYTE: - return ((Byte) o1).compareTo((Byte) o2); - - case INTEGER: - return ((Integer) o1).compareTo((Integer) o2); - - case LONG: - return ((Long) o1).compareTo((Long) o2); - - case FLOAT: - return ((Float) o1).compareTo((Float) o2); - - case DOUBLE: - return ((Double) o1).compareTo((Double) o2); - - case STRING: - return ((String) o1).compareTo((String) o2); - - case SHORT: - return ((Short) o1).compareTo((Short) o2); - - case BINARY: - return compareByteArray((byte[]) o1, (byte[]) o2); - - case LIST: - List l1 = (List) o1; - List l2 = (List) o2; - int len = l1.size(); - if (len != l2.size()) { - return len - l2.size(); - } else { - for (int i = 0; i < len; i++) { - int cmpVal = compare(l1.get(i), l2.get(i)); - if (cmpVal != 0) { - return cmpVal; - } - } - return 0; - } - - case MAP: { - Map m1 = (Map) o1; - Map m2 = (Map) o2; - int sz1 = m1.size(); - int sz2 = m2.size(); - if (sz1 < sz2) { - return -1; - } else if (sz1 > sz2) { - return 1; - } else { - // This is bad, but we have to sort the keys of the maps in order - // to be commutative. 
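To summarise the ordering that compare() implements, a short illustrative sketch: values of the same type use their natural order, values of different types are ordered by their type codes, and NULL sorts first.

import org.apache.hive.hcatalog.data.DataType;

public class CompareSketch {
  public static void main(String[] args) {
    // Same type: natural ordering of the values themselves.
    System.out.println(DataType.compare("a", "b") < 0);    // true
    // Different types: ordered by type code (INTEGER = 10 sorts before STRING = 55).
    System.out.println(DataType.compare(1, "a") < 0);      // true
    // NULL (type code 1) sorts before every other type.
    System.out.println(DataType.compare(null, "a") < 0);   // true
  }
}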
- TreeMap tm1 = new TreeMap(m1); - TreeMap tm2 = new TreeMap(m2); - Iterator> i1 = tm1.entrySet().iterator(); - Iterator> i2 = tm2.entrySet().iterator(); - while (i1.hasNext()) { - Map.Entry entry1 = i1.next(); - Map.Entry entry2 = i2.next(); - int c = compare(entry1.getValue(), entry2.getValue()); - if (c != 0) { - return c; - } else { - c = compare(entry1.getValue(), entry2.getValue()); - if (c != 0) { - return c; - } - } - } - return 0; - } - } + public static int compare(Object o1, Object o2, byte dt1, byte dt2) { + if (dt1 == dt2) { + switch (dt1) { + case NULL: + return 0; - default: - throw new RuntimeException("Unkown type " + dt1 + - " in compare"); - } - } else { - return dt1 < dt2 ? -1 : 1; - } - } + case BOOLEAN: + return ((Boolean) o1).compareTo((Boolean) o2); - private static int compareByteArray(byte[] o1, byte[] o2) { + case BYTE: + return ((Byte) o1).compareTo((Byte) o2); - for (int i = 0; i < o1.length; i++) { - if (i == o2.length) { - return 1; - } - if (o1[i] == o2[i]) { - continue; + case INTEGER: + return ((Integer) o1).compareTo((Integer) o2); + + case LONG: + return ((Long) o1).compareTo((Long) o2); + + case FLOAT: + return ((Float) o1).compareTo((Float) o2); + + case DOUBLE: + return ((Double) o1).compareTo((Double) o2); + + case STRING: + return ((String) o1).compareTo((String) o2); + + case SHORT: + return ((Short) o1).compareTo((Short) o2); + + case BINARY: + return compareByteArray((byte[]) o1, (byte[]) o2); + + case LIST: + List l1 = (List) o1; + List l2 = (List) o2; + int len = l1.size(); + if (len != l2.size()) { + return len - l2.size(); + } else { + for (int i = 0; i < len; i++) { + int cmpVal = compare(l1.get(i), l2.get(i)); + if (cmpVal != 0) { + return cmpVal; } - if (o1[i] > o1[i]) { - return 1; + } + return 0; + } + + case MAP: { + Map m1 = (Map) o1; + Map m2 = (Map) o2; + int sz1 = m1.size(); + int sz2 = m2.size(); + if (sz1 < sz2) { + return -1; + } else if (sz1 > sz2) { + return 1; + } else { + // This is bad, but we have to sort the keys of the maps in order + // to be commutative. + TreeMap tm1 = new TreeMap(m1); + TreeMap tm2 = new TreeMap(m2); + Iterator> i1 = tm1.entrySet().iterator(); + Iterator> i2 = tm2.entrySet().iterator(); + while (i1.hasNext()) { + Map.Entry entry1 = i1.next(); + Map.Entry entry2 = i2.next(); + int c = compare(entry1.getValue(), entry2.getValue()); + if (c != 0) { + return c; } else { - return -1; + c = compare(entry1.getValue(), entry2.getValue()); + if (c != 0) { + return c; + } } + } + return 0; } + } + + default: + throw new RuntimeException("Unkown type " + dt1 + + " in compare"); + } + } else { + return dt1 < dt2 ? 
-1 : 1; + } + } + + private static int compareByteArray(byte[] o1, byte[] o2) { + + for (int i = 0; i < o1.length; i++) { + if (i == o2.length) { + return 1; + } + if (o1[i] == o2[i]) { + continue; + } + if (o1[i] > o1[i]) { + return 1; + } else { + return -1; + } + } - //bytes in o1 are same as o2 - //in case o2 was longer - if (o2.length > o1.length) { - return -1; - } - return 0; //equals + //bytes in o1 are same as o2 + //in case o2 was longer + if (o2.length > o1.length) { + return -1; } + return 0; //equals + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DefaultHCatRecord.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DefaultHCatRecord.java index c22a2b4..6749156 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DefaultHCatRecord.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DefaultHCatRecord.java @@ -30,102 +30,102 @@ public class DefaultHCatRecord extends HCatRecord { - private List contents; + private List contents; - public DefaultHCatRecord() { - contents = new ArrayList(); - } - - public DefaultHCatRecord(int size) { - contents = new ArrayList(size); - for (int i = 0; i < size; i++) { - contents.add(null); - } - } - - @Override - public void remove(int idx) throws HCatException { - contents.remove(idx); - } - - public DefaultHCatRecord(List list) { - contents = list; - } - - @Override - public Object get(int fieldNum) { - return contents.get(fieldNum); - } - - @Override - public List getAll() { - return contents; - } + public DefaultHCatRecord() { + contents = new ArrayList(); + } - @Override - public void set(int fieldNum, Object val) { - contents.set(fieldNum, val); + public DefaultHCatRecord(int size) { + contents = new ArrayList(size); + for (int i = 0; i < size; i++) { + contents.add(null); } - - @Override - public int size() { - return contents.size(); + } + + @Override + public void remove(int idx) throws HCatException { + contents.remove(idx); + } + + public DefaultHCatRecord(List list) { + contents = list; + } + + @Override + public Object get(int fieldNum) { + return contents.get(fieldNum); + } + + @Override + public List getAll() { + return contents; + } + + @Override + public void set(int fieldNum, Object val) { + contents.set(fieldNum, val); + } + + @Override + public int size() { + return contents.size(); + } + + @Override + public void readFields(DataInput in) throws IOException { + + contents.clear(); + int len = in.readInt(); + for (int i = 0; i < len; i++) { + contents.add(ReaderWriter.readDatum(in)); } - - @Override - public void readFields(DataInput in) throws IOException { - - contents.clear(); - int len = in.readInt(); - for (int i = 0; i < len; i++) { - contents.add(ReaderWriter.readDatum(in)); - } + } + + @Override + public void write(DataOutput out) throws IOException { + int sz = size(); + out.writeInt(sz); + for (int i = 0; i < sz; i++) { + ReaderWriter.writeDatum(out, contents.get(i)); } - @Override - public void write(DataOutput out) throws IOException { - int sz = size(); - out.writeInt(sz); - for (int i = 0; i < sz; i++) { - ReaderWriter.writeDatum(out, contents.get(i)); - } + } + @Override + public int hashCode() { + int hash = 1; + for (Object o : contents) { + if (o != null) { + hash = 31 * hash + o.hashCode(); + } } + return hash; + } - @Override - public int hashCode() { - int hash = 1; - for (Object o : contents) { - if (o != null) { - hash = 31 * hash + o.hashCode(); - } - } - return hash; - } - - @Override - public String toString() { - - 
StringBuilder sb = new StringBuilder(); - for (Object o : contents) { - sb.append(o + "\t"); - } - return sb.toString(); - } - - @Override - public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { - return get(recordSchema.getPosition(fieldName)); - } - - @Override - public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { - set(recordSchema.getPosition(fieldName), value); - } + @Override + public String toString() { - @Override - public void copy(HCatRecord r) throws HCatException { - this.contents = r.getAll(); + StringBuilder sb = new StringBuilder(); + for (Object o : contents) { + sb.append(o + "\t"); } + return sb.toString(); + } + + @Override + public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { + return get(recordSchema.getPosition(fieldName)); + } + + @Override + public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { + set(recordSchema.getPosition(fieldName), value); + } + + @Override + public void copy(HCatRecord r) throws HCatException { + this.contents = r.getAll(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecord.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecord.java index 14ed244..bb8b705 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecord.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecord.java @@ -33,117 +33,117 @@ */ public abstract class HCatRecord implements HCatRecordable { - public abstract Object get(String fieldName, HCatSchema recordSchema) throws HCatException; + public abstract Object get(String fieldName, HCatSchema recordSchema) throws HCatException; - public abstract void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException; + public abstract void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException; - public abstract void remove(int idx) throws HCatException; + public abstract void remove(int idx) throws HCatException; - public abstract void copy(HCatRecord r) throws HCatException; + public abstract void copy(HCatRecord r) throws HCatException; - protected Object get(String fieldName, HCatSchema recordSchema, Class clazz) throws HCatException { - // TODO : if needed, verify that recordschema entry for fieldname matches appropriate type. - return get(fieldName, recordSchema); - } + protected Object get(String fieldName, HCatSchema recordSchema, Class clazz) throws HCatException { + // TODO : if needed, verify that recordschema entry for fieldname matches appropriate type. 
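A brief usage sketch of the positional accessors shown above for DefaultHCatRecord (illustrative only, not part of this patch).

import org.apache.hive.hcatalog.data.DefaultHCatRecord;

public class RecordSketch {
  public static void main(String[] args) {
    // The sizing constructor pre-fills the record with nulls.
    DefaultHCatRecord record = new DefaultHCatRecord(3);
    record.set(0, 7);
    record.set(1, "seven");
    System.out.println(record.size());   // 3
    System.out.println(record.get(1));   // seven
    System.out.println(record);          // tab-separated rendering from toString()
  }
}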
+ return get(fieldName, recordSchema); + } - public Boolean getBoolean(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Boolean) get(fieldName, recordSchema, Boolean.class); - } + public Boolean getBoolean(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Boolean) get(fieldName, recordSchema, Boolean.class); + } - public void setBoolean(String fieldName, HCatSchema recordSchema, Boolean value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setBoolean(String fieldName, HCatSchema recordSchema, Boolean value) throws HCatException { + set(fieldName, recordSchema, value); + } - public byte[] getByteArray(String fieldName, HCatSchema recordSchema) throws HCatException { - return (byte[]) get(fieldName, recordSchema, byte[].class); - } + public byte[] getByteArray(String fieldName, HCatSchema recordSchema) throws HCatException { + return (byte[]) get(fieldName, recordSchema, byte[].class); + } - public void setByteArray(String fieldName, HCatSchema recordSchema, byte[] value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setByteArray(String fieldName, HCatSchema recordSchema, byte[] value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Byte getByte(String fieldName, HCatSchema recordSchema) throws HCatException { - //TINYINT - return (Byte) get(fieldName, recordSchema, Byte.class); - } + public Byte getByte(String fieldName, HCatSchema recordSchema) throws HCatException { + //TINYINT + return (Byte) get(fieldName, recordSchema, Byte.class); + } - public void setByte(String fieldName, HCatSchema recordSchema, Byte value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setByte(String fieldName, HCatSchema recordSchema, Byte value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Short getShort(String fieldName, HCatSchema recordSchema) throws HCatException { - // SMALLINT - return (Short) get(fieldName, recordSchema, Short.class); - } + public Short getShort(String fieldName, HCatSchema recordSchema) throws HCatException { + // SMALLINT + return (Short) get(fieldName, recordSchema, Short.class); + } - public void setShort(String fieldName, HCatSchema recordSchema, Short value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setShort(String fieldName, HCatSchema recordSchema, Short value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Integer getInteger(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Integer) get(fieldName, recordSchema, Integer.class); - } + public Integer getInteger(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Integer) get(fieldName, recordSchema, Integer.class); + } - public void setInteger(String fieldName, HCatSchema recordSchema, Integer value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setInteger(String fieldName, HCatSchema recordSchema, Integer value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Long getLong(String fieldName, HCatSchema recordSchema) throws HCatException { - // BIGINT - return (Long) get(fieldName, recordSchema, Long.class); - } + public Long getLong(String fieldName, HCatSchema recordSchema) throws HCatException { + // BIGINT + return (Long) get(fieldName, recordSchema, Long.class); + } - public void setLong(String fieldName, HCatSchema recordSchema, Long value) throws 
HCatException { - set(fieldName, recordSchema, value); - } + public void setLong(String fieldName, HCatSchema recordSchema, Long value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Float getFloat(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Float) get(fieldName, recordSchema, Float.class); - } + public Float getFloat(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Float) get(fieldName, recordSchema, Float.class); + } - public void setFloat(String fieldName, HCatSchema recordSchema, Float value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setFloat(String fieldName, HCatSchema recordSchema, Float value) throws HCatException { + set(fieldName, recordSchema, value); + } - public Double getDouble(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Double) get(fieldName, recordSchema, Double.class); - } + public Double getDouble(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Double) get(fieldName, recordSchema, Double.class); + } - public void setDouble(String fieldName, HCatSchema recordSchema, Double value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setDouble(String fieldName, HCatSchema recordSchema, Double value) throws HCatException { + set(fieldName, recordSchema, value); + } - public String getString(String fieldName, HCatSchema recordSchema) throws HCatException { - return (String) get(fieldName, recordSchema, String.class); - } + public String getString(String fieldName, HCatSchema recordSchema) throws HCatException { + return (String) get(fieldName, recordSchema, String.class); + } - public void setString(String fieldName, HCatSchema recordSchema, String value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setString(String fieldName, HCatSchema recordSchema, String value) throws HCatException { + set(fieldName, recordSchema, value); + } - @SuppressWarnings("unchecked") - public List getStruct(String fieldName, HCatSchema recordSchema) throws HCatException { - return (List) get(fieldName, recordSchema, List.class); - } + @SuppressWarnings("unchecked") + public List getStruct(String fieldName, HCatSchema recordSchema) throws HCatException { + return (List) get(fieldName, recordSchema, List.class); + } - public void setStruct(String fieldName, HCatSchema recordSchema, List value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setStruct(String fieldName, HCatSchema recordSchema, List value) throws HCatException { + set(fieldName, recordSchema, value); + } - public List getList(String fieldName, HCatSchema recordSchema) throws HCatException { - return (List) get(fieldName, recordSchema, List.class); - } + public List getList(String fieldName, HCatSchema recordSchema) throws HCatException { + return (List) get(fieldName, recordSchema, List.class); + } - public void setList(String fieldName, HCatSchema recordSchema, List value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public Map getMap(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Map) get(fieldName, recordSchema, Map.class); - } - - public void setMap(String fieldName, HCatSchema recordSchema, Map value) throws HCatException { - set(fieldName, recordSchema, value); - } + public void setList(String fieldName, HCatSchema recordSchema, List value) throws HCatException { + set(fieldName, recordSchema, value); + 
} + + public Map getMap(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Map) get(fieldName, recordSchema, Map.class); + } + + public void setMap(String fieldName, HCatSchema recordSchema, Map value) throws HCatException { + set(fieldName, recordSchema, value); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspector.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspector.java index 030f655..6f2633e 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspector.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspector.java @@ -26,28 +26,28 @@ public class HCatRecordObjectInspector extends StandardStructObjectInspector { - protected HCatRecordObjectInspector(List structFieldNames, - List structFieldObjectInspectors) { - super(structFieldNames, structFieldObjectInspectors); + protected HCatRecordObjectInspector(List structFieldNames, + List structFieldObjectInspectors) { + super(structFieldNames, structFieldObjectInspectors); + } + + @Override + public Object getStructFieldData(Object data, StructField fieldRef) { + if (data == null) { + return new IllegalArgumentException("Data passed in to get field from was null!"); } - @Override - public Object getStructFieldData(Object data, StructField fieldRef) { - if (data == null) { - return new IllegalArgumentException("Data passed in to get field from was null!"); - } - - int fieldID = ((MyField) fieldRef).getFieldID(); - if (!(fieldID >= 0 && fieldID < fields.size())) { - throw new IllegalArgumentException("Invalid field index [" + fieldID + "]"); - } - - return ((HCatRecord) data).get(fieldID); + int fieldID = ((MyField) fieldRef).getFieldID(); + if (!(fieldID >= 0 && fieldID < fields.size())) { + throw new IllegalArgumentException("Invalid field index [" + fieldID + "]"); } - @Override - public List getStructFieldsDataAsList(Object o) { - return ((HCatRecord) o).getAll(); - } + return ((HCatRecord) data).get(fieldID); + } + + @Override + public List getStructFieldsDataAsList(Object o) { + return ((HCatRecord) o).getAll(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspectorFactory.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspectorFactory.java index 9fbc9c0..81daa2d 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspectorFactory.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspectorFactory.java @@ -39,94 +39,94 @@ */ public class HCatRecordObjectInspectorFactory { - private final static Logger LOG = LoggerFactory.getLogger(HCatRecordObjectInspectorFactory.class); + private final static Logger LOG = LoggerFactory.getLogger(HCatRecordObjectInspectorFactory.class); - static HashMap cachedHCatRecordObjectInspectors = - new HashMap(); - static HashMap cachedObjectInspectors = - new HashMap(); + static HashMap cachedHCatRecordObjectInspectors = + new HashMap(); + static HashMap cachedObjectInspectors = + new HashMap(); - /** - * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into - * @param typeInfo Type definition for the record to look into - * @return appropriate HCatRecordObjectInspector - * @throws SerDeException - */ - public static HCatRecordObjectInspector getHCatRecordObjectInspector( - StructTypeInfo typeInfo) throws SerDeException { - 
HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo); - if (oi == null) { + /** + * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into + * @param typeInfo Type definition for the record to look into + * @return appropriate HCatRecordObjectInspector + * @throws SerDeException + */ + public static HCatRecordObjectInspector getHCatRecordObjectInspector( + StructTypeInfo typeInfo) throws SerDeException { + HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo); + if (oi == null) { - LOG.debug("Got asked for OI for {} [{} ]", typeInfo.getCategory(), typeInfo.getTypeName()); - switch (typeInfo.getCategory()) { - case STRUCT: - StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - List fieldNames = structTypeInfo.getAllStructFieldNames(); - List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - List fieldObjectInspectors = new ArrayList(fieldTypeInfos.size()); - for (int i = 0; i < fieldTypeInfos.size(); i++) { - fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); - } - oi = new HCatRecordObjectInspector(fieldNames, fieldObjectInspectors); - - break; - default: - // Hmm.. not good, - // the only type expected here is STRUCT, which maps to HCatRecord - // - anything else is an error. Return null as the inspector. - throw new SerDeException("TypeInfo [" + typeInfo.getTypeName() - + "] was not of struct type - HCatRecord expected struct type, got [" - + typeInfo.getCategory().toString() + "]"); - } - cachedHCatRecordObjectInspectors.put(typeInfo, oi); + LOG.debug("Got asked for OI for {} [{} ]", typeInfo.getCategory(), typeInfo.getTypeName()); + switch (typeInfo.getCategory()) { + case STRUCT: + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldNames = structTypeInfo.getAllStructFieldNames(); + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + List fieldObjectInspectors = new ArrayList(fieldTypeInfos.size()); + for (int i = 0; i < fieldTypeInfos.size(); i++) { + fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); } - return oi; + oi = new HCatRecordObjectInspector(fieldNames, fieldObjectInspectors); + + break; + default: + // Hmm.. not good, + // the only type expected here is STRUCT, which maps to HCatRecord + // - anything else is an error. Return null as the inspector. 
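A minimal sketch of asking this factory for an inspector, built from a two-column StructTypeInfo (illustrative only; as the check above enforces, only STRUCT type infos are accepted).

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hive.hcatalog.data.HCatRecordObjectInspector;
import org.apache.hive.hcatalog.data.HCatRecordObjectInspectorFactory;

public class RecordInspectorSketch {
  public static void main(String[] args) throws SerDeException {
    List<String> names = Arrays.asList("id", "name");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    StructTypeInfo rowType =
        (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types);

    // The factory caches by StructTypeInfo, so repeated calls with the same
    // row shape hand back the same inspector instance.
    HCatRecordObjectInspector oi =
        HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowType);
    System.out.println(oi.getAllStructFieldRefs().size());   // 2
  }
}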
+ throw new SerDeException("TypeInfo [" + typeInfo.getTypeName() + + "] was not of struct type - HCatRecord expected struct type, got [" + + typeInfo.getCategory().toString() + "]"); + } + cachedHCatRecordObjectInspectors.put(typeInfo, oi); } + return oi; + } - public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) { + public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) { - ObjectInspector oi = cachedObjectInspectors.get(typeInfo); - if (oi == null) { + ObjectInspector oi = cachedObjectInspectors.get(typeInfo); + if (oi == null) { - LOG.debug("Got asked for OI for {}, [{}]", typeInfo.getCategory(), typeInfo.getTypeName()); - switch (typeInfo.getCategory()) { - case PRIMITIVE: - oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); - break; - case STRUCT: - StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - List fieldNames = structTypeInfo.getAllStructFieldNames(); - List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - List fieldObjectInspectors = - new ArrayList(fieldTypeInfos.size()); - for (int i = 0; i < fieldTypeInfos.size(); i++) { - fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); - } - oi = ObjectInspectorFactory.getStandardStructObjectInspector( - fieldNames, fieldObjectInspectors - ); - break; - case LIST: - ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo( - ((ListTypeInfo) typeInfo).getListElementTypeInfo()); - oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector); - break; - case MAP: - ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo( - ((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); - ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo( - ((MapTypeInfo) typeInfo).getMapValueTypeInfo()); - oi = ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector, valueObjectInspector); - break; - default: - oi = null; - } - cachedObjectInspectors.put(typeInfo, oi); + LOG.debug("Got asked for OI for {}, [{}]", typeInfo.getCategory(), typeInfo.getTypeName()); + switch (typeInfo.getCategory()) { + case PRIMITIVE: + oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); + break; + case STRUCT: + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldNames = structTypeInfo.getAllStructFieldNames(); + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + List fieldObjectInspectors = + new ArrayList(fieldTypeInfos.size()); + for (int i = 0; i < fieldTypeInfos.size(); i++) { + fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); } - return oi; + oi = ObjectInspectorFactory.getStandardStructObjectInspector( + fieldNames, fieldObjectInspectors + ); + break; + case LIST: + ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo( + ((ListTypeInfo) typeInfo).getListElementTypeInfo()); + oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector); + break; + case MAP: + ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo( + ((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); + ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo( + ((MapTypeInfo) typeInfo).getMapValueTypeInfo()); + oi = 
ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector, valueObjectInspector); + break; + default: + oi = null; + } + cachedObjectInspectors.put(typeInfo, oi); } + return oi; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordSerDe.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordSerDe.java index 3153847..8a8620c 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordSerDe.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordSerDe.java @@ -53,266 +53,266 @@ */ public class HCatRecordSerDe implements SerDe { - private static final Logger LOG = LoggerFactory.getLogger(HCatRecordSerDe.class); + private static final Logger LOG = LoggerFactory.getLogger(HCatRecordSerDe.class); - public HCatRecordSerDe() throws SerDeException { - } + public HCatRecordSerDe() throws SerDeException { + } - private List columnNames; - private List columnTypes; - private StructTypeInfo rowTypeInfo; + private List columnNames; + private List columnTypes; + private StructTypeInfo rowTypeInfo; - private HCatRecordObjectInspector cachedObjectInspector; + private HCatRecordObjectInspector cachedObjectInspector; - @Override - public void initialize(Configuration conf, Properties tbl) - throws SerDeException { + @Override + public void initialize(Configuration conf, Properties tbl) + throws SerDeException { - LOG.debug("Initializing HCatRecordSerDe"); - LOG.debug("props to serde: {}", tbl.entrySet()); + LOG.debug("Initializing HCatRecordSerDe"); + LOG.debug("props to serde: {}", tbl.entrySet()); - // Get column names and types - String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); - String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); + // Get column names and types + String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); + String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); - // all table column names - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(",")); - } + // all table column names + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } - // all column types - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList(); - } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - } + // all column types + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } - LOG.debug("columns: {} {}", columnNameProperty, columnNames); - LOG.debug("types: {} {}", columnTypeProperty, columnTypes); - assert (columnNames.size() == columnTypes.size()); + LOG.debug("columns: {} {}", columnNameProperty, columnNames); + LOG.debug("types: {} {}", columnTypeProperty, columnTypes); + assert (columnNames.size() == columnTypes.size()); - rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); - cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); - } + rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + } - public void initialize(HCatSchema hsch) throws 
SerDeException { + public void initialize(HCatSchema hsch) throws SerDeException { - LOG.debug("Initializing HCatRecordSerDe through HCatSchema {}.", hsch); + LOG.debug("Initializing HCatRecordSerDe through HCatSchema {}.", hsch); - rowTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hsch.getSchemaAsTypeString()); - cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + rowTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hsch.getSchemaAsTypeString()); + cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); - } + } - /** - * The purpose of a deserialize method is to turn a data blob - * which is a writable representation of the data into an - * object that can then be parsed using the appropriate - * ObjectInspector. In this case, since HCatRecord is directly - * already the Writable object, there's no extra work to be done - * here. Most of the logic resides in the ObjectInspector to be - * able to return values from within the HCatRecord to hive when - * it wants it. - */ - @Override - public Object deserialize(Writable data) throws SerDeException { - if (!(data instanceof HCatRecord)) { - throw new SerDeException(getClass().getName() + ": expects HCatRecord!"); - } - - return (HCatRecord) data; + /** + * The purpose of a deserialize method is to turn a data blob + * which is a writable representation of the data into an + * object that can then be parsed using the appropriate + * ObjectInspector. In this case, since HCatRecord is directly + * already the Writable object, there's no extra work to be done + * here. Most of the logic resides in the ObjectInspector to be + * able to return values from within the HCatRecord to hive when + * it wants it. + */ + @Override + public Object deserialize(Writable data) throws SerDeException { + if (!(data instanceof HCatRecord)) { + throw new SerDeException(getClass().getName() + ": expects HCatRecord!"); } - /** - * The purpose of the serialize method is to turn an object-representation - * with a provided ObjectInspector into a Writable format, which - * the underlying layer can then use to write out. - * - * In this case, it means that Hive will call this method to convert - * an object with appropriate objectinspectors that it knows about, - * to write out a HCatRecord. - */ - @Override - public Writable serialize(Object obj, ObjectInspector objInspector) - throws SerDeException { - if (objInspector.getCategory() != Category.STRUCT) { - throw new SerDeException(getClass().toString() - + " can only serialize struct types, but we got: " - + objInspector.getTypeName()); - } - return new DefaultHCatRecord((List) serializeStruct(obj, (StructObjectInspector) objInspector)); + return (HCatRecord) data; + } + + /** + * The purpose of the serialize method is to turn an object-representation + * with a provided ObjectInspector into a Writable format, which + * the underlying layer can then use to write out. + * + * In this case, it means that Hive will call this method to convert + * an object with appropriate objectinspectors that it knows about, + * to write out a HCatRecord. 
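A sketch of the Properties-driven initialize() path together with the pass-through deserialize() described above, assuming a made-up two-column table (id int, name string):

import java.util.Arrays;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.HCatRecordSerDe;

public class HCatRecordSerDeDemo {
  public static void main(String[] args) throws Exception {
    // Hypothetical table layout; "columns" is comma-separated, "columns.types" colon-separated.
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string");

    HCatRecordSerDe serde = new HCatRecordSerDe();
    serde.initialize(new Configuration(), tbl);

    // deserialize() is a pass-through: the HCatRecord itself is the Writable,
    // and field access happens through the cached HCatRecordObjectInspector.
    HCatRecord row = new DefaultHCatRecord(Arrays.<Object>asList(1, "alice"));
    StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
    Object back = serde.deserialize(row);
    System.out.println(soi.getStructFieldData(back, soi.getStructFieldRef("name"))); // alice
  }
}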
+ */ + @Override + public Writable serialize(Object obj, ObjectInspector objInspector) + throws SerDeException { + if (objInspector.getCategory() != Category.STRUCT) { + throw new SerDeException(getClass().toString() + + " can only serialize struct types, but we got: " + + objInspector.getTypeName()); } + return new DefaultHCatRecord((List) serializeStruct(obj, (StructObjectInspector) objInspector)); + } - /** - * Return serialized HCatRecord from an underlying - * object-representation, and readable by an ObjectInspector - * @param obj : Underlying object-representation - * @param soi : StructObjectInspector - * @return HCatRecord - */ - private static List serializeStruct(Object obj, StructObjectInspector soi) - throws SerDeException { - - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(obj); - - if (list == null) { - return null; - } - - List l = new ArrayList(fields.size()); - - if (fields != null) { - for (int i = 0; i < fields.size(); i++) { - - // Get the field objectInspector and the field object. - ObjectInspector foi = fields.get(i).getFieldObjectInspector(); - Object f = list.get(i); - Object res = serializeField(f, foi); - l.add(i, res); - } - } - return l; - } + /** + * Return serialized HCatRecord from an underlying + * object-representation, and readable by an ObjectInspector + * @param obj : Underlying object-representation + * @param soi : StructObjectInspector + * @return HCatRecord + */ + private static List serializeStruct(Object obj, StructObjectInspector soi) + throws SerDeException { - /** - * Return underlying Java Object from an object-representation - * that is readable by a provided ObjectInspector. - */ - public static Object serializeField(Object field, ObjectInspector fieldObjectInspector) - throws SerDeException { - - Object res; - if (fieldObjectInspector.getCategory() == Category.PRIMITIVE) { - res = serializePrimitiveField(field, fieldObjectInspector); - } else if (fieldObjectInspector.getCategory() == Category.STRUCT) { - res = serializeStruct(field, (StructObjectInspector) fieldObjectInspector); - } else if (fieldObjectInspector.getCategory() == Category.LIST) { - res = serializeList(field, (ListObjectInspector) fieldObjectInspector); - } else if (fieldObjectInspector.getCategory() == Category.MAP) { - res = serializeMap(field, (MapObjectInspector) fieldObjectInspector); - } else { - throw new SerDeException(HCatRecordSerDe.class.toString() - + " does not know what to do with fields of unknown category: " - + fieldObjectInspector.getCategory() + " , type: " + fieldObjectInspector.getTypeName()); - } - return res; - } + List fields = soi.getAllStructFieldRefs(); + List list = soi.getStructFieldsDataAsList(obj); - /** - * Helper method to return underlying Java Map from - * an object-representation that is readable by a provided - * MapObjectInspector - */ - private static Map serializeMap(Object f, MapObjectInspector moi) throws SerDeException { - ObjectInspector koi = moi.getMapKeyObjectInspector(); - ObjectInspector voi = moi.getMapValueObjectInspector(); - Map m = new TreeMap(); - - Map readMap = moi.getMap(f); - if (readMap == null) { - return null; - } else { - for (Map.Entry entry : readMap.entrySet()) { - m.put(serializeField(entry.getKey(), koi), serializeField(entry.getValue(), voi)); - } - } - return m; + if (list == null) { + return null; } - private static List serializeList(Object f, ListObjectInspector loi) throws SerDeException { - List l = loi.getList(f); - if (l == null) { - return null; - } - - 
ObjectInspector eloi = loi.getListElementObjectInspector(); - if (eloi.getCategory() == Category.PRIMITIVE) { - List list = new ArrayList(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(((PrimitiveObjectInspector) eloi).getPrimitiveJavaObject(l.get(i))); - } - return list; - } else if (eloi.getCategory() == Category.STRUCT) { - List> list = new ArrayList>(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi)); - } - return list; - } else if (eloi.getCategory() == Category.LIST) { - List> list = new ArrayList>(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(serializeList(l.get(i), (ListObjectInspector) eloi)); - } - return list; - } else if (eloi.getCategory() == Category.MAP) { - List> list = new ArrayList>(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(serializeMap(l.get(i), (MapObjectInspector) eloi)); - } - return list; - } else { - throw new SerDeException(HCatRecordSerDe.class.toString() - + " does not know what to do with fields of unknown category: " - + eloi.getCategory() + " , type: " + eloi.getTypeName()); - } - } + List l = new ArrayList(fields.size()); - private static Object serializePrimitiveField(Object field, - ObjectInspector fieldObjectInspector) { - - Object f = ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field); - if (f != null && HCatContext.INSTANCE.getConf().isPresent()) { - Configuration conf = HCatContext.INSTANCE.getConf().get(); - - if (f instanceof Boolean && - conf.getBoolean( - HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, - HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) { - return ((Boolean) f) ? 1 : 0; - } else if (f instanceof Short && - conf.getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { - return new Integer((Short) f); - } else if (f instanceof Byte && - conf.getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { - return new Integer((Byte) f); - } - } - - return f; - } + if (fields != null) { + for (int i = 0; i < fields.size(); i++) { - /** - * Return an object inspector that can read through the object - * that we return from deserialize(). To wit, that means we need - * to return an ObjectInspector that can read HCatRecord, given - * the type info for it during initialize(). This also means - * that this method cannot and should not be called before initialize() - */ - @Override - public ObjectInspector getObjectInspector() throws SerDeException { - return (ObjectInspector) cachedObjectInspector; + // Get the field objectInspector and the field object. + ObjectInspector foi = fields.get(i).getFieldObjectInspector(); + Object f = list.get(i); + Object res = serializeField(f, foi); + l.add(i, res); + } + } + return l; + } + + /** + * Return underlying Java Object from an object-representation + * that is readable by a provided ObjectInspector. 
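Because serializeField() is public and static, it can also be used on its own to copy an object reachable through any standard ObjectInspector into the plain Java form an HCatRecord holds. A minimal sketch over a list<string> inspector; the class name is invented:

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hive.hcatalog.data.HCatRecordSerDe;

public class SerializeFieldDemo {
  public static void main(String[] args) throws Exception {
    // A standard list<string> inspector over plain Java objects.
    ObjectInspector elementOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(elementOI);

    // serializeField() walks the inspector and returns plain Java objects.
    Object copied = HCatRecordSerDe.serializeField(Arrays.asList("a", "b"), listOI);
    System.out.println(copied); // [a, b]
  }
}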
+ */ + public static Object serializeField(Object field, ObjectInspector fieldObjectInspector) + throws SerDeException { + + Object res; + if (fieldObjectInspector.getCategory() == Category.PRIMITIVE) { + res = serializePrimitiveField(field, fieldObjectInspector); + } else if (fieldObjectInspector.getCategory() == Category.STRUCT) { + res = serializeStruct(field, (StructObjectInspector) fieldObjectInspector); + } else if (fieldObjectInspector.getCategory() == Category.LIST) { + res = serializeList(field, (ListObjectInspector) fieldObjectInspector); + } else if (fieldObjectInspector.getCategory() == Category.MAP) { + res = serializeMap(field, (MapObjectInspector) fieldObjectInspector); + } else { + throw new SerDeException(HCatRecordSerDe.class.toString() + + " does not know what to do with fields of unknown category: " + + fieldObjectInspector.getCategory() + " , type: " + fieldObjectInspector.getTypeName()); } + return res; + } + + /** + * Helper method to return underlying Java Map from + * an object-representation that is readable by a provided + * MapObjectInspector + */ + private static Map serializeMap(Object f, MapObjectInspector moi) throws SerDeException { + ObjectInspector koi = moi.getMapKeyObjectInspector(); + ObjectInspector voi = moi.getMapValueObjectInspector(); + Map m = new TreeMap(); + + Map readMap = moi.getMap(f); + if (readMap == null) { + return null; + } else { + for (Map.Entry entry : readMap.entrySet()) { + m.put(serializeField(entry.getKey(), koi), serializeField(entry.getValue(), voi)); + } + } + return m; + } - @Override - public Class getSerializedClass() { - return HCatRecord.class; + private static List serializeList(Object f, ListObjectInspector loi) throws SerDeException { + List l = loi.getList(f); + if (l == null) { + return null; } - @Override - public SerDeStats getSerDeStats() { - // no support for statistics yet - return null; + ObjectInspector eloi = loi.getListElementObjectInspector(); + if (eloi.getCategory() == Category.PRIMITIVE) { + List list = new ArrayList(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(((PrimitiveObjectInspector) eloi).getPrimitiveJavaObject(l.get(i))); + } + return list; + } else if (eloi.getCategory() == Category.STRUCT) { + List> list = new ArrayList>(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi)); + } + return list; + } else if (eloi.getCategory() == Category.LIST) { + List> list = new ArrayList>(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(serializeList(l.get(i), (ListObjectInspector) eloi)); + } + return list; + } else if (eloi.getCategory() == Category.MAP) { + List> list = new ArrayList>(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(serializeMap(l.get(i), (MapObjectInspector) eloi)); + } + return list; + } else { + throw new SerDeException(HCatRecordSerDe.class.toString() + + " does not know what to do with fields of unknown category: " + + eloi.getCategory() + " , type: " + eloi.getTypeName()); + } + } + + private static Object serializePrimitiveField(Object field, + ObjectInspector fieldObjectInspector) { + + Object f = ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field); + if (f != null && HCatContext.INSTANCE.getConf().isPresent()) { + Configuration conf = HCatContext.INSTANCE.getConf().get(); + + if (f instanceof Boolean && + conf.getBoolean( + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) { + return 
((Boolean) f) ? 1 : 0; + } else if (f instanceof Short && + conf.getBoolean( + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { + return new Integer((Short) f); + } else if (f instanceof Byte && + conf.getBoolean( + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { + return new Integer((Byte) f); + } } + return f; + } + + /** + * Return an object inspector that can read through the object + * that we return from deserialize(). To wit, that means we need + * to return an ObjectInspector that can read HCatRecord, given + * the type info for it during initialize(). This also means + * that this method cannot and should not be called before initialize() + */ + @Override + public ObjectInspector getObjectInspector() throws SerDeException { + return (ObjectInspector) cachedObjectInspector; + } + + @Override + public Class getSerializedClass() { + return HCatRecord.class; + } + + @Override + public SerDeStats getSerDeStats() { + // no support for statistics yet + return null; + } + } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordable.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordable.java index 4e9934e..9ab439c 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordable.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordable.java @@ -27,30 +27,30 @@ */ public interface HCatRecordable extends Writable { - /** - * Gets the field at the specified index. - * @param fieldNum the field number - * @return the object at the specified index - */ - Object get(int fieldNum); - - /** - * Gets all the fields of the hcat record. - * @return the list of fields - */ - List getAll(); - - /** - * Sets the field at the specified index. - * @param fieldNum the field number - * @param value the value to set - */ - void set(int fieldNum, Object value); - - /** - * Gets the size of the hcat record. - * @return the size - */ - int size(); + /** + * Gets the field at the specified index. + * @param fieldNum the field number + * @return the object at the specified index + */ + Object get(int fieldNum); + + /** + * Gets all the fields of the hcat record. + * @return the list of fields + */ + List getAll(); + + /** + * Sets the field at the specified index. + * @param fieldNum the field number + * @param value the value to set + */ + void set(int fieldNum, Object value); + + /** + * Gets the size of the hcat record. 
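A small sketch of the positional HCatRecordable contract as implemented by DefaultHCatRecord, pre-sizing the row with the same nCopies idiom the serdes use; the class name and values are made up:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecordable;

public class PositionalDemo {
  public static void main(String[] args) {
    // Pre-size a row with null slots, then fill it by position.
    List<Object> slots = new ArrayList<Object>(Collections.nCopies(3, (Object) null));
    HCatRecordable rec = new DefaultHCatRecord(slots);

    rec.set(0, 7);
    rec.set(1, "seven");
    System.out.println(rec.size());   // 3
    System.out.println(rec.get(1));   // seven
    System.out.println(rec.getAll()); // [7, seven, null]
  }
}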
+ * @return the size + */ + int size(); } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java index b68f308..72b37f5 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java @@ -73,503 +73,503 @@ public class JsonSerDe implements SerDe { - private static final Logger LOG = LoggerFactory.getLogger(JsonSerDe.class); - private List columnNames; - private List columnTypes; + private static final Logger LOG = LoggerFactory.getLogger(JsonSerDe.class); + private List columnNames; + private List columnTypes; - private StructTypeInfo rowTypeInfo; - private HCatSchema schema; + private StructTypeInfo rowTypeInfo; + private HCatSchema schema; - private JsonFactory jsonFactory = null; + private JsonFactory jsonFactory = null; - private HCatRecordObjectInspector cachedObjectInspector; + private HCatRecordObjectInspector cachedObjectInspector; - @Override - public void initialize(Configuration conf, Properties tbl) - throws SerDeException { + @Override + public void initialize(Configuration conf, Properties tbl) + throws SerDeException { - LOG.debug("Initializing JsonSerDe"); - LOG.debug("props to serde: {}", tbl.entrySet()); + LOG.debug("Initializing JsonSerDe"); + LOG.debug("props to serde: {}", tbl.entrySet()); - // Get column names and types - String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); - String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); + // Get column names and types + String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); + String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); - // all table column names - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(",")); - } + // all table column names + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } - // all column types - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList(); - } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - } + // all column types + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } - LOG.debug("columns: {}, {}", columnNameProperty, columnNames); - LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes); + LOG.debug("columns: {}, {}", columnNameProperty, columnNames); + LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes); - assert (columnNames.size() == columnTypes.size()); + assert (columnNames.size() == columnTypes.size()); - rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); - cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); - try { - schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema(); - LOG.debug("schema : {}", schema); - LOG.debug("fields : {}", schema.getFieldNames()); - } catch (HCatException e) { - throw new SerDeException(e); - } + cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); 
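A sketch of JsonSerDe configured through the columns/columns.types properties and then deserializing one JSON row into an HCatRecord; the column names and the sample row are invented for illustration:

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.io.Text;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.JsonSerDe;

public class JsonReadDemo {
  public static void main(String[] args) throws Exception {
    // Hypothetical layout: (id int, name string, scores array<double>).
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,scores");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string:array<double>");

    JsonSerDe serde = new JsonSerDe();
    serde.initialize(new Configuration(), tbl);

    // One JSON object per row; keys are matched against the column names.
    Text row = new Text("{\"id\":1,\"name\":\"alice\",\"scores\":[9.5,8.0]}");
    HCatRecord rec = (HCatRecord) serde.deserialize(row);
    System.out.println(rec.get(1)); // alice
    System.out.println(rec.get(2)); // [9.5, 8.0]
  }
}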
+ try { + schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema(); + LOG.debug("schema : {}", schema); + LOG.debug("fields : {}", schema.getFieldNames()); + } catch (HCatException e) { + throw new SerDeException(e); + } - jsonFactory = new JsonFactory(); + jsonFactory = new JsonFactory(); + } + + /** + * Takes JSON string in Text form, and has to return an object representation above + * it that's readable by the corresponding object inspector. + * + * For this implementation, since we're using the jackson parser, we can construct + * our own object implementation, and we use HCatRecord for it + */ + @Override + public Object deserialize(Writable blob) throws SerDeException { + + Text t = (Text) blob; + JsonParser p; + List r = new ArrayList(Collections.nCopies(columnNames.size(), null)); + try { + p = jsonFactory.createJsonParser(new ByteArrayInputStream((t.getBytes()))); + if (p.nextToken() != JsonToken.START_OBJECT) { + throw new IOException("Start token not found where expected"); + } + JsonToken token; + while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) { + // iterate through each token, and create appropriate object here. + populateRecord(r, token, p, schema); + } + } catch (JsonParseException e) { + LOG.warn("Error [{}] parsing json text [{}].", e, t); + LOG.debug(null, e); + throw new SerDeException(e); + } catch (IOException e) { + LOG.warn("Error [{}] parsing json text [{}].", e, t); + LOG.debug(null, e); + throw new SerDeException(e); } - /** - * Takes JSON string in Text form, and has to return an object representation above - * it that's readable by the corresponding object inspector. - * - * For this implementation, since we're using the jackson parser, we can construct - * our own object implementation, and we use HCatRecord for it - */ - @Override - public Object deserialize(Writable blob) throws SerDeException { - - Text t = (Text) blob; - JsonParser p; - List r = new ArrayList(Collections.nCopies(columnNames.size(), null)); - try { - p = jsonFactory.createJsonParser(new ByteArrayInputStream((t.getBytes()))); - if (p.nextToken() != JsonToken.START_OBJECT) { - throw new IOException("Start token not found where expected"); - } - JsonToken token; - while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) { - // iterate through each token, and create appropriate object here. 
- populateRecord(r, token, p, schema); - } - } catch (JsonParseException e) { - LOG.warn("Error [{}] parsing json text [{}].", e, t); - LOG.debug(null, e); - throw new SerDeException(e); - } catch (IOException e) { - LOG.warn("Error [{}] parsing json text [{}].", e, t); - LOG.debug(null, e); - throw new SerDeException(e); - } + return new DefaultHCatRecord(r); + } - return new DefaultHCatRecord(r); + private void populateRecord(List r, JsonToken token, JsonParser p, HCatSchema s) throws IOException { + if (token != JsonToken.FIELD_NAME) { + throw new IOException("Field name expected"); } - - private void populateRecord(List r, JsonToken token, JsonParser p, HCatSchema s) throws IOException { - if (token != JsonToken.FIELD_NAME) { - throw new IOException("Field name expected"); - } - String fieldName = p.getText(); - int fpos; - try { - fpos = s.getPosition(fieldName); - } catch (NullPointerException npe) { - fpos = getPositionFromHiveInternalColumnName(fieldName); - LOG.debug("NPE finding position for field [{}] in schema [{}]", fieldName, s); - if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) { - LOG.error("Hive internal column name {} and position " - + "encoding {} for the column name are at odds", fieldName, fpos); - throw npe; - } - if (fpos == -1) { - return; // unknown field, we return. - } - } - HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos); - Object currField = extractCurrentField(p, null, hcatFieldSchema, false); - r.set(fpos, currField); + String fieldName = p.getText(); + int fpos; + try { + fpos = s.getPosition(fieldName); + } catch (NullPointerException npe) { + fpos = getPositionFromHiveInternalColumnName(fieldName); + LOG.debug("NPE finding position for field [{}] in schema [{}]", fieldName, s); + if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) { + LOG.error("Hive internal column name {} and position " + + "encoding {} for the column name are at odds", fieldName, fpos); + throw npe; + } + if (fpos == -1) { + return; // unknown field, we return. + } } + HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos); + Object currField = extractCurrentField(p, null, hcatFieldSchema, false); + r.set(fpos, currField); + } - public String getHiveInternalColumnName(int fpos) { - return HiveConf.getColumnInternalName(fpos); - } + public String getHiveInternalColumnName(int fpos) { + return HiveConf.getColumnInternalName(fpos); + } - public int getPositionFromHiveInternalColumnName(String internalName) { + public int getPositionFromHiveInternalColumnName(String internalName) { // return HiveConf.getPositionFromInternalName(fieldName); - // The above line should have been all the implementation that - // we need, but due to a bug in that impl which recognizes - // only single-digit columns, we need another impl here. - Pattern internalPattern = Pattern.compile("_col([0-9]+)"); - Matcher m = internalPattern.matcher(internalName); - if (!m.matches()) { - return -1; - } else { - return Integer.parseInt(m.group(1)); - } + // The above line should have been all the implementation that + // we need, but due to a bug in that impl which recognizes + // only single-digit columns, we need another impl here. 
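The internal-name fallback above means a row whose keys are Hive internal column names (_col0, _col1, ...) still lands in the right slots. A minimal sketch, again with made-up columns:

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.io.Text;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.JsonSerDe;

public class InternalNameDemo {
  public static void main(String[] args) throws Exception {
    Properties tbl = new Properties();
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string");

    JsonSerDe serde = new JsonSerDe();
    serde.initialize(new Configuration(), tbl);

    // "_col0"/"_col1" are not in the schema, so the NPE fallback kicks in and
    // the regex-based position lookup routes the values to fields 0 and 1.
    HCatRecord rec = (HCatRecord) serde.deserialize(
        new Text("{\"_col0\":2,\"_col1\":\"bob\"}"));
    System.out.println(rec.get(0) + " " + rec.get(1)); // 2 bob
  }
}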
+ Pattern internalPattern = Pattern.compile("_col([0-9]+)"); + Matcher m = internalPattern.matcher(internalName); + if (!m.matches()) { + return -1; + } else { + return Integer.parseInt(m.group(1)); + } + } + + /** + * Utility method to extract current expected field from given JsonParser + * + * To get the field, we need either a type or a hcatFieldSchema(necessary for complex types) + * It is possible that one of them can be null, and so, if so, the other is instantiated + * from the other + * + * isTokenCurrent is a boolean variable also passed in, which determines + * if the JsonParser is already at the token we expect to read next, or + * needs advancing to the next before we read. + */ + private Object extractCurrentField(JsonParser p, Type t, + HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException, + HCatException { + Object val = null; + JsonToken valueToken; + if (isTokenCurrent) { + valueToken = p.getCurrentToken(); + } else { + valueToken = p.nextToken(); } - /** - * Utility method to extract current expected field from given JsonParser - * - * To get the field, we need either a type or a hcatFieldSchema(necessary for complex types) - * It is possible that one of them can be null, and so, if so, the other is instantiated - * from the other - * - * isTokenCurrent is a boolean variable also passed in, which determines - * if the JsonParser is already at the token we expect to read next, or - * needs advancing to the next before we read. - */ - private Object extractCurrentField(JsonParser p, Type t, - HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException, - HCatException { - Object val = null; - JsonToken valueToken; - if (isTokenCurrent) { - valueToken = p.getCurrentToken(); + if (hcatFieldSchema != null) { + t = hcatFieldSchema.getType(); + } + switch (t) { + case INT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue(); + break; + case TINYINT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue(); + break; + case SMALLINT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue(); + break; + case BIGINT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue(); + break; + case BOOLEAN: + String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText(); + if (bval != null) { + val = Boolean.valueOf(bval); + } else { + val = null; + } + break; + case FLOAT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue(); + break; + case DOUBLE: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue(); + break; + case STRING: + val = (valueToken == JsonToken.VALUE_NULL) ? 
null : p.getText(); + break; + case BINARY: + throw new IOException("JsonSerDe does not support BINARY type"); + case ARRAY: + if (valueToken == JsonToken.VALUE_NULL) { + val = null; + break; + } + if (valueToken != JsonToken.START_ARRAY) { + throw new IOException("Start of Array expected"); + } + List arr = new ArrayList(); + while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) { + arr.add(extractCurrentField(p, null, hcatFieldSchema.getArrayElementSchema().get(0), true)); + } + val = arr; + break; + case MAP: + if (valueToken == JsonToken.VALUE_NULL) { + val = null; + break; + } + if (valueToken != JsonToken.START_OBJECT) { + throw new IOException("Start of Object expected"); + } + Map map = new LinkedHashMap(); + Type keyType = hcatFieldSchema.getMapKeyType(); + HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0); + while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { + Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), keyType); + Object v; + if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT) { + v = extractCurrentField(p, null, valueSchema, false); } else { - valueToken = p.nextToken(); + v = extractCurrentField(p, null, valueSchema, true); } - if (hcatFieldSchema != null) { - t = hcatFieldSchema.getType(); - } - switch (t) { - case INT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue(); - break; - case TINYINT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue(); - break; - case SMALLINT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue(); - break; - case BIGINT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue(); - break; - case BOOLEAN: - String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText(); - if (bval != null) { - val = Boolean.valueOf(bval); - } else { - val = null; - } - break; - case FLOAT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue(); - break; - case DOUBLE: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue(); - break; - case STRING: - val = (valueToken == JsonToken.VALUE_NULL) ? 
null : p.getText(); - break; - case BINARY: - throw new IOException("JsonSerDe does not support BINARY type"); - case ARRAY: - if (valueToken == JsonToken.VALUE_NULL) { - val = null; - break; - } - if (valueToken != JsonToken.START_ARRAY) { - throw new IOException("Start of Array expected"); - } - List arr = new ArrayList(); - while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) { - arr.add(extractCurrentField(p, null, hcatFieldSchema.getArrayElementSchema().get(0), true)); - } - val = arr; - break; - case MAP: - if (valueToken == JsonToken.VALUE_NULL) { - val = null; - break; - } - if (valueToken != JsonToken.START_OBJECT) { - throw new IOException("Start of Object expected"); - } - Map map = new LinkedHashMap(); - Type keyType = hcatFieldSchema.getMapKeyType(); - HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0); - while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { - Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), keyType); - Object v; - if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT) { - v = extractCurrentField(p, null, valueSchema, false); - } else { - v = extractCurrentField(p, null, valueSchema, true); - } - - map.put(k, v); - } - val = map; - break; - case STRUCT: - if (valueToken == JsonToken.VALUE_NULL) { - val = null; - break; - } - if (valueToken != JsonToken.START_OBJECT) { - throw new IOException("Start of Object expected"); - } - HCatSchema subSchema = hcatFieldSchema.getStructSubSchema(); - int sz = subSchema.getFieldNames().size(); - - List struct = new ArrayList(Collections.nCopies(sz, null)); - while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { - populateRecord(struct, valueToken, p, subSchema); - } - val = struct; - break; - } - return val; + map.put(k, v); + } + val = map; + break; + case STRUCT: + if (valueToken == JsonToken.VALUE_NULL) { + val = null; + break; + } + if (valueToken != JsonToken.START_OBJECT) { + throw new IOException("Start of Object expected"); + } + HCatSchema subSchema = hcatFieldSchema.getStructSubSchema(); + int sz = subSchema.getFieldNames().size(); + + List struct = new ArrayList(Collections.nCopies(sz, null)); + while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { + populateRecord(struct, valueToken, p, subSchema); + } + val = struct; + break; } - - private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException { - switch (t) { - case INT: - return Integer.valueOf(s); - case TINYINT: - return Byte.valueOf(s); - case SMALLINT: - return Short.valueOf(s); - case BIGINT: - return Long.valueOf(s); - case BOOLEAN: - return (s.equalsIgnoreCase("true")); - case FLOAT: - return Float.valueOf(s); - case DOUBLE: - return Double.valueOf(s); - case STRING: - return s; - case BINARY: - throw new IOException("JsonSerDe does not support BINARY type"); - } - throw new IOException("Could not convert from string to map type " + t); + return val; + } + + private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException { + switch (t) { + case INT: + return Integer.valueOf(s); + case TINYINT: + return Byte.valueOf(s); + case SMALLINT: + return Short.valueOf(s); + case BIGINT: + return Long.valueOf(s); + case BOOLEAN: + return (s.equalsIgnoreCase("true")); + case FLOAT: + return Float.valueOf(s); + case DOUBLE: + return Double.valueOf(s); + case STRING: + return s; + case BINARY: + throw new IOException("JsonSerDe does not support BINARY type"); } - - /** - * Given an object and object inspector pair, traverse the object - 
* and generate a Text representation of the object. - */ - @Override - public Writable serialize(Object obj, ObjectInspector objInspector) - throws SerDeException { - StringBuilder sb = new StringBuilder(); - try { - - StructObjectInspector soi = (StructObjectInspector) objInspector; - List structFields = soi.getAllStructFieldRefs(); - assert (columnNames.size() == structFields.size()); - if (obj == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - for (int i = 0; i < structFields.size(); i++) { - if (i > 0) { - sb.append(SerDeUtils.COMMA); - } - sb.append(SerDeUtils.QUOTE); - sb.append(columnNames.get(i)); - sb.append(SerDeUtils.QUOTE); - sb.append(SerDeUtils.COLON); - buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)), - structFields.get(i).getFieldObjectInspector()); - } - sb.append(SerDeUtils.RBRACE); - } - - } catch (IOException e) { - LOG.warn("Error generating json text from object.", e); - throw new SerDeException(e); + throw new IOException("Could not convert from string to map type " + t); + } + + /** + * Given an object and object inspector pair, traverse the object + * and generate a Text representation of the object. + */ + @Override + public Writable serialize(Object obj, ObjectInspector objInspector) + throws SerDeException { + StringBuilder sb = new StringBuilder(); + try { + + StructObjectInspector soi = (StructObjectInspector) objInspector; + List structFields = soi.getAllStructFieldRefs(); + assert (columnNames.size() == structFields.size()); + if (obj == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + for (int i = 0; i < structFields.size(); i++) { + if (i > 0) { + sb.append(SerDeUtils.COMMA); + } + sb.append(SerDeUtils.QUOTE); + sb.append(columnNames.get(i)); + sb.append(SerDeUtils.QUOTE); + sb.append(SerDeUtils.COLON); + buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)), + structFields.get(i).getFieldObjectInspector()); } - return new Text(sb.toString()); - } + sb.append(SerDeUtils.RBRACE); + } - // TODO : code section copied over from SerDeUtils because of non-standard json production there - // should use quotes for all field names. We should fix this there, and then remove this copy. - // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES - // for details - trying to enable Jackson to ignore that doesn't seem to work(compilation failure - // when attempting to use that feature, so having to change the production itself. - // Also, throws IOException when Binary is detected. - private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException { - - switch (oi.getCategory()) { - case PRIMITIVE: { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - if (o == null) { - sb.append("null"); - } else { - switch (poi.getPrimitiveCategory()) { - case BOOLEAN: { - boolean b = ((BooleanObjectInspector) poi).get(o); - sb.append(b ? 
"true" : "false"); - break; - } - case BYTE: { - sb.append(((ByteObjectInspector) poi).get(o)); - break; - } - case SHORT: { - sb.append(((ShortObjectInspector) poi).get(o)); - break; - } - case INT: { - sb.append(((IntObjectInspector) poi).get(o)); - break; - } - case LONG: { - sb.append(((LongObjectInspector) poi).get(o)); - break; - } - case FLOAT: { - sb.append(((FloatObjectInspector) poi).get(o)); - break; - } - case DOUBLE: { - sb.append(((DoubleObjectInspector) poi).get(o)); - break; - } - case STRING: { - sb.append('"'); - sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi) - .getPrimitiveJavaObject(o))); - sb.append('"'); - break; - } - case TIMESTAMP: { - sb.append('"'); - sb.append(((TimestampObjectInspector) poi) - .getPrimitiveWritableObject(o)); - sb.append('"'); - break; - } - case BINARY: { - throw new IOException("JsonSerDe does not support BINARY type"); - } - default: - throw new RuntimeException("Unknown primitive type: " - + poi.getPrimitiveCategory()); - } - } - break; + } catch (IOException e) { + LOG.warn("Error generating json text from object.", e); + throw new SerDeException(e); + } + return new Text(sb.toString()); + } + + // TODO : code section copied over from SerDeUtils because of non-standard json production there + // should use quotes for all field names. We should fix this there, and then remove this copy. + // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES + // for details - trying to enable Jackson to ignore that doesn't seem to work(compilation failure + // when attempting to use that feature, so having to change the production itself. + // Also, throws IOException when Binary is detected. + private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException { + + switch (oi.getCategory()) { + case PRIMITIVE: { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + if (o == null) { + sb.append("null"); + } else { + switch (poi.getPrimitiveCategory()) { + case BOOLEAN: { + boolean b = ((BooleanObjectInspector) poi).get(o); + sb.append(b ? 
"true" : "false"); + break; + } + case BYTE: { + sb.append(((ByteObjectInspector) poi).get(o)); + break; + } + case SHORT: { + sb.append(((ShortObjectInspector) poi).get(o)); + break; + } + case INT: { + sb.append(((IntObjectInspector) poi).get(o)); + break; + } + case LONG: { + sb.append(((LongObjectInspector) poi).get(o)); + break; + } + case FLOAT: { + sb.append(((FloatObjectInspector) poi).get(o)); + break; } - case LIST: { - ListObjectInspector loi = (ListObjectInspector) oi; - ObjectInspector listElementObjectInspector = loi - .getListElementObjectInspector(); - List olist = loi.getList(o); - if (olist == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACKET); - for (int i = 0; i < olist.size(); i++) { - if (i > 0) { - sb.append(SerDeUtils.COMMA); - } - buildJSONString(sb, olist.get(i), listElementObjectInspector); - } - sb.append(SerDeUtils.RBRACKET); - } - break; + case DOUBLE: { + sb.append(((DoubleObjectInspector) poi).get(o)); + break; } - case MAP: { - MapObjectInspector moi = (MapObjectInspector) oi; - ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); - ObjectInspector mapValueObjectInspector = moi - .getMapValueObjectInspector(); - Map omap = moi.getMap(o); - if (omap == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - boolean first = true; - for (Object entry : omap.entrySet()) { - if (first) { - first = false; - } else { - sb.append(SerDeUtils.COMMA); - } - Map.Entry e = (Map.Entry) entry; - StringBuilder keyBuilder = new StringBuilder(); - buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector); - String keyString = keyBuilder.toString().trim(); - boolean doQuoting = (!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE); - if (doQuoting) { - sb.append(SerDeUtils.QUOTE); - } - sb.append(keyString); - if (doQuoting) { - sb.append(SerDeUtils.QUOTE); - } - sb.append(SerDeUtils.COLON); - buildJSONString(sb, e.getValue(), mapValueObjectInspector); - } - sb.append(SerDeUtils.RBRACE); - } - break; + case STRING: { + sb.append('"'); + sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi) + .getPrimitiveJavaObject(o))); + sb.append('"'); + break; } - case STRUCT: { - StructObjectInspector soi = (StructObjectInspector) oi; - List structFields = soi.getAllStructFieldRefs(); - if (o == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - for (int i = 0; i < structFields.size(); i++) { - if (i > 0) { - sb.append(SerDeUtils.COMMA); - } - sb.append(SerDeUtils.QUOTE); - sb.append(structFields.get(i).getFieldName()); - sb.append(SerDeUtils.QUOTE); - sb.append(SerDeUtils.COLON); - buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)), - structFields.get(i).getFieldObjectInspector()); - } - sb.append(SerDeUtils.RBRACE); - } - break; + case TIMESTAMP: { + sb.append('"'); + sb.append(((TimestampObjectInspector) poi) + .getPrimitiveWritableObject(o)); + sb.append('"'); + break; } - case UNION: { - UnionObjectInspector uoi = (UnionObjectInspector) oi; - if (o == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - sb.append(uoi.getTag(o)); - sb.append(SerDeUtils.COLON); - buildJSONString(sb, uoi.getField(o), - uoi.getObjectInspectors().get(uoi.getTag(o))); - sb.append(SerDeUtils.RBRACE); - } - break; + case BINARY: { + throw new IOException("JsonSerDe does not support BINARY type"); } default: - throw new RuntimeException("Unknown type in ObjectInspector!"); + throw new RuntimeException("Unknown primitive type: " + + poi.getPrimitiveCategory()); } 
+ } + break; } - - - /** - * Returns an object inspector for the specified schema that - * is capable of reading in the object representation of the JSON string - */ - @Override - public ObjectInspector getObjectInspector() throws SerDeException { - return cachedObjectInspector; + case LIST: { + ListObjectInspector loi = (ListObjectInspector) oi; + ObjectInspector listElementObjectInspector = loi + .getListElementObjectInspector(); + List olist = loi.getList(o); + if (olist == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACKET); + for (int i = 0; i < olist.size(); i++) { + if (i > 0) { + sb.append(SerDeUtils.COMMA); + } + buildJSONString(sb, olist.get(i), listElementObjectInspector); + } + sb.append(SerDeUtils.RBRACKET); + } + break; } - - @Override - public Class getSerializedClass() { - return Text.class; + case MAP: { + MapObjectInspector moi = (MapObjectInspector) oi; + ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); + ObjectInspector mapValueObjectInspector = moi + .getMapValueObjectInspector(); + Map omap = moi.getMap(o); + if (omap == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + boolean first = true; + for (Object entry : omap.entrySet()) { + if (first) { + first = false; + } else { + sb.append(SerDeUtils.COMMA); + } + Map.Entry e = (Map.Entry) entry; + StringBuilder keyBuilder = new StringBuilder(); + buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector); + String keyString = keyBuilder.toString().trim(); + boolean doQuoting = (!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE); + if (doQuoting) { + sb.append(SerDeUtils.QUOTE); + } + sb.append(keyString); + if (doQuoting) { + sb.append(SerDeUtils.QUOTE); + } + sb.append(SerDeUtils.COLON); + buildJSONString(sb, e.getValue(), mapValueObjectInspector); + } + sb.append(SerDeUtils.RBRACE); + } + break; } - - @Override - public SerDeStats getSerDeStats() { - // no support for statistics yet - return null; + case STRUCT: { + StructObjectInspector soi = (StructObjectInspector) oi; + List structFields = soi.getAllStructFieldRefs(); + if (o == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + for (int i = 0; i < structFields.size(); i++) { + if (i > 0) { + sb.append(SerDeUtils.COMMA); + } + sb.append(SerDeUtils.QUOTE); + sb.append(structFields.get(i).getFieldName()); + sb.append(SerDeUtils.QUOTE); + sb.append(SerDeUtils.COLON); + buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)), + structFields.get(i).getFieldObjectInspector()); + } + sb.append(SerDeUtils.RBRACE); + } + break; + } + case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector) oi; + if (o == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + sb.append(uoi.getTag(o)); + sb.append(SerDeUtils.COLON); + buildJSONString(sb, uoi.getField(o), + uoi.getObjectInspectors().get(uoi.getTag(o))); + sb.append(SerDeUtils.RBRACE); + } + break; + } + default: + throw new RuntimeException("Unknown type in ObjectInspector!"); } + } + + + /** + * Returns an object inspector for the specified schema that + * is capable of reading in the object representation of the JSON string + */ + @Override + public ObjectInspector getObjectInspector() throws SerDeException { + return cachedObjectInspector; + } + + @Override + public Class getSerializedClass() { + return Text.class; + } + + @Override + public SerDeStats getSerDeStats() { + // no support for statistics yet + return null; + } } diff --git 
a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/LazyHCatRecord.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/LazyHCatRecord.java index edac621..43c36aa 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/LazyHCatRecord.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/LazyHCatRecord.java @@ -41,106 +41,106 @@ */ public class LazyHCatRecord extends HCatRecord { - public static final Logger LOG = LoggerFactory.getLogger(LazyHCatRecord.class.getName()); - - private Object wrappedObject; - private StructObjectInspector soi; - - @Override - public Object get(int fieldNum) { - try { - StructField fref = soi.getAllStructFieldRefs().get(fieldNum); - return HCatRecordSerDe.serializeField( - soi.getStructFieldData(wrappedObject, fref), - fref.getFieldObjectInspector()); - } catch (SerDeException e) { - throw new IllegalStateException("SerDe Exception deserializing",e); - } + public static final Logger LOG = LoggerFactory.getLogger(LazyHCatRecord.class.getName()); + + private Object wrappedObject; + private StructObjectInspector soi; + + @Override + public Object get(int fieldNum) { + try { + StructField fref = soi.getAllStructFieldRefs().get(fieldNum); + return HCatRecordSerDe.serializeField( + soi.getStructFieldData(wrappedObject, fref), + fref.getFieldObjectInspector()); + } catch (SerDeException e) { + throw new IllegalStateException("SerDe Exception deserializing",e); } + } - @Override - public List getAll() { - List r = new ArrayList(this.size()); - for (int i = 0; i < this.size(); i++){ - r.add(i, get(i)); - } - return r; + @Override + public List getAll() { + List r = new ArrayList(this.size()); + for (int i = 0; i < this.size(); i++){ + r.add(i, get(i)); } - - @Override - public void set(int fieldNum, Object value) { - throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); - } - - @Override - public int size() { - return soi.getAllStructFieldRefs().size(); - } - - @Override - public void readFields(DataInput in) throws IOException { - throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" - + " an object/object inspector as a HCatRecord " - + "- it does not need to be read from DataInput."); + return r; + } + + @Override + public void set(int fieldNum, Object value) { + throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); + } + + @Override + public int size() { + return soi.getAllStructFieldRefs().size(); + } + + @Override + public void readFields(DataInput in) throws IOException { + throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" + + " an object/object inspector as a HCatRecord " + + "- it does not need to be read from DataInput."); + } + + @Override + public void write(DataOutput out) throws IOException { + throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" + + " an object/object inspector as a HCatRecord " + + "- it does not need to be written to a DataOutput."); + } + + @Override + public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { + int idx = recordSchema.getPosition(fieldName); + return get(idx); + } + + @Override + public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { + throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); + } + + @Override + public void remove(int idx) throws HCatException { + throw new UnsupportedOperationException("not allowed to run remove() on 
LazyHCatRecord"); + } + + @Override + public void copy(HCatRecord r) throws HCatException { + throw new UnsupportedOperationException("not allowed to run copy() on LazyHCatRecord"); + } + + public LazyHCatRecord(Object wrappedObject, ObjectInspector oi) throws Exception { + if (oi.getCategory() != Category.STRUCT) { + throw new SerDeException(getClass().toString() + + " can only make a lazy hcat record from " + + "objects of struct types, but we got: " + oi.getTypeName()); } - @Override - public void write(DataOutput out) throws IOException { - throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" - + " an object/object inspector as a HCatRecord " - + "- it does not need to be written to a DataOutput."); - } - - @Override - public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { - int idx = recordSchema.getPosition(fieldName); - return get(idx); - } - - @Override - public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { - throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); - } - - @Override - public void remove(int idx) throws HCatException { - throw new UnsupportedOperationException("not allowed to run remove() on LazyHCatRecord"); - } - - @Override - public void copy(HCatRecord r) throws HCatException { - throw new UnsupportedOperationException("not allowed to run copy() on LazyHCatRecord"); - } - - public LazyHCatRecord(Object wrappedObject, ObjectInspector oi) throws Exception { - if (oi.getCategory() != Category.STRUCT) { - throw new SerDeException(getClass().toString() + - " can only make a lazy hcat record from " + - "objects of struct types, but we got: " + oi.getTypeName()); - } - - this.soi = (StructObjectInspector)oi; - this.wrappedObject = wrappedObject; - } - - @Override - public String toString(){ - StringBuilder sb = new StringBuilder(); - for(int i = 0; i< size() ; i++) { - sb.append(get(i)+"\t"); - } - return sb.toString(); - } + this.soi = (StructObjectInspector)oi; + this.wrappedObject = wrappedObject; + } - /** - * Convert this LazyHCatRecord to a DefaultHCatRecord. This is required - * before you can write out a record via write. - * @return an HCatRecord that can be serialized - * @throws HCatException - */ - public HCatRecord getWritable() throws HCatException { - DefaultHCatRecord d = new DefaultHCatRecord(); - d.copy(this); - return d; + @Override + public String toString(){ + StringBuilder sb = new StringBuilder(); + for(int i = 0; i< size() ; i++) { + sb.append(get(i)+"\t"); } + return sb.toString(); + } + + /** + * Convert this LazyHCatRecord to a DefaultHCatRecord. This is required + * before you can write out a record via write. 
+ * @return an HCatRecord that can be serialized + * @throws HCatException + */ + public HCatRecord getWritable() throws HCatException { + DefaultHCatRecord d = new DefaultHCatRecord(); + d.copy(this); + return d; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/Pair.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/Pair.java index c71157b..54002eb 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/Pair.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/Pair.java @@ -25,65 +25,65 @@ */ public class Pair implements Serializable { - private static final long serialVersionUID = 1L; - public T first; - public U second; + private static final long serialVersionUID = 1L; + public T first; + public U second; - /** - * @param f First element in pair. - * @param s Second element in pair. - */ - public Pair(T f, U s) { - first = f; - second = s; - } + /** + * @param f First element in pair. + * @param s Second element in pair. + */ + public Pair(T f, U s) { + first = f; + second = s; + } - /* (non-Javadoc) - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "[" + first.toString() + "," + second.toString() + "]"; - } + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "[" + first.toString() + "," + second.toString() + "]"; + } - @Override - public int hashCode() { - return (((this.first == null ? 1 : this.first.hashCode()) * 17) - + (this.second == null ? 1 : this.second.hashCode()) * 19); - } + @Override + public int hashCode() { + return (((this.first == null ? 1 : this.first.hashCode()) * 17) + + (this.second == null ? 1 : this.second.hashCode()) * 19); + } - @Override - public boolean equals(Object other) { - if (other == null) { - return false; - } + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } - if (!(other instanceof Pair)) { - return false; - } + if (!(other instanceof Pair)) { + return false; + } - Pair otherPair = (Pair) other; + Pair otherPair = (Pair) other; - if (this.first == null) { - if (otherPair.first != null) { - return false; - } else { - return true; - } - } + if (this.first == null) { + if (otherPair.first != null) { + return false; + } else { + return true; + } + } - if (this.second == null) { - if (otherPair.second != null) { - return false; - } else { - return true; - } - } + if (this.second == null) { + if (otherPair.second != null) { + return false; + } else { + return true; + } + } - if (this.first.equals(otherPair.first) && this.second.equals(otherPair.second)) { - return true; - } else { - return false; - } + if (this.first.equals(otherPair.first) && this.second.equals(otherPair.second)) { + return true; + } else { + return false; } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/ReaderWriter.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/ReaderWriter.java index 53e440b..7719569 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/ReaderWriter.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/ReaderWriter.java @@ -35,158 +35,158 @@ public abstract class ReaderWriter { - private static final String UTF8 = "UTF-8"; - - public static Object readDatum(DataInput in) throws IOException { - - byte type = in.readByte(); - switch (type) { - - case DataType.STRING: - byte[] buffer = new byte[in.readInt()]; - in.readFully(buffer); - return new String(buffer, UTF8); - - case 
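Pair, reformatted a couple of hunks above, is a small generic holder (its type parameters, like the other generics in this rendering of the patch, appear to have been stripped); note that, as written, its equals() returns true as soon as both first elements are null, without comparing the second elements. A minimal sketch of the intended usage, assuming the original Pair<T, U> signature:

import org.apache.hive.hcatalog.data.Pair;

public class PairSketch {
  public static void main(String[] args) {
    Pair<String, Integer> a = new Pair<String, Integer>("bytes", 1024);
    Pair<String, Integer> b = new Pair<String, Integer>("bytes", 1024);

    System.out.println(a);                             // [bytes,1024]
    System.out.println(a.equals(b));                   // true: element-wise comparison
    System.out.println(a.hashCode() == b.hashCode());  // true for equal pairs
  }
}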
DataType.INTEGER: - VIntWritable vint = new VIntWritable(); - vint.readFields(in); - return vint.get(); - - case DataType.LONG: - VLongWritable vlong = new VLongWritable(); - vlong.readFields(in); - return vlong.get(); - - case DataType.FLOAT: - return in.readFloat(); - - case DataType.DOUBLE: - return in.readDouble(); - - case DataType.BOOLEAN: - return in.readBoolean(); - - case DataType.BYTE: - return in.readByte(); - - case DataType.SHORT: - return in.readShort(); - - case DataType.NULL: - return null; - - case DataType.BINARY: - int len = in.readInt(); - byte[] ba = new byte[len]; - in.readFully(ba); - return ba; - - case DataType.MAP: - int size = in.readInt(); - Map m = new HashMap(size); - for (int i = 0; i < size; i++) { - m.put(readDatum(in), readDatum(in)); - } - return m; - - case DataType.LIST: - int sz = in.readInt(); - List list = new ArrayList(sz); - for (int i = 0; i < sz; i++) { - list.add(readDatum(in)); - } - return list; - - default: - throw new IOException("Unexpected data type " + type + - " found in stream."); - } + private static final String UTF8 = "UTF-8"; + + public static Object readDatum(DataInput in) throws IOException { + + byte type = in.readByte(); + switch (type) { + + case DataType.STRING: + byte[] buffer = new byte[in.readInt()]; + in.readFully(buffer); + return new String(buffer, UTF8); + + case DataType.INTEGER: + VIntWritable vint = new VIntWritable(); + vint.readFields(in); + return vint.get(); + + case DataType.LONG: + VLongWritable vlong = new VLongWritable(); + vlong.readFields(in); + return vlong.get(); + + case DataType.FLOAT: + return in.readFloat(); + + case DataType.DOUBLE: + return in.readDouble(); + + case DataType.BOOLEAN: + return in.readBoolean(); + + case DataType.BYTE: + return in.readByte(); + + case DataType.SHORT: + return in.readShort(); + + case DataType.NULL: + return null; + + case DataType.BINARY: + int len = in.readInt(); + byte[] ba = new byte[len]; + in.readFully(ba); + return ba; + + case DataType.MAP: + int size = in.readInt(); + Map m = new HashMap(size); + for (int i = 0; i < size; i++) { + m.put(readDatum(in), readDatum(in)); + } + return m; + + case DataType.LIST: + int sz = in.readInt(); + List list = new ArrayList(sz); + for (int i = 0; i < sz; i++) { + list.add(readDatum(in)); + } + return list; + + default: + throw new IOException("Unexpected data type " + type + + " found in stream."); } - - public static void writeDatum(DataOutput out, Object val) throws IOException { - // write the data type - byte type = DataType.findType(val); - switch (type) { - case DataType.LIST: - out.writeByte(DataType.LIST); - List list = (List) val; - int sz = list.size(); - out.writeInt(sz); - for (int i = 0; i < sz; i++) { - writeDatum(out, list.get(i)); - } - return; - - case DataType.MAP: - out.writeByte(DataType.MAP); - Map m = (Map) val; - out.writeInt(m.size()); - Iterator i = - m.entrySet().iterator(); - while (i.hasNext()) { - Entry entry = (Entry) i.next(); - writeDatum(out, entry.getKey()); - writeDatum(out, entry.getValue()); - } - return; - - case DataType.INTEGER: - out.writeByte(DataType.INTEGER); - new VIntWritable((Integer) val).write(out); - return; - - case DataType.LONG: - out.writeByte(DataType.LONG); - new VLongWritable((Long) val).write(out); - return; - - case DataType.FLOAT: - out.writeByte(DataType.FLOAT); - out.writeFloat((Float) val); - return; - - case DataType.DOUBLE: - out.writeByte(DataType.DOUBLE); - out.writeDouble((Double) val); - return; - - case DataType.BOOLEAN: - out.writeByte(DataType.BOOLEAN); - 
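ReaderWriter.writeDatum() tags every value with its DataType byte before the payload, and readDatum() dispatches on that tag, so nested lists and maps of the supported primitives round-trip through any DataOutput/DataInput pair. A minimal round-trip sketch over in-memory streams (the values are illustrative; the Map/List generics are erased in this rendering of the patch):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.hive.hcatalog.data.ReaderWriter;

public class ReaderWriterSketch {
  public static void main(String[] args) throws Exception {
    Map<Object, Object> value = new HashMap<Object, Object>();
    value.put("name", "alice");
    value.put("scores", Arrays.<Object>asList(1, 2L, 3.5d, null));

    // Serialize: each datum is written as a one-byte type tag plus payload.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    ReaderWriter.writeDatum(out, value);
    out.close();

    // Deserialize: readDatum dispatches on the leading type tag.
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    Object roundTripped = ReaderWriter.readDatum(in);
    System.out.println(roundTripped);
  }
}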
out.writeBoolean((Boolean) val); - return; - - case DataType.BYTE: - out.writeByte(DataType.BYTE); - out.writeByte((Byte) val); - return; - - case DataType.SHORT: - out.writeByte(DataType.SHORT); - out.writeShort((Short) val); - return; - - case DataType.STRING: - String s = (String) val; - byte[] utfBytes = s.getBytes(ReaderWriter.UTF8); - out.writeByte(DataType.STRING); - out.writeInt(utfBytes.length); - out.write(utfBytes); - return; - - case DataType.BINARY: - byte[] ba = (byte[]) val; - out.writeByte(DataType.BINARY); - out.writeInt(ba.length); - out.write(ba); - return; - - case DataType.NULL: - out.writeByte(DataType.NULL); - return; - - default: - throw new IOException("Unexpected data type " + type + - " found in stream."); - } + } + + public static void writeDatum(DataOutput out, Object val) throws IOException { + // write the data type + byte type = DataType.findType(val); + switch (type) { + case DataType.LIST: + out.writeByte(DataType.LIST); + List list = (List) val; + int sz = list.size(); + out.writeInt(sz); + for (int i = 0; i < sz; i++) { + writeDatum(out, list.get(i)); + } + return; + + case DataType.MAP: + out.writeByte(DataType.MAP); + Map m = (Map) val; + out.writeInt(m.size()); + Iterator i = + m.entrySet().iterator(); + while (i.hasNext()) { + Entry entry = (Entry) i.next(); + writeDatum(out, entry.getKey()); + writeDatum(out, entry.getValue()); + } + return; + + case DataType.INTEGER: + out.writeByte(DataType.INTEGER); + new VIntWritable((Integer) val).write(out); + return; + + case DataType.LONG: + out.writeByte(DataType.LONG); + new VLongWritable((Long) val).write(out); + return; + + case DataType.FLOAT: + out.writeByte(DataType.FLOAT); + out.writeFloat((Float) val); + return; + + case DataType.DOUBLE: + out.writeByte(DataType.DOUBLE); + out.writeDouble((Double) val); + return; + + case DataType.BOOLEAN: + out.writeByte(DataType.BOOLEAN); + out.writeBoolean((Boolean) val); + return; + + case DataType.BYTE: + out.writeByte(DataType.BYTE); + out.writeByte((Byte) val); + return; + + case DataType.SHORT: + out.writeByte(DataType.SHORT); + out.writeShort((Short) val); + return; + + case DataType.STRING: + String s = (String) val; + byte[] utfBytes = s.getBytes(ReaderWriter.UTF8); + out.writeByte(DataType.STRING); + out.writeInt(utfBytes.length); + out.write(utfBytes); + return; + + case DataType.BINARY: + byte[] ba = (byte[]) val; + out.writeByte(DataType.BINARY); + out.writeInt(ba.length); + out.write(ba); + return; + + case DataType.NULL: + out.writeByte(DataType.NULL); + return; + + default: + throw new IOException("Unexpected data type " + type + + " found in stream."); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatFieldSchema.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatFieldSchema.java index 2b06469..92596e0 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatFieldSchema.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatFieldSchema.java @@ -25,268 +25,268 @@ public class HCatFieldSchema implements Serializable { - public enum Type { - INT, - TINYINT, - SMALLINT, - BIGINT, - BOOLEAN, - FLOAT, - DOUBLE, - STRING, - ARRAY, - MAP, - STRUCT, - BINARY, + public enum Type { + INT, + TINYINT, + SMALLINT, + BIGINT, + BOOLEAN, + FLOAT, + DOUBLE, + STRING, + ARRAY, + MAP, + STRUCT, + BINARY, + } + + public enum Category { + PRIMITIVE, + ARRAY, + MAP, + STRUCT; + + public static Category fromType(Type type) { + if (Type.ARRAY == type) { + 
return ARRAY; + } else if (Type.STRUCT == type) { + return STRUCT; + } else if (Type.MAP == type) { + return MAP; + } else { + return PRIMITIVE; + } } - - public enum Category { - PRIMITIVE, - ARRAY, - MAP, - STRUCT; - - public static Category fromType(Type type) { - if (Type.ARRAY == type) { - return ARRAY; - } else if (Type.STRUCT == type) { - return STRUCT; - } else if (Type.MAP == type) { - return MAP; - } else { - return PRIMITIVE; - } - } - } - - ; - - public boolean isComplex() { - return (category == Category.PRIMITIVE) ? false : true; - } - - /** - * - */ - private static final long serialVersionUID = 1L; - - String fieldName = null; - String comment = null; - Type type = null; - Category category = null; - - // Populated if column is struct, array or map types. - // If struct type, contains schema of the struct. - // If array type, contains schema of one of the elements. - // If map type, contains schema of the value element. - HCatSchema subSchema = null; - - // populated if column is Map type - Type mapKeyType = null; - - private String typeString = null; - - @SuppressWarnings("unused") - private HCatFieldSchema() { - // preventing empty ctor from being callable - } - - /** - * Returns type of the field - * @return type of the field - */ - public Type getType() { - return type; - } - - /** - * Returns category of the field - * @return category of the field - */ - public Category getCategory() { - return category; - } - - /** - * Returns name of the field - * @return name of the field - */ - public String getName() { - return fieldName; + } + + ; + + public boolean isComplex() { + return (category == Category.PRIMITIVE) ? false : true; + } + + /** + * + */ + private static final long serialVersionUID = 1L; + + String fieldName = null; + String comment = null; + Type type = null; + Category category = null; + + // Populated if column is struct, array or map types. + // If struct type, contains schema of the struct. + // If array type, contains schema of one of the elements. + // If map type, contains schema of the value element. 
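HCatFieldSchema carries the field name, its Type and Category, and, for complex types, a subSchema (element schema for ARRAY, value schema for MAP, full schema for STRUCT) plus mapKeyType for maps; the constructors use assertTypeInCategory/assertTypeNotInCategory to reject mismatched categories. A minimal construction sketch (field names and comments are illustrative):

import java.util.Arrays;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class FieldSchemaSketch {
  public static void main(String[] args) throws HCatException {
    // Primitive field: name + primitive Type + comment.
    HCatFieldSchema id = new HCatFieldSchema("id", Type.BIGINT, "row id");

    // Array field: the subschema describes a single element.
    HCatSchema elementSchema = new HCatSchema(
        Arrays.asList(new HCatFieldSchema("elem", Type.STRING, null)));
    HCatFieldSchema tags = new HCatFieldSchema("tags", Type.ARRAY, elementSchema, "string tags");

    System.out.println(id.getTypeString());    // bigint
    System.out.println(tags.getTypeString());  // array<string>
    System.out.println(tags.getCategory());    // ARRAY
  }
}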
+ HCatSchema subSchema = null; + + // populated if column is Map type + Type mapKeyType = null; + + private String typeString = null; + + @SuppressWarnings("unused") + private HCatFieldSchema() { + // preventing empty ctor from being callable + } + + /** + * Returns type of the field + * @return type of the field + */ + public Type getType() { + return type; + } + + /** + * Returns category of the field + * @return category of the field + */ + public Category getCategory() { + return category; + } + + /** + * Returns name of the field + * @return name of the field + */ + public String getName() { + return fieldName; + } + + public String getComment() { + return comment; + } + + /** + * Constructor constructing a primitive datatype HCatFieldSchema + * @param fieldName Name of the primitive field + * @param type Type of the primitive field + * @throws HCatException if call made on non-primitive types + */ + public HCatFieldSchema(String fieldName, Type type, String comment) throws HCatException { + assertTypeInCategory(type, Category.PRIMITIVE, fieldName); + this.fieldName = fieldName; + this.type = type; + this.category = Category.PRIMITIVE; + this.comment = comment; + } + + /** + * Constructor for constructing a ARRAY type or STRUCT type HCatFieldSchema, passing type and subschema + * @param fieldName Name of the array or struct field + * @param type Type of the field - either Type.ARRAY or Type.STRUCT + * @param subSchema - subschema of the struct, or element schema of the elements in the array + * @throws HCatException if call made on Primitive or Map types + */ + public HCatFieldSchema(String fieldName, Type type, HCatSchema subSchema, String comment) throws HCatException { + assertTypeNotInCategory(type, Category.PRIMITIVE); + assertTypeNotInCategory(type, Category.MAP); + this.fieldName = fieldName; + this.type = type; + this.category = Category.fromType(type); + this.subSchema = subSchema; + if (type == Type.ARRAY) { + this.subSchema.get(0).setName(null); } - - public String getComment() { - return comment; + this.comment = comment; + } + + private void setName(String name) { + this.fieldName = name; + } + + /** + * Constructor for constructing a MAP type HCatFieldSchema, passing type of key and value + * @param fieldName Name of the array or struct field + * @param type Type of the field - must be Type.MAP + * @param mapKeyType - key type of the Map + * @param mapValueSchema - subschema of the value of the Map + * @throws HCatException if call made on non-Map types + */ + public HCatFieldSchema(String fieldName, Type type, Type mapKeyType, HCatSchema mapValueSchema, String comment) throws HCatException { + assertTypeInCategory(type, Category.MAP, fieldName); + assertTypeInCategory(mapKeyType, Category.PRIMITIVE, fieldName); + this.fieldName = fieldName; + this.type = Type.MAP; + this.category = Category.MAP; + this.mapKeyType = mapKeyType; + this.subSchema = mapValueSchema; + this.subSchema.get(0).setName(null); + this.comment = comment; + } + + public HCatSchema getStructSubSchema() throws HCatException { + assertTypeInCategory(this.type, Category.STRUCT, this.fieldName); + return subSchema; + } + + public HCatSchema getArrayElementSchema() throws HCatException { + assertTypeInCategory(this.type, Category.ARRAY, this.fieldName); + return subSchema; + } + + public Type getMapKeyType() throws HCatException { + assertTypeInCategory(this.type, Category.MAP, this.fieldName); + return mapKeyType; + } + + public HCatSchema getMapValueSchema() throws HCatException { + 
assertTypeInCategory(this.type, Category.MAP, this.fieldName); + return subSchema; + } + + private static void assertTypeInCategory(Type type, Category category, String fieldName) throws HCatException { + Category typeCategory = Category.fromType(type); + if (typeCategory != category) { + throw new HCatException("Type category mismatch. Expected " + category + " but type " + type + " in category " + typeCategory + " (field " + fieldName + ")"); } + } - /** - * Constructor constructing a primitive datatype HCatFieldSchema - * @param fieldName Name of the primitive field - * @param type Type of the primitive field - * @throws HCatException if call made on non-primitive types - */ - public HCatFieldSchema(String fieldName, Type type, String comment) throws HCatException { - assertTypeInCategory(type, Category.PRIMITIVE, fieldName); - this.fieldName = fieldName; - this.type = type; - this.category = Category.PRIMITIVE; - this.comment = comment; + private static void assertTypeNotInCategory(Type type, Category category) throws HCatException { + Category typeCategory = Category.fromType(type); + if (typeCategory == category) { + throw new HCatException("Type category mismatch. Expected type " + type + " not in category " + category + " but was so."); } - - /** - * Constructor for constructing a ARRAY type or STRUCT type HCatFieldSchema, passing type and subschema - * @param fieldName Name of the array or struct field - * @param type Type of the field - either Type.ARRAY or Type.STRUCT - * @param subSchema - subschema of the struct, or element schema of the elements in the array - * @throws HCatException if call made on Primitive or Map types - */ - public HCatFieldSchema(String fieldName, Type type, HCatSchema subSchema, String comment) throws HCatException { - assertTypeNotInCategory(type, Category.PRIMITIVE); - assertTypeNotInCategory(type, Category.MAP); - this.fieldName = fieldName; - this.type = type; - this.category = Category.fromType(type); - this.subSchema = subSchema; - if (type == Type.ARRAY) { - this.subSchema.get(0).setName(null); - } - this.comment = comment; + } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("fieldName", fieldName) + .append("comment", comment) + .append("type", getTypeString()) + .append("category", category) + .toString(); + } + + public String getTypeString() { + if (typeString != null) { + return typeString; } - private void setName(String name) { - this.fieldName = name; + StringBuilder sb = new StringBuilder(); + if (Category.PRIMITIVE == category) { + sb.append(type); + } else if (Category.STRUCT == category) { + sb.append("struct<"); + sb.append(subSchema.getSchemaAsTypeString()); + sb.append(">"); + } else if (Category.ARRAY == category) { + sb.append("array<"); + sb.append(subSchema.getSchemaAsTypeString()); + sb.append(">"); + } else if (Category.MAP == category) { + sb.append("map<"); + sb.append(mapKeyType); + sb.append(","); + sb.append(subSchema.getSchemaAsTypeString()); + sb.append(">"); } + return (typeString = sb.toString().toLowerCase()); + } - /** - * Constructor for constructing a MAP type HCatFieldSchema, passing type of key and value - * @param fieldName Name of the array or struct field - * @param type Type of the field - must be Type.MAP - * @param mapKeyType - key type of the Map - * @param mapValueSchema - subschema of the value of the Map - * @throws HCatException if call made on non-Map types - */ - public HCatFieldSchema(String fieldName, Type type, Type mapKeyType, HCatSchema mapValueSchema, 
String comment) throws HCatException { - assertTypeInCategory(type, Category.MAP, fieldName); - assertTypeInCategory(mapKeyType, Category.PRIMITIVE, fieldName); - this.fieldName = fieldName; - this.type = Type.MAP; - this.category = Category.MAP; - this.mapKeyType = mapKeyType; - this.subSchema = mapValueSchema; - this.subSchema.get(0).setName(null); - this.comment = comment; + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; } - - public HCatSchema getStructSubSchema() throws HCatException { - assertTypeInCategory(this.type, Category.STRUCT, this.fieldName); - return subSchema; + if (obj == null) { + return false; } - - public HCatSchema getArrayElementSchema() throws HCatException { - assertTypeInCategory(this.type, Category.ARRAY, this.fieldName); - return subSchema; + if (!(obj instanceof HCatFieldSchema)) { + return false; } - - public Type getMapKeyType() throws HCatException { - assertTypeInCategory(this.type, Category.MAP, this.fieldName); - return mapKeyType; + HCatFieldSchema other = (HCatFieldSchema) obj; + if (category != other.category) { + return false; } - - public HCatSchema getMapValueSchema() throws HCatException { - assertTypeInCategory(this.type, Category.MAP, this.fieldName); - return subSchema; - } - - private static void assertTypeInCategory(Type type, Category category, String fieldName) throws HCatException { - Category typeCategory = Category.fromType(type); - if (typeCategory != category) { - throw new HCatException("Type category mismatch. Expected " + category + " but type " + type + " in category " + typeCategory + " (field " + fieldName + ")"); - } + if (fieldName == null) { + if (other.fieldName != null) { + return false; + } + } else if (!fieldName.equals(other.fieldName)) { + return false; } - - private static void assertTypeNotInCategory(Type type, Category category) throws HCatException { - Category typeCategory = Category.fromType(type); - if (typeCategory == category) { - throw new HCatException("Type category mismatch. 
Expected type " + type + " not in category " + category + " but was so."); - } - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("fieldName", fieldName) - .append("comment", comment) - .append("type", getTypeString()) - .append("category", category) - .toString(); - } - - public String getTypeString() { - if (typeString != null) { - return typeString; - } - - StringBuilder sb = new StringBuilder(); - if (Category.PRIMITIVE == category) { - sb.append(type); - } else if (Category.STRUCT == category) { - sb.append("struct<"); - sb.append(subSchema.getSchemaAsTypeString()); - sb.append(">"); - } else if (Category.ARRAY == category) { - sb.append("array<"); - sb.append(subSchema.getSchemaAsTypeString()); - sb.append(">"); - } else if (Category.MAP == category) { - sb.append("map<"); - sb.append(mapKeyType); - sb.append(","); - sb.append(subSchema.getSchemaAsTypeString()); - sb.append(">"); - } - return (typeString = sb.toString().toLowerCase()); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HCatFieldSchema)) { - return false; - } - HCatFieldSchema other = (HCatFieldSchema) obj; - if (category != other.category) { - return false; - } - if (fieldName == null) { - if (other.fieldName != null) { - return false; - } - } else if (!fieldName.equals(other.fieldName)) { - return false; - } - if (this.getTypeString() == null) { - if (other.getTypeString() != null) { - return false; - } - } else if (!this.getTypeString().equals(other.getTypeString())) { - return false; - } - return true; - } - - @Override - public int hashCode() { - //result could be cached if this object were to be made immutable... - int result = 17; - result = 31 * result + (category == null ? 0 : category.hashCode()); - result = 31 * result + (fieldName == null ? 0 : fieldName.hashCode()); - result = 31 * result + (getTypeString() == null ? 0 : - getTypeString().hashCode()); - return result; + if (this.getTypeString() == null) { + if (other.getTypeString() != null) { + return false; + } + } else if (!this.getTypeString().equals(other.getTypeString())) { + return false; } + return true; + } + + @Override + public int hashCode() { + //result could be cached if this object were to be made immutable... + int result = 17; + result = 31 * result + (category == null ? 0 : category.hashCode()); + result = 31 * result + (fieldName == null ? 0 : fieldName.hashCode()); + result = 31 * result + (getTypeString() == null ? 0 : + getTypeString().hashCode()); + return result; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchema.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchema.java index e66b1c9..5fc79ec 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchema.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchema.java @@ -33,153 +33,153 @@ public class HCatSchema implements Serializable { - private static final long serialVersionUID = 1L; - - private final List fieldSchemas; - //HCatFieldSchema.getName()->position - private final Map fieldPositionMap; - private final List fieldNames; - - /** - * - * @param fieldSchemas is now owned by HCatSchema. Any subsequent modifications - * on fieldSchemas won't get reflected in HCatSchema. Each fieldSchema's name - * in the list must be unique, otherwise throws IllegalArgumentException. 
- */ - public HCatSchema(final List fieldSchemas) { - this.fieldSchemas = new ArrayList(fieldSchemas); - int idx = 0; - fieldPositionMap = new HashMap(); - fieldNames = new ArrayList(); - for (HCatFieldSchema field : fieldSchemas) { - if (field == null) - throw new IllegalArgumentException("Field cannot be null"); - - String fieldName = field.getName(); - if (fieldPositionMap.containsKey(fieldName)) - throw new IllegalArgumentException("Field named " + fieldName + - " already exists"); - fieldPositionMap.put(fieldName, idx); - fieldNames.add(fieldName); - idx++; - } + private static final long serialVersionUID = 1L; + + private final List fieldSchemas; + //HCatFieldSchema.getName()->position + private final Map fieldPositionMap; + private final List fieldNames; + + /** + * + * @param fieldSchemas is now owned by HCatSchema. Any subsequent modifications + * on fieldSchemas won't get reflected in HCatSchema. Each fieldSchema's name + * in the list must be unique, otherwise throws IllegalArgumentException. + */ + public HCatSchema(final List fieldSchemas) { + this.fieldSchemas = new ArrayList(fieldSchemas); + int idx = 0; + fieldPositionMap = new HashMap(); + fieldNames = new ArrayList(); + for (HCatFieldSchema field : fieldSchemas) { + if (field == null) + throw new IllegalArgumentException("Field cannot be null"); + + String fieldName = field.getName(); + if (fieldPositionMap.containsKey(fieldName)) + throw new IllegalArgumentException("Field named " + fieldName + + " already exists"); + fieldPositionMap.put(fieldName, idx); + fieldNames.add(fieldName); + idx++; } - - public void append(final HCatFieldSchema hfs) throws HCatException { - if (hfs == null) - throw new HCatException("Attempt to append null HCatFieldSchema in HCatSchema."); - - String fieldName = hfs.getName(); - if (fieldPositionMap.containsKey(fieldName)) - throw new HCatException("Attempt to append HCatFieldSchema with already " + - "existing name: " + fieldName + "."); - - this.fieldSchemas.add(hfs); - this.fieldNames.add(fieldName); - this.fieldPositionMap.put(fieldName, this.size() - 1); - } - - /** - * Users are not allowed to modify the list directly, since HCatSchema - * maintains internal state. Use append/remove to modify the schema. - */ - public List getFields() { - return Collections.unmodifiableList(this.fieldSchemas); - } - - /** - * @param fieldName - * @return the index of field named fieldName in Schema. If field is not - * present, returns null. - */ - public Integer getPosition(String fieldName) { - return fieldPositionMap.get(fieldName); - } - - public HCatFieldSchema get(String fieldName) throws HCatException { - return get(getPosition(fieldName)); + } + + public void append(final HCatFieldSchema hfs) throws HCatException { + if (hfs == null) + throw new HCatException("Attempt to append null HCatFieldSchema in HCatSchema."); + + String fieldName = hfs.getName(); + if (fieldPositionMap.containsKey(fieldName)) + throw new HCatException("Attempt to append HCatFieldSchema with already " + + "existing name: " + fieldName + "."); + + this.fieldSchemas.add(hfs); + this.fieldNames.add(fieldName); + this.fieldPositionMap.put(fieldName, this.size() - 1); + } + + /** + * Users are not allowed to modify the list directly, since HCatSchema + * maintains internal state. Use append/remove to modify the schema. + */ + public List getFields() { + return Collections.unmodifiableList(this.fieldSchemas); + } + + /** + * @param fieldName + * @return the index of field named fieldName in Schema. 
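HCatSchema keeps three parallel structures in sync: the field list, a name-to-position map, and the name list. That is why append() and remove() are the only supported mutations, duplicate names are rejected, and getFields() returns an unmodifiable view. A short sketch (field names are illustrative):

import java.util.ArrayList;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class SchemaSketch {
  public static void main(String[] args) throws HCatException {
    HCatSchema schema = new HCatSchema(new ArrayList<HCatFieldSchema>());
    schema.append(new HCatFieldSchema("id", Type.INT, null));
    schema.append(new HCatFieldSchema("name", Type.STRING, null));

    System.out.println(schema.getPosition("name"));         // 1
    System.out.println(schema.get("id").getTypeString());   // int
    System.out.println(schema.getSchemaAsTypeString());     // id:int,name:string

    // Appending a duplicate name throws HCatException.
    try {
      schema.append(new HCatFieldSchema("id", Type.BIGINT, null));
    } catch (HCatException e) {
      System.out.println("rejected: " + e.getMessage());
    }
  }
}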
If field is not + * present, returns null. + */ + public Integer getPosition(String fieldName) { + return fieldPositionMap.get(fieldName); + } + + public HCatFieldSchema get(String fieldName) throws HCatException { + return get(getPosition(fieldName)); + } + + public List getFieldNames() { + return this.fieldNames; + } + + public HCatFieldSchema get(int position) { + return fieldSchemas.get(position); + } + + public int size() { + return fieldSchemas.size(); + } + + public void remove(final HCatFieldSchema hcatFieldSchema) throws HCatException { + + if (!fieldSchemas.contains(hcatFieldSchema)) { + throw new HCatException("Attempt to delete a non-existent column from HCat Schema: " + hcatFieldSchema); } - public List getFieldNames() { - return this.fieldNames; + fieldSchemas.remove(hcatFieldSchema); + fieldPositionMap.remove(hcatFieldSchema.getName()); + fieldNames.remove(hcatFieldSchema.getName()); + } + + @Override + public String toString() { + boolean first = true; + StringBuilder sb = new StringBuilder(); + for (HCatFieldSchema hfs : fieldSchemas) { + if (!first) { + sb.append(","); + } else { + first = false; + } + if (hfs.getName() != null) { + sb.append(hfs.getName()); + sb.append(":"); + } + sb.append(hfs.toString()); } - - public HCatFieldSchema get(int position) { - return fieldSchemas.get(position); + return sb.toString(); + } + + public String getSchemaAsTypeString() { + boolean first = true; + StringBuilder sb = new StringBuilder(); + for (HCatFieldSchema hfs : fieldSchemas) { + if (!first) { + sb.append(","); + } else { + first = false; + } + if (hfs.getName() != null) { + sb.append(hfs.getName()); + sb.append(":"); + } + sb.append(hfs.getTypeString()); } + return sb.toString(); + } - public int size() { - return fieldSchemas.size(); + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; } - - public void remove(final HCatFieldSchema hcatFieldSchema) throws HCatException { - - if (!fieldSchemas.contains(hcatFieldSchema)) { - throw new HCatException("Attempt to delete a non-existent column from HCat Schema: " + hcatFieldSchema); - } - - fieldSchemas.remove(hcatFieldSchema); - fieldPositionMap.remove(hcatFieldSchema.getName()); - fieldNames.remove(hcatFieldSchema.getName()); + if (obj == null) { + return false; } - - @Override - public String toString() { - boolean first = true; - StringBuilder sb = new StringBuilder(); - for (HCatFieldSchema hfs : fieldSchemas) { - if (!first) { - sb.append(","); - } else { - first = false; - } - if (hfs.getName() != null) { - sb.append(hfs.getName()); - sb.append(":"); - } - sb.append(hfs.toString()); - } - return sb.toString(); + if (!(obj instanceof HCatSchema)) { + return false; } - - public String getSchemaAsTypeString() { - boolean first = true; - StringBuilder sb = new StringBuilder(); - for (HCatFieldSchema hfs : fieldSchemas) { - if (!first) { - sb.append(","); - } else { - first = false; - } - if (hfs.getName() != null) { - sb.append(hfs.getName()); - sb.append(":"); - } - sb.append(hfs.getTypeString()); - } - return sb.toString(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HCatSchema)) { - return false; - } - HCatSchema other = (HCatSchema) obj; - if (!this.getFields().equals(other.getFields())) { - return false; - } - return true; + HCatSchema other = (HCatSchema) obj; + if (!this.getFields().equals(other.getFields())) { + return false; } + return true; + } - @Override - public int 
hashCode() { - return toString().hashCode(); - } + @Override + public int hashCode() { + return toString().hashCode(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchemaUtils.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchemaUtils.java index e9fdb83..0fa7465 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchemaUtils.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchemaUtils.java @@ -37,193 +37,193 @@ public class HCatSchemaUtils { - public static CollectionBuilder getStructSchemaBuilder() { - return new CollectionBuilder(); - } - - public static CollectionBuilder getListSchemaBuilder() { - return new CollectionBuilder(); - } - - public static MapBuilder getMapSchemaBuilder() { - return new MapBuilder(); - } - - - public static abstract class HCatSchemaBuilder { - public abstract HCatSchema build() throws HCatException; - } - - public static class CollectionBuilder extends HCatSchemaBuilder { // for STRUCTS(multiple-add-calls) and LISTS(single-add-call) - List fieldSchemas = null; - - CollectionBuilder() { - fieldSchemas = new ArrayList(); - } + public static CollectionBuilder getStructSchemaBuilder() { + return new CollectionBuilder(); + } - public CollectionBuilder addField(FieldSchema fieldSchema) throws HCatException { - return this.addField(getHCatFieldSchema(fieldSchema)); - } + public static CollectionBuilder getListSchemaBuilder() { + return new CollectionBuilder(); + } - public CollectionBuilder addField(HCatFieldSchema fieldColumnSchema) { - fieldSchemas.add(fieldColumnSchema); - return this; - } - - @Override - public HCatSchema build() throws HCatException { - return new HCatSchema(fieldSchemas); - } - - } - - public static class MapBuilder extends HCatSchemaBuilder { - - Type keyType = null; - HCatSchema valueSchema = null; - - @Override - public HCatSchema build() throws HCatException { - List fslist = new ArrayList(); - fslist.add(new HCatFieldSchema(null, Type.MAP, keyType, valueSchema, null)); - return new HCatSchema(fslist); - } - - public MapBuilder withValueSchema(HCatSchema valueSchema) { - this.valueSchema = valueSchema; - return this; - } - - public MapBuilder withKeyType(Type keyType) { - this.keyType = keyType; - return this; - } - - } + public static MapBuilder getMapSchemaBuilder() { + return new MapBuilder(); + } - /** - * Convert a HCatFieldSchema to a FieldSchema - * @param fs FieldSchema to convert - * @return HCatFieldSchema representation of FieldSchema - * @throws HCatException - */ - public static HCatFieldSchema getHCatFieldSchema(FieldSchema fs) throws HCatException { - String fieldName = fs.getName(); - TypeInfo baseTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()); - return getHCatFieldSchema(fieldName, baseTypeInfo); + public static abstract class HCatSchemaBuilder { + public abstract HCatSchema build() throws HCatException; + } + + public static class CollectionBuilder extends HCatSchemaBuilder { // for STRUCTS(multiple-add-calls) and LISTS(single-add-call) + List fieldSchemas = null; + + CollectionBuilder() { + fieldSchemas = new ArrayList(); } - - private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo) throws HCatException { - Category typeCategory = fieldTypeInfo.getCategory(); - HCatFieldSchema hCatFieldSchema; - if (Category.PRIMITIVE == typeCategory) { - hCatFieldSchema = new HCatFieldSchema(fieldName, getPrimitiveHType(fieldTypeInfo), null); - } else 
if (Category.STRUCT == typeCategory) { - HCatSchema subSchema = constructHCatSchema((StructTypeInfo) fieldTypeInfo); - hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRUCT, subSchema, null); - } else if (Category.LIST == typeCategory) { - HCatSchema subSchema = getHCatSchema(((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo()); - hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.ARRAY, subSchema, null); - } else if (Category.MAP == typeCategory) { - HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) fieldTypeInfo).getMapKeyTypeInfo()); - HCatSchema subSchema = getHCatSchema(((MapTypeInfo) fieldTypeInfo).getMapValueTypeInfo()); - hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.MAP, mapKeyType, subSchema, null); - } else { - throw new TypeNotPresentException(fieldTypeInfo.getTypeName(), null); - } - return hCatFieldSchema; - } - - private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) { - switch (((PrimitiveTypeInfo) basePrimitiveTypeInfo).getPrimitiveCategory()) { - case BOOLEAN: - return Type.BOOLEAN; - case BYTE: - return Type.TINYINT; - case DOUBLE: - return Type.DOUBLE; - case FLOAT: - return Type.FLOAT; - case INT: - return Type.INT; - case LONG: - return Type.BIGINT; - case SHORT: - return Type.SMALLINT; - case STRING: - return Type.STRING; - case BINARY: - return Type.BINARY; - default: - throw new TypeNotPresentException(((PrimitiveTypeInfo) basePrimitiveTypeInfo).getTypeName(), null); - } - } - - public static HCatSchema getHCatSchema(Schema schema) throws HCatException { - return getHCatSchema(schema.getFieldSchemas()); - } - - public static HCatSchema getHCatSchema(List fslist) throws HCatException { - CollectionBuilder builder = getStructSchemaBuilder(); - for (FieldSchema fieldSchema : fslist) { - builder.addField(fieldSchema); - } - return builder.build(); - } - - private static HCatSchema constructHCatSchema(StructTypeInfo stypeInfo) throws HCatException { - CollectionBuilder builder = getStructSchemaBuilder(); - for (String fieldName : ((StructTypeInfo) stypeInfo).getAllStructFieldNames()) { - builder.addField(getHCatFieldSchema(fieldName, ((StructTypeInfo) stypeInfo).getStructFieldTypeInfo(fieldName))); - } - return builder.build(); - } - - public static HCatSchema getHCatSchema(TypeInfo typeInfo) throws HCatException { - Category typeCategory = typeInfo.getCategory(); - HCatSchema hCatSchema; - if (Category.PRIMITIVE == typeCategory) { - hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, getPrimitiveHType(typeInfo), null)).build(); - } else if (Category.STRUCT == typeCategory) { - HCatSchema subSchema = constructHCatSchema((StructTypeInfo) typeInfo); - hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, Type.STRUCT, subSchema, null)).build(); - } else if (Category.LIST == typeCategory) { - CollectionBuilder builder = getListSchemaBuilder(); - builder.addField(getHCatFieldSchema(null, ((ListTypeInfo) typeInfo).getListElementTypeInfo())); - hCatSchema = new HCatSchema(Arrays.asList(new HCatFieldSchema("", Type.ARRAY, builder.build(), ""))); - } else if (Category.MAP == typeCategory) { - HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); - HCatSchema subSchema = getHCatSchema(((MapTypeInfo) typeInfo).getMapValueTypeInfo()); - MapBuilder builder = getMapSchemaBuilder(); - hCatSchema = builder.withKeyType(mapKeyType).withValueSchema(subSchema).build(); - } else { - throw new 
TypeNotPresentException(typeInfo.getTypeName(), null); - } - return hCatSchema; - } - - public static HCatSchema getHCatSchemaFromTypeString(String typeString) throws HCatException { - return getHCatSchema(TypeInfoUtils.getTypeInfoFromTypeString(typeString)); - } - - public static HCatSchema getHCatSchema(String schemaString) throws HCatException { - if ((schemaString == null) || (schemaString.trim().isEmpty())) { - return new HCatSchema(new ArrayList()); // empty HSchema construct - } - HCatSchema outerSchema = getHCatSchemaFromTypeString("struct<" + schemaString + ">"); - return outerSchema.get(0).getStructSubSchema(); - } - - public static FieldSchema getFieldSchema(HCatFieldSchema hcatFieldSchema) { - return new FieldSchema(hcatFieldSchema.getName(), hcatFieldSchema.getTypeString(), hcatFieldSchema.getComment()); - } - - public static List getFieldSchemas(List hcatFieldSchemas) { - List lfs = new ArrayList(); - for (HCatFieldSchema hfs : hcatFieldSchemas) { - lfs.add(getFieldSchema(hfs)); - } - return lfs; + + public CollectionBuilder addField(FieldSchema fieldSchema) throws HCatException { + return this.addField(getHCatFieldSchema(fieldSchema)); } + + public CollectionBuilder addField(HCatFieldSchema fieldColumnSchema) { + fieldSchemas.add(fieldColumnSchema); + return this; + } + + @Override + public HCatSchema build() throws HCatException { + return new HCatSchema(fieldSchemas); + } + + } + + public static class MapBuilder extends HCatSchemaBuilder { + + Type keyType = null; + HCatSchema valueSchema = null; + + @Override + public HCatSchema build() throws HCatException { + List fslist = new ArrayList(); + fslist.add(new HCatFieldSchema(null, Type.MAP, keyType, valueSchema, null)); + return new HCatSchema(fslist); + } + + public MapBuilder withValueSchema(HCatSchema valueSchema) { + this.valueSchema = valueSchema; + return this; + } + + public MapBuilder withKeyType(Type keyType) { + this.keyType = keyType; + return this; + } + + } + + + /** + * Convert a HCatFieldSchema to a FieldSchema + * @param fs FieldSchema to convert + * @return HCatFieldSchema representation of FieldSchema + * @throws HCatException + */ + public static HCatFieldSchema getHCatFieldSchema(FieldSchema fs) throws HCatException { + String fieldName = fs.getName(); + TypeInfo baseTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()); + return getHCatFieldSchema(fieldName, baseTypeInfo); + } + + private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo) throws HCatException { + Category typeCategory = fieldTypeInfo.getCategory(); + HCatFieldSchema hCatFieldSchema; + if (Category.PRIMITIVE == typeCategory) { + hCatFieldSchema = new HCatFieldSchema(fieldName, getPrimitiveHType(fieldTypeInfo), null); + } else if (Category.STRUCT == typeCategory) { + HCatSchema subSchema = constructHCatSchema((StructTypeInfo) fieldTypeInfo); + hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRUCT, subSchema, null); + } else if (Category.LIST == typeCategory) { + HCatSchema subSchema = getHCatSchema(((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo()); + hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.ARRAY, subSchema, null); + } else if (Category.MAP == typeCategory) { + HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) fieldTypeInfo).getMapKeyTypeInfo()); + HCatSchema subSchema = getHCatSchema(((MapTypeInfo) fieldTypeInfo).getMapValueTypeInfo()); + hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.MAP, 
mapKeyType, subSchema, null); + } else { + throw new TypeNotPresentException(fieldTypeInfo.getTypeName(), null); + } + return hCatFieldSchema; + } + + private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) { + switch (((PrimitiveTypeInfo) basePrimitiveTypeInfo).getPrimitiveCategory()) { + case BOOLEAN: + return Type.BOOLEAN; + case BYTE: + return Type.TINYINT; + case DOUBLE: + return Type.DOUBLE; + case FLOAT: + return Type.FLOAT; + case INT: + return Type.INT; + case LONG: + return Type.BIGINT; + case SHORT: + return Type.SMALLINT; + case STRING: + return Type.STRING; + case BINARY: + return Type.BINARY; + default: + throw new TypeNotPresentException(((PrimitiveTypeInfo) basePrimitiveTypeInfo).getTypeName(), null); + } + } + + public static HCatSchema getHCatSchema(Schema schema) throws HCatException { + return getHCatSchema(schema.getFieldSchemas()); + } + + public static HCatSchema getHCatSchema(List fslist) throws HCatException { + CollectionBuilder builder = getStructSchemaBuilder(); + for (FieldSchema fieldSchema : fslist) { + builder.addField(fieldSchema); + } + return builder.build(); + } + + private static HCatSchema constructHCatSchema(StructTypeInfo stypeInfo) throws HCatException { + CollectionBuilder builder = getStructSchemaBuilder(); + for (String fieldName : ((StructTypeInfo) stypeInfo).getAllStructFieldNames()) { + builder.addField(getHCatFieldSchema(fieldName, ((StructTypeInfo) stypeInfo).getStructFieldTypeInfo(fieldName))); + } + return builder.build(); + } + + public static HCatSchema getHCatSchema(TypeInfo typeInfo) throws HCatException { + Category typeCategory = typeInfo.getCategory(); + HCatSchema hCatSchema; + if (Category.PRIMITIVE == typeCategory) { + hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, getPrimitiveHType(typeInfo), null)).build(); + } else if (Category.STRUCT == typeCategory) { + HCatSchema subSchema = constructHCatSchema((StructTypeInfo) typeInfo); + hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, Type.STRUCT, subSchema, null)).build(); + } else if (Category.LIST == typeCategory) { + CollectionBuilder builder = getListSchemaBuilder(); + builder.addField(getHCatFieldSchema(null, ((ListTypeInfo) typeInfo).getListElementTypeInfo())); + hCatSchema = new HCatSchema(Arrays.asList(new HCatFieldSchema("", Type.ARRAY, builder.build(), ""))); + } else if (Category.MAP == typeCategory) { + HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); + HCatSchema subSchema = getHCatSchema(((MapTypeInfo) typeInfo).getMapValueTypeInfo()); + MapBuilder builder = getMapSchemaBuilder(); + hCatSchema = builder.withKeyType(mapKeyType).withValueSchema(subSchema).build(); + } else { + throw new TypeNotPresentException(typeInfo.getTypeName(), null); + } + return hCatSchema; + } + + public static HCatSchema getHCatSchemaFromTypeString(String typeString) throws HCatException { + return getHCatSchema(TypeInfoUtils.getTypeInfoFromTypeString(typeString)); + } + + public static HCatSchema getHCatSchema(String schemaString) throws HCatException { + if ((schemaString == null) || (schemaString.trim().isEmpty())) { + return new HCatSchema(new ArrayList()); // empty HSchema construct + } + HCatSchema outerSchema = getHCatSchemaFromTypeString("struct<" + schemaString + ">"); + return outerSchema.get(0).getStructSubSchema(); + } + + public static FieldSchema getFieldSchema(HCatFieldSchema hcatFieldSchema) { + return new FieldSchema(hcatFieldSchema.getName(), 
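HCatSchemaUtils converts between Hive's FieldSchema/TypeInfo representation and HCatSchema: getHCatSchema(String) wraps a column list in a synthetic struct<...> type string, parses it via TypeInfoUtils, and unwraps the outer struct, while getFieldSchema()/getFieldSchemas() convert back. A sketch of that round trip (the column spec is illustrative):

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;

public class SchemaUtilsSketch {
  public static void main(String[] args) throws HCatException {
    // Hive-style column spec -> HCatSchema (parsed as struct<...> internally).
    HCatSchema schema = HCatSchemaUtils.getHCatSchema(
        "id:bigint,tags:array<string>,props:map<string,string>");

    for (HCatFieldSchema field : schema.getFields()) {
      System.out.println(field.getName() + " -> " + field.getTypeString());
    }

    // And back to Hive metastore FieldSchema objects.
    System.out.println(HCatSchemaUtils.getFieldSchemas(schema.getFields()));
  }
}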
hcatFieldSchema.getTypeString(), hcatFieldSchema.getComment()); + } + + public static List getFieldSchemas(List hcatFieldSchemas) { + List lfs = new ArrayList(); + for (HCatFieldSchema hfs : hcatFieldSchemas) { + lfs.add(getFieldSchema(hfs)); + } + return lfs; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/DataTransferFactory.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/DataTransferFactory.java index 5d9d9ef..a415ac6 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/DataTransferFactory.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/DataTransferFactory.java @@ -35,102 +35,102 @@ public class DataTransferFactory { - /** - * This should be called once from master node to obtain an instance of - * {@link HCatReader}. - * - * @param re - * ReadEntity built using {@link ReadEntity.Builder} - * @param config - * any configuration which master node wants to pass to HCatalog - * @return {@link HCatReader} - */ - public static HCatReader getHCatReader(final ReadEntity re, - final Map config) { - // In future, this may examine ReadEntity and/or config to return - // appropriate HCatReader - return new HCatInputFormatReader(re, config); - } + /** + * This should be called once from master node to obtain an instance of + * {@link HCatReader}. + * + * @param re + * ReadEntity built using {@link ReadEntity.Builder} + * @param config + * any configuration which master node wants to pass to HCatalog + * @return {@link HCatReader} + */ + public static HCatReader getHCatReader(final ReadEntity re, + final Map config) { + // In future, this may examine ReadEntity and/or config to return + // appropriate HCatReader + return new HCatInputFormatReader(re, config); + } - /** - * This should only be called once from every slave node to obtain an instance - * of {@link HCatReader}. - * - * @param split - * input split obtained at master node - * @param config - * configuration obtained at master node - * @return {@link HCatReader} - */ - public static HCatReader getHCatReader(final InputSplit split, - final Configuration config) { - // In future, this may examine config to return appropriate HCatReader - return getHCatReader(split, config, DefaultStateProvider.get()); - } + /** + * This should only be called once from every slave node to obtain an instance + * of {@link HCatReader}. + * + * @param split + * input split obtained at master node + * @param config + * configuration obtained at master node + * @return {@link HCatReader} + */ + public static HCatReader getHCatReader(final InputSplit split, + final Configuration config) { + // In future, this may examine config to return appropriate HCatReader + return getHCatReader(split, config, DefaultStateProvider.get()); + } - /** - * This should only be called once from every slave node to obtain an instance - * of {@link HCatReader}. This should be called if an external system has some - * state to provide to HCatalog. 
- * - * @param split - * input split obtained at master node - * @param config - * configuration obtained at master node - * @param sp - * {@link StateProvider} - * @return {@link HCatReader} - */ - public static HCatReader getHCatReader(final InputSplit split, - final Configuration config, StateProvider sp) { - // In future, this may examine config to return appropriate HCatReader - return new HCatInputFormatReader(split, config, sp); - } + /** + * This should only be called once from every slave node to obtain an instance + * of {@link HCatReader}. This should be called if an external system has some + * state to provide to HCatalog. + * + * @param split + * input split obtained at master node + * @param config + * configuration obtained at master node + * @param sp + * {@link StateProvider} + * @return {@link HCatReader} + */ + public static HCatReader getHCatReader(final InputSplit split, + final Configuration config, StateProvider sp) { + // In future, this may examine config to return appropriate HCatReader + return new HCatInputFormatReader(split, config, sp); + } - /** - * This should be called at master node to obtain an instance of - * {@link HCatWriter}. - * - * @param we - * WriteEntity built using {@link WriteEntity.Builder} - * @param config - * any configuration which master wants to pass to HCatalog - * @return {@link HCatWriter} - */ - public static HCatWriter getHCatWriter(final WriteEntity we, - final Map config) { - // In future, this may examine WriteEntity and/or config to return - // appropriate HCatWriter - return new HCatOutputFormatWriter(we, config); - } + /** + * This should be called at master node to obtain an instance of + * {@link HCatWriter}. + * + * @param we + * WriteEntity built using {@link WriteEntity.Builder} + * @param config + * any configuration which master wants to pass to HCatalog + * @return {@link HCatWriter} + */ + public static HCatWriter getHCatWriter(final WriteEntity we, + final Map config) { + // In future, this may examine WriteEntity and/or config to return + // appropriate HCatWriter + return new HCatOutputFormatWriter(we, config); + } - /** - * This should be called at slave nodes to obtain an instance of - * {@link HCatWriter}. - * - * @param cntxt - * {@link WriterContext} obtained at master node - * @return {@link HCatWriter} - */ - public static HCatWriter getHCatWriter(final WriterContext cntxt) { - // In future, this may examine context to return appropriate HCatWriter - return getHCatWriter(cntxt, DefaultStateProvider.get()); - } + /** + * This should be called at slave nodes to obtain an instance of + * {@link HCatWriter}. + * + * @param cntxt + * {@link WriterContext} obtained at master node + * @return {@link HCatWriter} + */ + public static HCatWriter getHCatWriter(final WriterContext cntxt) { + // In future, this may examine context to return appropriate HCatWriter + return getHCatWriter(cntxt, DefaultStateProvider.get()); + } - /** - * This should be called at slave nodes to obtain an instance of - * {@link HCatWriter}. If an external system has some mechanism for providing - * state to HCatalog, this constructor can be used. 
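DataTransferFactory is the entry point for the bulk data-transfer API: the master builds a WriteEntity, obtains an HCatWriter and a WriterContext via prepareWrite(), ships the context to the slave nodes where write() runs, and finally calls commit() (or abort()) back on the master. A hedged sketch of that write path; WriteEntity.Builder and its build() method are assumed here by analogy with the ReadEntity.Builder shown later in this patch, since they are not part of this hunk, and the database/table names are illustrative:

import java.util.HashMap;
import java.util.Map;

import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatWriter;
import org.apache.hive.hcatalog.data.transfer.WriteEntity;
import org.apache.hive.hcatalog.data.transfer.WriterContext;

public class WriteFlowSketch {
  public static void main(String[] args) throws Exception {
    Map<String, String> config = new HashMap<String, String>();

    WriteEntity entity = new WriteEntity.Builder()  // assumed builder, mirroring ReadEntity.Builder
        .withDatabase("default")
        .withTable("web_logs")
        .build();                                   // assumed, not shown in this hunk

    HCatWriter master = DataTransferFactory.getHCatWriter(entity, config);
    WriterContext context = master.prepareWrite();  // serialize this and ship it to the slaves

    // On each slave: DataTransferFactory.getHCatWriter(context).write(recordIterator);

    master.commit(context);                         // or master.abort(context) on failure
  }
}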
- * - * @param cntxt - * {@link WriterContext} obtained at master node - * @param sp - * {@link StateProvider} - * @return {@link HCatWriter} - */ - public static HCatWriter getHCatWriter(final WriterContext cntxt, - final StateProvider sp) { - // In future, this may examine context to return appropriate HCatWriter - return new HCatOutputFormatWriter(cntxt.getConf(), sp); - } + /** + * This should be called at slave nodes to obtain an instance of + * {@link HCatWriter}. If an external system has some mechanism for providing + * state to HCatalog, this constructor can be used. + * + * @param cntxt + * {@link WriterContext} obtained at master node + * @param sp + * {@link StateProvider} + * @return {@link HCatWriter} + */ + public static HCatWriter getHCatWriter(final WriterContext cntxt, + final StateProvider sp) { + // In future, this may examine context to return appropriate HCatWriter + return new HCatOutputFormatWriter(cntxt.getConf(), sp); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/EntityBase.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/EntityBase.java index 65f3c9b..2db57cf 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/EntityBase.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/EntityBase.java @@ -30,31 +30,31 @@ abstract class EntityBase { - String region; - String tableName; - String dbName; - Map partitionKVs; + String region; + String tableName; + String dbName; + Map partitionKVs; - /** - * Common methods for {@link ReadEntity} and {@link WriteEntity} - */ + /** + * Common methods for {@link ReadEntity} and {@link WriteEntity} + */ - abstract static class Entity extends EntityBase { + abstract static class Entity extends EntityBase { - public String getRegion() { - return region; - } + public String getRegion() { + return region; + } - public String getTableName() { - return tableName; - } + public String getTableName() { + return tableName; + } - public String getDbName() { - return dbName; - } + public String getDbName() { + return dbName; + } - public Map getPartitionKVs() { - return partitionKVs; - } + public Map getPartitionKVs() { + return partitionKVs; } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatReader.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatReader.java index 8286389..ef1d065 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatReader.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatReader.java @@ -35,68 +35,68 @@ public abstract class HCatReader { - /** - * This should be called at master node to obtain {@link ReaderContext} which - * then should be serialized and sent to slave nodes. - * - * @return {@link ReaderContext} - * @throws HCatException - */ - public abstract ReaderContext prepareRead() throws HCatException; + /** + * This should be called at master node to obtain {@link ReaderContext} which + * then should be serialized and sent to slave nodes. 
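The read path mirrors the write path: the master describes the source with ReadEntity, calls prepareRead() to obtain a ReaderContext to ship out, and each slave turns its input split back into an HCatReader and iterates read(). A sketch under the assumptions that ReadEntity.Builder exposes a build() method (only its with* setters are visible in this patch) and that the split type is org.apache.hadoop.mapreduce.InputSplit; the names and partition filter are illustrative:

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatReader;
import org.apache.hive.hcatalog.data.transfer.ReadEntity;
import org.apache.hive.hcatalog.data.transfer.ReaderContext;

public class ReadFlowSketch {

  // Master side: describe what to read and obtain a ReaderContext to ship to slaves.
  static ReaderContext prepareOnMaster() throws Exception {
    ReadEntity entity = new ReadEntity.Builder()
        .withDatabase("default")
        .withTable("web_logs")
        .withFilter("dt=\"2013-01-01\"")
        .build();                                   // assumed, not shown in this hunk
    Map<String, String> config = new HashMap<String, String>();
    return DataTransferFactory.getHCatReader(entity, config).prepareRead();
  }

  // Slave side: one split per task; read records off it.
  static void readOnSlave(InputSplit split, Configuration conf) throws Exception {
    HCatReader reader = DataTransferFactory.getHCatReader(split, conf);
    Iterator<HCatRecord> it = reader.read();
    while (it.hasNext()) {
      System.out.println(it.next());
    }
  }
}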
+ * + * @return {@link ReaderContext} + * @throws HCatException + */ + public abstract ReaderContext prepareRead() throws HCatException; - /** - * This should be called at slave nodes to read {@link HCatRecord}s - * - * @return {@link Iterator} of {@link HCatRecord} - * @throws HCatException - */ - public abstract Iterator read() throws HCatException; + /** + * This should be called at slave nodes to read {@link HCatRecord}s + * + * @return {@link Iterator} of {@link HCatRecord} + * @throws HCatException + */ + public abstract Iterator read() throws HCatException; - /** - * This constructor will be invoked by {@link DataTransferFactory} at master - * node. Don't use this constructor. Instead, use {@link DataTransferFactory} - * - * @param re - * @param config - */ - protected HCatReader(final ReadEntity re, final Map config) { - this(config); - this.re = re; - } + /** + * This constructor will be invoked by {@link DataTransferFactory} at master + * node. Don't use this constructor. Instead, use {@link DataTransferFactory} + * + * @param re + * @param config + */ + protected HCatReader(final ReadEntity re, final Map config) { + this(config); + this.re = re; + } - /** - * This constructor will be invoked by {@link DataTransferFactory} at slave - * nodes. Don't use this constructor. Instead, use {@link DataTransferFactory} - * - * @param config - * @param sp - */ + /** + * This constructor will be invoked by {@link DataTransferFactory} at slave + * nodes. Don't use this constructor. Instead, use {@link DataTransferFactory} + * + * @param config + * @param sp + */ - protected HCatReader(final Configuration config, StateProvider sp) { - this.conf = config; - this.sp = sp; - } + protected HCatReader(final Configuration config, StateProvider sp) { + this.conf = config; + this.sp = sp; + } - protected ReadEntity re; // This will be null at slaves. - protected Configuration conf; - protected ReaderContext info; - protected StateProvider sp; // This will be null at master. + protected ReadEntity re; // This will be null at slaves. + protected Configuration conf; + protected ReaderContext info; + protected StateProvider sp; // This will be null at master. - private HCatReader(final Map config) { - Configuration conf = new Configuration(); - if (null != config) { - for (Entry kv : config.entrySet()) { - conf.set(kv.getKey(), kv.getValue()); - } - } - this.conf = conf; + private HCatReader(final Map config) { + Configuration conf = new Configuration(); + if (null != config) { + for (Entry kv : config.entrySet()) { + conf.set(kv.getKey(), kv.getValue()); + } } + this.conf = conf; + } - public Configuration getConf() { - if (null == conf) { - throw new IllegalStateException( - "HCatReader is not constructed correctly."); - } - return conf; + public Configuration getConf() { + if (null == conf) { + throw new IllegalStateException( + "HCatReader is not constructed correctly."); } + return conf; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatWriter.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatWriter.java index 23ab8b6..297e657 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatWriter.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatWriter.java @@ -36,79 +36,79 @@ public abstract class HCatWriter { - protected Configuration conf; - protected WriteEntity we; // This will be null at slave nodes. 
- protected WriterContext info; - protected StateProvider sp; + protected Configuration conf; + protected WriteEntity we; // This will be null at slave nodes. + protected WriterContext info; + protected StateProvider sp; - /** - * External system should invoke this method exactly once from a master node. - * - * @return {@link WriterContext} This should be serialized and sent to slave - * nodes to construct HCatWriter there. - * @throws HCatException - */ - public abstract WriterContext prepareWrite() throws HCatException; + /** + * External system should invoke this method exactly once from a master node. + * + * @return {@link WriterContext} This should be serialized and sent to slave + * nodes to construct HCatWriter there. + * @throws HCatException + */ + public abstract WriterContext prepareWrite() throws HCatException; - /** - * This method should be used at slave needs to perform writes. - * - * @param recordItr - * {@link Iterator} records to be written into HCatalog. - * @throws {@link HCatException} - */ - public abstract void write(final Iterator recordItr) - throws HCatException; + /** + * This method should be used at slave needs to perform writes. + * + * @param recordItr + * {@link Iterator} records to be written into HCatalog. + * @throws {@link HCatException} + */ + public abstract void write(final Iterator recordItr) + throws HCatException; - /** - * This method should be called at master node. Primary purpose of this is to - * do metadata commit. - * - * @throws {@link HCatException} - */ - public abstract void commit(final WriterContext context) throws HCatException; + /** + * This method should be called at master node. Primary purpose of this is to + * do metadata commit. + * + * @throws {@link HCatException} + */ + public abstract void commit(final WriterContext context) throws HCatException; - /** - * This method should be called at master node. Primary purpose of this is to - * do cleanups in case of failures. - * - * @throws {@link HCatException} * - */ - public abstract void abort(final WriterContext context) throws HCatException; + /** + * This method should be called at master node. Primary purpose of this is to + * do cleanups in case of failures. + * + * @throws {@link HCatException} * + */ + public abstract void abort(final WriterContext context) throws HCatException; - /** - * This constructor will be used at master node - * - * @param we - * WriteEntity defines where in storage records should be written to. - * @param config - * Any configuration which external system wants to communicate to - * HCatalog for performing writes. - */ - protected HCatWriter(final WriteEntity we, final Map config) { - this(config); - this.we = we; - } - - /** - * This constructor will be used at slave nodes. - * - * @param config - */ - protected HCatWriter(final Configuration config, final StateProvider sp) { - this.conf = config; - this.sp = sp; - } + /** + * This constructor will be used at master node + * + * @param we + * WriteEntity defines where in storage records should be written to. + * @param config + * Any configuration which external system wants to communicate to + * HCatalog for performing writes. + */ + protected HCatWriter(final WriteEntity we, final Map config) { + this(config); + this.we = we; + } - private HCatWriter(final Map config) { - Configuration conf = new Configuration(); - if (config != null) { - // user is providing config, so it could be null. 
- for (Entry kv : config.entrySet()) { - conf.set(kv.getKey(), kv.getValue()); - } - } + /** + * This constructor will be used at slave nodes. + * + * @param config + */ + protected HCatWriter(final Configuration config, final StateProvider sp) { + this.conf = config; + this.sp = sp; + } - this.conf = conf; + private HCatWriter(final Map config) { + Configuration conf = new Configuration(); + if (config != null) { + // user is providing config, so it could be null. + for (Entry kv : config.entrySet()) { + conf.set(kv.getKey(), kv.getValue()); + } } + + this.conf = conf; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReadEntity.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReadEntity.java index b7764ed..20a99c3 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReadEntity.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReadEntity.java @@ -23,67 +23,67 @@ public class ReadEntity extends EntityBase.Entity { + private String filterString; + + /** + * Don't instantiate {@link ReadEntity} directly. Use, + * {@link ReadEntity.Builder} instead. + * + */ + private ReadEntity() { + // Not allowed + } + + private ReadEntity(Builder builder) { + + this.region = builder.region; + this.dbName = builder.dbName; + this.tableName = builder.tableName; + this.partitionKVs = builder.partitionKVs; + this.filterString = builder.filterString; + } + + public String getFilterString() { + return this.filterString; + } + + /** + * This class should be used to build {@link ReadEntity}. It follows builder + * pattern, letting you build your {@link ReadEntity} with whatever level of + * detail you want. + * + */ + public static class Builder extends EntityBase { + private String filterString; - /** - * Don't instantiate {@link ReadEntity} directly. Use, - * {@link ReadEntity.Builder} instead. - * - */ - private ReadEntity() { - // Not allowed + public Builder withRegion(final String region) { + this.region = region; + return this; + } + + public Builder withDatabase(final String dbName) { + this.dbName = dbName; + return this; } - private ReadEntity(Builder builder) { + public Builder withTable(final String tblName) { + this.tableName = tblName; + return this; + } - this.region = builder.region; - this.dbName = builder.dbName; - this.tableName = builder.tableName; - this.partitionKVs = builder.partitionKVs; - this.filterString = builder.filterString; + public Builder withPartition(final Map partKVs) { + this.partitionKVs = partKVs; + return this; } - public String getFilterString() { - return this.filterString; + public Builder withFilter(String filterString) { + this.filterString = filterString; + return this; } - /** - * This class should be used to build {@link ReadEntity}. It follows builder - * pattern, letting you build your {@link ReadEntity} with whatever level of - * detail you want. 
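// Illustrative sketch of the intended HCatWriter life cycle described above: prepareWrite()
// at the master, write() at a slave, then commit()/abort() back at the master. The
// database/table names and the DataTransferFactory.getHCatWriter(WriteEntity, Map) overload
// are assumptions; getHCatWriter(WriterContext, StateProvider), DefaultStateProvider.get(),
// and the abstract methods come from the code in this patch.
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatWriter;
import org.apache.hive.hcatalog.data.transfer.WriteEntity;
import org.apache.hive.hcatalog.data.transfer.WriterContext;
import org.apache.hive.hcatalog.data.transfer.state.DefaultStateProvider;

public class WriteFlowSketch {

  // Master node: describe the target table and obtain a WriterContext to ship to slaves.
  public static WriterContext prepare() throws HCatException {
    WriteEntity entity = new WriteEntity.Builder()
        .withDatabase("default")   // hypothetical database
        .withTable("web_logs")     // hypothetical table
        .build();
    Map<String, String> config = new HashMap<String, String>();
    HCatWriter masterWriter = DataTransferFactory.getHCatWriter(entity, config);
    return masterWriter.prepareWrite();   // WriterContext is Externalizable
  }

  // Slave node: rebuild a writer from the shipped WriterContext and push records.
  public static void writeOnSlave(WriterContext context, Iterator<HCatRecord> records)
      throws HCatException {
    HCatWriter slaveWriter =
        DataTransferFactory.getHCatWriter(context, DefaultStateProvider.get());
    slaveWriter.write(records);
  }

  // Master node again: commit the metadata once all slaves finished, or abort on failure.
  public static void finish(WriterContext context, boolean allSlavesSucceeded)
      throws HCatException {
    WriteEntity entity = new WriteEntity.Builder()
        .withDatabase("default").withTable("web_logs").build();
    HCatWriter masterWriter =
        DataTransferFactory.getHCatWriter(entity, new HashMap<String, String>());
    if (allSlavesSucceeded) {
      masterWriter.commit(context);
    } else {
      masterWriter.abort(context);
    }
  }
}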
- * - */ - public static class Builder extends EntityBase { - - private String filterString; - - public Builder withRegion(final String region) { - this.region = region; - return this; - } - - public Builder withDatabase(final String dbName) { - this.dbName = dbName; - return this; - } - - public Builder withTable(final String tblName) { - this.tableName = tblName; - return this; - } - - public Builder withPartition(final Map partKVs) { - this.partitionKVs = partKVs; - return this; - } - - public Builder withFilter(String filterString) { - this.filterString = filterString; - return this; - } - - public ReadEntity build() { - return new ReadEntity(this); - } + public ReadEntity build() { + return new ReadEntity(this); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReaderContext.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReaderContext.java index 98eeebe..5abe69c 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReaderContext.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReaderContext.java @@ -39,51 +39,51 @@ */ public class ReaderContext implements Externalizable, Configurable { - private static final long serialVersionUID = -2656468331739574367L; - private List splits; - private Configuration conf; + private static final long serialVersionUID = -2656468331739574367L; + private List splits; + private Configuration conf; - public ReaderContext() { - this.splits = new ArrayList(); - this.conf = new Configuration(); - } + public ReaderContext() { + this.splits = new ArrayList(); + this.conf = new Configuration(); + } - public void setInputSplits(final List splits) { - this.splits = splits; - } + public void setInputSplits(final List splits) { + this.splits = splits; + } - public List getSplits() { - return splits; - } + public List getSplits() { + return splits; + } - @Override - public Configuration getConf() { - return conf; - } + @Override + public Configuration getConf() { + return conf; + } - @Override - public void setConf(final Configuration config) { - conf = config; - } + @Override + public void setConf(final Configuration config) { + conf = config; + } - @Override - public void writeExternal(ObjectOutput out) throws IOException { - conf.write(out); - out.writeInt(splits.size()); - for (InputSplit split : splits) { - ((HCatSplit) split).write(out); - } + @Override + public void writeExternal(ObjectOutput out) throws IOException { + conf.write(out); + out.writeInt(splits.size()); + for (InputSplit split : splits) { + ((HCatSplit) split).write(out); } + } - @Override - public void readExternal(ObjectInput in) throws IOException, - ClassNotFoundException { - conf.readFields(in); - int numOfSplits = in.readInt(); - for (int i = 0; i < numOfSplits; i++) { - HCatSplit split = new HCatSplit(); - split.readFields(in); - splits.add(split); - } + @Override + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + conf.readFields(in); + int numOfSplits = in.readInt(); + for (int i = 0; i < numOfSplits; i++) { + HCatSplit split = new HCatSplit(); + split.readFields(in); + splits.add(split); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriteEntity.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriteEntity.java index b71f363..ed8ab91 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriteEntity.java +++ 
b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriteEntity.java @@ -23,53 +23,53 @@ public class WriteEntity extends EntityBase.Entity { - /** - * Don't instantiate {@link WriteEntity} directly. Use, {@link Builder} to - * build {@link WriteEntity}. - */ + /** + * Don't instantiate {@link WriteEntity} directly. Use, {@link Builder} to + * build {@link WriteEntity}. + */ - private WriteEntity() { - // Not allowed. - } - - private WriteEntity(Builder builder) { - this.region = builder.region; - this.dbName = builder.dbName; - this.tableName = builder.tableName; - this.partitionKVs = builder.partitionKVs; - } + private WriteEntity() { + // Not allowed. + } - /** - * This class should be used to build {@link WriteEntity}. It follows builder - * pattern, letting you build your {@link WriteEntity} with whatever level of - * detail you want. - * - */ - public static class Builder extends EntityBase { + private WriteEntity(Builder builder) { + this.region = builder.region; + this.dbName = builder.dbName; + this.tableName = builder.tableName; + this.partitionKVs = builder.partitionKVs; + } - public Builder withRegion(final String region) { - this.region = region; - return this; - } + /** + * This class should be used to build {@link WriteEntity}. It follows builder + * pattern, letting you build your {@link WriteEntity} with whatever level of + * detail you want. + * + */ + public static class Builder extends EntityBase { - public Builder withDatabase(final String dbName) { - this.dbName = dbName; - return this; - } + public Builder withRegion(final String region) { + this.region = region; + return this; + } - public Builder withTable(final String tblName) { - this.tableName = tblName; - return this; - } + public Builder withDatabase(final String dbName) { + this.dbName = dbName; + return this; + } - public Builder withPartition(final Map partKVs) { - this.partitionKVs = partKVs; - return this; - } + public Builder withTable(final String tblName) { + this.tableName = tblName; + return this; + } - public WriteEntity build() { - return new WriteEntity(this); - } + public Builder withPartition(final Map partKVs) { + this.partitionKVs = partKVs; + return this; + } + public WriteEntity build() { + return new WriteEntity(this); } + + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriterContext.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriterContext.java index 2dbf4ae..5e2135c 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriterContext.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriterContext.java @@ -35,31 +35,31 @@ */ public class WriterContext implements Externalizable, Configurable { - private static final long serialVersionUID = -5899374262971611840L; - private Configuration conf; + private static final long serialVersionUID = -5899374262971611840L; + private Configuration conf; - public WriterContext() { - conf = new Configuration(); - } + public WriterContext() { + conf = new Configuration(); + } - @Override - public Configuration getConf() { - return conf; - } + @Override + public Configuration getConf() { + return conf; + } - @Override - public void setConf(final Configuration config) { - this.conf = config; - } + @Override + public void setConf(final Configuration config) { + this.conf = config; + } - @Override - public void writeExternal(ObjectOutput out) throws IOException { - conf.write(out); - } + @Override + public void 
writeExternal(ObjectOutput out) throws IOException { + conf.write(out); + } - @Override - public void readExternal(ObjectInput in) throws IOException, - ClassNotFoundException { - conf.readFields(in); - } + @Override + public void readExternal(ObjectInput in) throws IOException, + ClassNotFoundException { + conf.readFields(in); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatInputFormatReader.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatInputFormatReader.java index bdedd95..2272a1e 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatInputFormatReader.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatInputFormatReader.java @@ -46,92 +46,92 @@ */ public class HCatInputFormatReader extends HCatReader { - private InputSplit split; - - public HCatInputFormatReader(InputSplit split, Configuration config, - StateProvider sp) { - super(config, sp); - this.split = split; + private InputSplit split; + + public HCatInputFormatReader(InputSplit split, Configuration config, + StateProvider sp) { + super(config, sp); + this.split = split; + } + + public HCatInputFormatReader(ReadEntity info, Map config) { + super(info, config); + } + + @Override + public ReaderContext prepareRead() throws HCatException { + try { + Job job = new Job(conf); + HCatInputFormat hcif = HCatInputFormat.setInput( + job, re.getDbName(), re.getTableName()).setFilter(re.getFilterString()); + ReaderContext cntxt = new ReaderContext(); + cntxt.setInputSplits(hcif.getSplits( + ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null))); + cntxt.setConf(job.getConfiguration()); + return cntxt; + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } + } + + @Override + public Iterator read() throws HCatException { + + HCatInputFormat inpFmt = new HCatInputFormat(); + RecordReader rr; + try { + TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID()); + rr = inpFmt.createRecordReader(split, cntxt); + rr.initialize(split, cntxt); + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); } + return new HCatRecordItr(rr); + } + + private static class HCatRecordItr implements Iterator { + + private RecordReader curRecReader; - public HCatInputFormatReader(ReadEntity info, Map config) { - super(info, config); + HCatRecordItr(RecordReader rr) { + curRecReader = rr; } @Override - public ReaderContext prepareRead() throws HCatException { - try { - Job job = new Job(conf); - HCatInputFormat hcif = HCatInputFormat.setInput( - job, re.getDbName(), re.getTableName()).setFilter(re.getFilterString()); - ReaderContext cntxt = new ReaderContext(); - cntxt.setInputSplits(hcif.getSplits( - ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null))); - cntxt.setConf(job.getConfiguration()); - return cntxt; - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + public boolean hasNext() { + try { + boolean retVal = curRecReader.nextKeyValue(); + if (retVal) { + return true; } 
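// Illustrative sketch of the slave-side half of the read path above: take the ReaderContext
// produced at the master, pick one of its splits, and iterate HCatRecords. The
// getHCatReader(InputSplit, Configuration) overload is assumed to correspond to the
// HCatInputFormatReader(InputSplit, Configuration, StateProvider) constructor shown above.
import java.util.Iterator;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatReader;
import org.apache.hive.hcatalog.data.transfer.ReaderContext;

public class SlaveReadSketch {
  // Typically the external system hands each slave one split index.
  public static void readSplit(ReaderContext context, int splitIndex) throws HCatException {
    InputSplit split = context.getSplits().get(splitIndex);
    HCatReader reader = DataTransferFactory.getHCatReader(split, context.getConf());
    Iterator<HCatRecord> records = reader.read();
    while (records.hasNext()) {
      HCatRecord record = records.next();
      Object firstColumn = record.get(0);   // positional access to a column value
      // ... hand the value off to the external system ...
    }
  }
}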
+ // if its false, we need to close recordReader. + curRecReader.close(); + return false; + } catch (IOException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } @Override - public Iterator read() throws HCatException { - - HCatInputFormat inpFmt = new HCatInputFormat(); - RecordReader rr; - try { - TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID()); - rr = inpFmt.createRecordReader(split, cntxt); - rr.initialize(split, cntxt); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } - return new HCatRecordItr(rr); + public HCatRecord next() { + try { + return curRecReader.getCurrentValue(); + } catch (IOException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } - private static class HCatRecordItr implements Iterator { - - private RecordReader curRecReader; - - HCatRecordItr(RecordReader rr) { - curRecReader = rr; - } - - @Override - public boolean hasNext() { - try { - boolean retVal = curRecReader.nextKeyValue(); - if (retVal) { - return true; - } - // if its false, we need to close recordReader. - curRecReader.close(); - return false; - } catch (IOException e) { - throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - @Override - public HCatRecord next() { - try { - return curRecReader.getCurrentValue(); - } catch (IOException e) { - throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Not allowed"); - } + @Override + public void remove() { + throw new UnsupportedOperationException("Not allowed"); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java index 63379d6..17d4e38 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java @@ -48,115 +48,115 @@ */ public class HCatOutputFormatWriter extends HCatWriter { - public HCatOutputFormatWriter(WriteEntity we, Map config) { - super(we, config); - } + public HCatOutputFormatWriter(WriteEntity we, Map config) { + super(we, config); + } - public HCatOutputFormatWriter(Configuration config, StateProvider sp) { - super(config, sp); - } + public HCatOutputFormatWriter(Configuration config, StateProvider sp) { + super(config, sp); + } - @Override - public WriterContext prepareWrite() throws HCatException { - OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(), - we.getTableName(), we.getPartitionKVs()); - Job job; - try { - job = new Job(conf); - HCatOutputFormat.setOutput(job, jobInfo); - HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job)); - HCatOutputFormat outFormat = new HCatOutputFormat(); - outFormat.checkOutputSpecs(job); - outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())).setupJob(job); - } catch (IOException e) { - throw new 
HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } - WriterContext cntxt = new WriterContext(); - cntxt.setConf(job.getConfiguration()); - return cntxt; + @Override + public WriterContext prepareWrite() throws HCatException { + OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(), + we.getTableName(), we.getPartitionKVs()); + Job job; + try { + job = new Job(conf); + HCatOutputFormat.setOutput(job, jobInfo); + HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job)); + HCatOutputFormat outFormat = new HCatOutputFormat(); + outFormat.checkOutputSpecs(job); + outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())).setupJob(job); + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); } + WriterContext cntxt = new WriterContext(); + cntxt.setConf(job.getConfiguration()); + return cntxt; + } - @Override - public void write(Iterator recordItr) throws HCatException { + @Override + public void write(Iterator recordItr) throws HCatException { - int id = sp.getId(); - setVarsInConf(id); - HCatOutputFormat outFormat = new HCatOutputFormat(); - TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id)); - OutputCommitter committer = null; - RecordWriter, HCatRecord> writer; + int id = sp.getId(); + setVarsInConf(id); + HCatOutputFormat outFormat = new HCatOutputFormat(); + TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id)); + OutputCommitter committer = null; + RecordWriter, HCatRecord> writer; + try { + committer = outFormat.getOutputCommitter(cntxt); + committer.setupTask(cntxt); + writer = outFormat.getRecordWriter(cntxt); + while (recordItr.hasNext()) { + HCatRecord rec = recordItr.next(); + writer.write(null, rec); + } + writer.close(cntxt); + if (committer.needsTaskCommit(cntxt)) { + committer.commitTask(cntxt); + } + } catch (IOException e) { + if (null != committer) { try { - committer = outFormat.getOutputCommitter(cntxt); - committer.setupTask(cntxt); - writer = outFormat.getRecordWriter(cntxt); - while (recordItr.hasNext()) { - HCatRecord rec = recordItr.next(); - writer.write(null, rec); - } - writer.close(cntxt); - if (committer.needsTaskCommit(cntxt)) { - committer.commitTask(cntxt); - } - } catch (IOException e) { - if (null != committer) { - try { - committer.abortTask(cntxt); - } catch (IOException e1) { - throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); - } - } - throw new HCatException("Failed while writing", e); - } catch (InterruptedException e) { - if (null != committer) { - try { - committer.abortTask(cntxt); - } catch (IOException e1) { - throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); - } - } - throw new HCatException("Failed while writing", e); + committer.abortTask(cntxt); + } catch (IOException e1) { + throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); } - } - - @Override - public void commit(WriterContext context) throws HCatException { + } + throw new HCatException("Failed while 
writing", e); + } catch (InterruptedException e) { + if (null != committer) { try { - new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) - .commitJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConf(), null)); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + committer.abortTask(cntxt); + } catch (IOException e1) { + throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); } + } + throw new HCatException("Failed while writing", e); } + } - @Override - public void abort(WriterContext context) throws HCatException { - try { - new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) - .abortJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext( - context.getConf(), null), State.FAILED); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } + @Override + public void commit(WriterContext context) throws HCatException { + try { + new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) + .commitJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConf(), null)); + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); } + } - private void setVarsInConf(int id) { - - // Following two config keys are required by FileOutputFormat to work - // correctly. - // In usual case of Hadoop, JobTracker will set these before launching - // tasks. - // Since there is no jobtracker here, we set it ourself. - conf.setInt("mapred.task.partition", id); - conf.set("mapred.task.id", "attempt__0000_r_000000_" + id); + @Override + public void abort(WriterContext context) throws HCatException { + try { + new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) + .abortJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext( + context.getConf(), null), State.FAILED); + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); + } catch (InterruptedException e) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); } + } + + private void setVarsInConf(int id) { + + // Following two config keys are required by FileOutputFormat to work + // correctly. + // In usual case of Hadoop, JobTracker will set these before launching + // tasks. + // Since there is no jobtracker here, we set it ourself. 
+ conf.setInt("mapred.task.partition", id); + conf.set("mapred.task.id", "attempt__0000_r_000000_" + id); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/DefaultStateProvider.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/DefaultStateProvider.java index 9f5a5dc..3b08b2b 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/DefaultStateProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/DefaultStateProvider.java @@ -24,25 +24,25 @@ public class DefaultStateProvider implements StateProvider { - /** - * Default implementation. Here, ids are generated randomly. - */ - @Override - public int getId() { + /** + * Default implementation. Here, ids are generated randomly. + */ + @Override + public int getId() { - NumberFormat numberFormat = NumberFormat.getInstance(); - numberFormat.setMinimumIntegerDigits(5); - numberFormat.setGroupingUsed(false); - return Integer - .parseInt(numberFormat.format(Math.abs(new Random().nextInt()))); - } + NumberFormat numberFormat = NumberFormat.getInstance(); + numberFormat.setMinimumIntegerDigits(5); + numberFormat.setGroupingUsed(false); + return Integer + .parseInt(numberFormat.format(Math.abs(new Random().nextInt()))); + } - private static StateProvider sp; + private static StateProvider sp; - public static synchronized StateProvider get() { - if (null == sp) { - sp = new DefaultStateProvider(); - } - return sp; + public static synchronized StateProvider get() { + if (null == sp) { + sp = new DefaultStateProvider(); } + return sp; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/StateProvider.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/StateProvider.java index 767cd90..06bba68 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/StateProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/StateProvider.java @@ -26,10 +26,10 @@ */ public interface StateProvider { - /** - * This method should return id assigned to slave node. - * - * @return id - */ - public int getId(); + /** + * This method should return id assigned to slave node. 
+ * + * @return id + */ + public int getId(); } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/har/HarOutputCommitterPostProcessor.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/har/HarOutputCommitterPostProcessor.java index 4086cbe..a4b7982 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/har/HarOutputCommitterPostProcessor.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/har/HarOutputCommitterPostProcessor.java @@ -34,92 +34,92 @@ public class HarOutputCommitterPostProcessor { - boolean isEnabled = false; + boolean isEnabled = false; - public boolean isEnabled() { - return isEnabled; - } + public boolean isEnabled() { + return isEnabled; + } - public void setEnabled(boolean enabled) { - this.isEnabled = enabled; - } + public void setEnabled(boolean enabled) { + this.isEnabled = enabled; + } - public void exec(JobContext context, Partition partition, Path partPath) throws IOException { + public void exec(JobContext context, Partition partition, Path partPath) throws IOException { // LOG.info("Archiving partition ["+partPath.toString()+"]"); - makeHar(context, partPath.toUri().toString(), harFile(partPath)); - partition.getParameters().put(hive_metastoreConstants.IS_ARCHIVED, "true"); - } + makeHar(context, partPath.toUri().toString(), harFile(partPath)); + partition.getParameters().put(hive_metastoreConstants.IS_ARCHIVED, "true"); + } - public String harFile(Path ptnPath) throws IOException { - String harFile = ptnPath.toString().replaceFirst("/+$", "") + ".har"; + public String harFile(Path ptnPath) throws IOException { + String harFile = ptnPath.toString().replaceFirst("/+$", "") + ".har"; // LOG.info("har file : " + harFile); - return harFile; - } + return harFile; + } - public String getParentFSPath(Path ptnPath) throws IOException { - return ptnPath.toUri().getPath().replaceFirst("/+$", ""); - } + public String getParentFSPath(Path ptnPath) throws IOException { + return ptnPath.toUri().getPath().replaceFirst("/+$", ""); + } - public String getProcessedLocation(Path ptnPath) throws IOException { - String harLocn = ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "") + ".har" + Path.SEPARATOR; + public String getProcessedLocation(Path ptnPath) throws IOException { + String harLocn = ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "") + ".har" + Path.SEPARATOR; // LOG.info("har location : " + harLocn); - return harLocn; - } + return harLocn; + } - /** - * Creates a har file from the contents of a given directory, using that as root. - * @param dir Directory to archive - * @param harFile The HAR file to create - */ - public static void makeHar(JobContext context, String dir, String harFile) throws IOException { + /** + * Creates a har file from the contents of a given directory, using that as root. 
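// Illustrative sketch of a custom StateProvider (the interface shown just above): an external
// system that already assigns stable task numbers can expose them to HCatalog instead of
// relying on DefaultStateProvider's random ids. The field and constructor are hypothetical;
// only getId() comes from the interface.
import org.apache.hive.hcatalog.data.transfer.state.StateProvider;

public class ExternalTaskStateProvider implements StateProvider {

  private final int taskId;   // id the external framework assigned to this slave

  public ExternalTaskStateProvider(int taskId) {
    this.taskId = taskId;
  }

  @Override
  public int getId() {
    return taskId;
  }
}
// A slave would then pass it along, e.g. DataTransferFactory.getHCatWriter(writerContext,
// new ExternalTaskStateProvider(3)).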
+ * @param dir Directory to archive + * @param harFile The HAR file to create + */ + public static void makeHar(JobContext context, String dir, String harFile) throws IOException { // Configuration conf = context.getConfiguration(); // Credentials creds = context.getCredentials(); // HCatUtil.logAllTokens(LOG,context); - int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR); - Path archivePath = new Path(harFile.substring(0, lastSep)); - final String[] args = { - "-archiveName", - harFile.substring(lastSep + 1, harFile.length()), - "-p", - dir, - "*", - archivePath.toString() - }; + int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR); + Path archivePath = new Path(harFile.substring(0, lastSep)); + final String[] args = { + "-archiveName", + harFile.substring(lastSep + 1, harFile.length()), + "-p", + dir, + "*", + archivePath.toString() + }; // for (String arg : args){ // LOG.info("Args to har : "+ arg); // } - try { - Configuration newConf = new Configuration(); - FileSystem fs = archivePath.getFileSystem(newConf); + try { + Configuration newConf = new Configuration(); + FileSystem fs = archivePath.getFileSystem(newConf); - String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION); - if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) { - newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting); + String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION); + if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) { + newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting); // LOG.info("System.getenv(\"HADOOP_TOKEN_FILE_LOCATION\") =["+ System.getenv("HADOOP_TOKEN_FILE_LOCATION")+"]"); - } + } // for (FileStatus ds : fs.globStatus(new Path(dir, "*"))){ // LOG.info("src : "+ds.getPath().toUri().toString()); // } - final HadoopArchives har = new HadoopArchives(newConf); - int rc = ToolRunner.run(har, args); - if (rc != 0) { - throw new Exception("Har returned error code " + rc); - } + final HadoopArchives har = new HadoopArchives(newConf); + int rc = ToolRunner.run(har, args); + if (rc != 0) { + throw new Exception("Har returned error code " + rc); + } // for (FileStatus hs : fs.globStatus(new Path(harFile, "*"))){ // LOG.info("dest : "+hs.getPath().toUri().toString()); // } // doHarCheck(fs,harFile); // LOG.info("Nuking " + dir); - fs.delete(new Path(dir), true); - } catch (Exception e) { - throw new HCatException("Error creating Har [" + harFile + "] from [" + dir + "]", e); - } + fs.delete(new Path(dir), true); + } catch (Exception e) { + throw new HCatException("Error creating Har [" + harFile + "] from [" + dir + "]", e); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java index a11affc..cead40d 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java @@ -37,71 +37,71 @@ */ class DefaultOutputCommitterContainer extends OutputCommitterContainer { - private static final Logger LOG = LoggerFactory.getLogger(DefaultOutputCommitterContainer.class); + private static final Logger LOG = 
LoggerFactory.getLogger(DefaultOutputCommitterContainer.class); - /** - * @param context current JobContext - * @param baseCommitter OutputCommitter to contain - * @throws IOException - */ - public DefaultOutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { - super(context, baseCommitter); - } + /** + * @param context current JobContext + * @param baseCommitter OutputCommitter to contain + * @throws IOException + */ + public DefaultOutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { + super(context, baseCommitter); + } - @Override - public void abortTask(TaskAttemptContext context) throws IOException { - getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + @Override + public void abortTask(TaskAttemptContext context) throws IOException { + getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); + } - @Override - public void commitTask(TaskAttemptContext context) throws IOException { - getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + @Override + public void commitTask(TaskAttemptContext context) throws IOException { + getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); + } - @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); - } + @Override + public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { + return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); + } - @Override - public void setupJob(JobContext context) throws IOException { - getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); - } + @Override + public void setupJob(JobContext context) throws IOException { + getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); + } - @Override - public void setupTask(TaskAttemptContext context) throws IOException { - getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + @Override + public void setupTask(TaskAttemptContext context) throws IOException { + getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); + } - @Override - public void abortJob(JobContext jobContext, State state) throws IOException { - getBaseOutputCommitter().abortJob(HCatMapRedUtil.createJobContext(jobContext), state); - cleanupJob(jobContext); - } + @Override + public void abortJob(JobContext jobContext, State state) throws IOException { + getBaseOutputCommitter().abortJob(HCatMapRedUtil.createJobContext(jobContext), state); + cleanupJob(jobContext); + } - @Override - public void commitJob(JobContext jobContext) throws IOException { - getBaseOutputCommitter().commitJob(HCatMapRedUtil.createJobContext(jobContext)); - cleanupJob(jobContext); - } + @Override + public void commitJob(JobContext jobContext) throws IOException { + getBaseOutputCommitter().commitJob(HCatMapRedUtil.createJobContext(jobContext)); + cleanupJob(jobContext); + } - @Override - public void cleanupJob(JobContext context) throws IOException { - getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context)); + @Override + public void cleanupJob(JobContext context) throws IOException { + 
getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context)); - //Cancel HCat and JobTracker tokens - HiveMetaStoreClient client = null; - try { - HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); - client = HCatUtil.getHiveClient(hiveConf); - String tokenStrForm = client.getTokenStrForm(); - if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - client.cancelDelegationToken(tokenStrForm); - } - } catch (Exception e) { - LOG.warn("Failed to cancel delegation token", e); - } finally { - HCatUtil.closeHiveClientQuietly(client); - } + //Cancel HCat and JobTracker tokens + HiveMetaStoreClient client = null; + try { + HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); + client = HCatUtil.getHiveClient(hiveConf); + String tokenStrForm = client.getTokenStrForm(); + if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + client.cancelDelegationToken(tokenStrForm); + } + } catch (Exception e) { + LOG.warn("Failed to cancel delegation token", e); + } finally { + HCatUtil.closeHiveClientQuietly(client); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputFormatContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputFormatContainer.java index c641a82..3a07b0c 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputFormatContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputFormatContainer.java @@ -40,62 +40,62 @@ */ class DefaultOutputFormatContainer extends OutputFormatContainer { - private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(); + private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(); - static { - NUMBER_FORMAT.setMinimumIntegerDigits(5); - NUMBER_FORMAT.setGroupingUsed(false); - } + static { + NUMBER_FORMAT.setMinimumIntegerDigits(5); + NUMBER_FORMAT.setGroupingUsed(false); + } - public DefaultOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, Writable> of) { - super(of); - } + public DefaultOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, Writable> of) { + super(of); + } - static synchronized String getOutputName(int partition) { - return "part-" + NUMBER_FORMAT.format(partition); - } + static synchronized String getOutputName(int partition) { + return "part-" + NUMBER_FORMAT.format(partition); + } - /** - * Get the record writer for the job. Uses the storagehandler's OutputFormat - * to get the record writer. - * @param context the information about the current task. - * @return a RecordWriter to write the output for the job. - * @throws IOException - */ - @Override - public RecordWriter, HCatRecord> - getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - String name = getOutputName(context.getTaskAttemptID().getTaskID().getId()); - return new DefaultRecordWriterContainer(context, - getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name, InternalUtil.createReporter(context))); - } + /** + * Get the record writer for the job. Uses the storagehandler's OutputFormat + * to get the record writer. + * @param context the information about the current task. + * @return a RecordWriter to write the output for the job. 
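// Small sketch of the file-name padding used by getOutputName(int) above: a NumberFormat
// configured for at least five integer digits and no grouping yields the familiar
// "part-00000"-style output names. Pure JDK, mirroring the static initializer shown above.
import java.text.NumberFormat;

public class OutputNameSketch {
  public static void main(String[] args) {
    NumberFormat fmt = NumberFormat.getInstance();
    fmt.setMinimumIntegerDigits(5);
    fmt.setGroupingUsed(false);
    System.out.println("part-" + fmt.format(7));      // part-00007
    System.out.println("part-" + fmt.format(12345));  // part-12345
  }
}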
+ * @throws IOException + */ + @Override + public RecordWriter, HCatRecord> + getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { + String name = getOutputName(context.getTaskAttemptID().getTaskID().getId()); + return new DefaultRecordWriterContainer(context, + getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name, InternalUtil.createReporter(context))); + } - /** - * Get the output committer for this output format. This is responsible - * for ensuring the output is committed correctly. - * @param context the task context - * @return an output committer - * @throws IOException - * @throws InterruptedException - */ - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) - throws IOException, InterruptedException { - return new DefaultOutputCommitterContainer(context, new JobConf(context.getConfiguration()).getOutputCommitter()); - } + /** + * Get the output committer for this output format. This is responsible + * for ensuring the output is committed correctly. + * @param context the task context + * @return an output committer + * @throws IOException + * @throws InterruptedException + */ + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) + throws IOException, InterruptedException { + return new DefaultOutputCommitterContainer(context, new JobConf(context.getConfiguration()).getOutputCommitter()); + } - /** - * Check for validity of the output-specification for the job. - * @param context information about the job - * @throws IOException when output should not be attempted - */ - @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - org.apache.hadoop.mapred.OutputFormat, ? super Writable> outputFormat = getBaseOutputFormat(); - JobConf jobConf = new JobConf(context.getConfiguration()); - outputFormat.checkOutputSpecs(null, jobConf); - HCatUtil.copyConf(jobConf, context.getConfiguration()); - } + /** + * Check for validity of the output-specification for the job. + * @param context information about the job + * @throws IOException when output should not be attempted + */ + @Override + public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { + org.apache.hadoop.mapred.OutputFormat, ? 
super Writable> outputFormat = getBaseOutputFormat(); + JobConf jobConf = new JobConf(context.getConfiguration()); + outputFormat.checkOutputSpecs(null, jobConf); + HCatUtil.copyConf(jobConf, context.getConfiguration()); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultRecordWriterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultRecordWriterContainer.java index 96587d4..9024e46 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultRecordWriterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultRecordWriterContainer.java @@ -37,46 +37,46 @@ */ class DefaultRecordWriterContainer extends RecordWriterContainer { - private final HCatStorageHandler storageHandler; - private final SerDe serDe; - private final OutputJobInfo jobInfo; - private final ObjectInspector hcatRecordOI; + private final HCatStorageHandler storageHandler; + private final SerDe serDe; + private final OutputJobInfo jobInfo; + private final ObjectInspector hcatRecordOI; - /** - * @param context current JobContext - * @param baseRecordWriter RecordWriter to contain - * @throws IOException - * @throws InterruptedException - */ - public DefaultRecordWriterContainer(TaskAttemptContext context, - org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter) throws IOException, InterruptedException { - super(context, baseRecordWriter); - jobInfo = HCatOutputFormat.getJobInfo(context); - storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - HCatOutputFormat.configureOutputStorageHandler(context); - serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); - hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); - try { - InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); - } catch (SerDeException e) { - throw new IOException("Failed to initialize SerDe", e); - } + /** + * @param context current JobContext + * @param baseRecordWriter RecordWriter to contain + * @throws IOException + * @throws InterruptedException + */ + public DefaultRecordWriterContainer(TaskAttemptContext context, + org.apache.hadoop.mapred.RecordWriter, ? 
super Writable> baseRecordWriter) throws IOException, InterruptedException { + super(context, baseRecordWriter); + jobInfo = HCatOutputFormat.getJobInfo(context); + storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + HCatOutputFormat.configureOutputStorageHandler(context); + serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); + hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); + try { + InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); + } catch (SerDeException e) { + throw new IOException("Failed to initialize SerDe", e); } + } - @Override - public void close(TaskAttemptContext context) throws IOException, - InterruptedException { - getBaseRecordWriter().close(InternalUtil.createReporter(context)); - } + @Override + public void close(TaskAttemptContext context) throws IOException, + InterruptedException { + getBaseRecordWriter().close(InternalUtil.createReporter(context)); + } - @Override - public void write(WritableComparable key, HCatRecord value) throws IOException, - InterruptedException { - try { - getBaseRecordWriter().write(null, serDe.serialize(value.getAll(), hcatRecordOI)); - } catch (SerDeException e) { - throw new IOException("Failed to serialize object", e); - } + @Override + public void write(WritableComparable key, HCatRecord value) throws IOException, + InterruptedException { + try { + getBaseRecordWriter().write(null, serDe.serialize(value.getAll(), hcatRecordOI)); + } catch (SerDeException e) { + throw new IOException("Failed to serialize object", e); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java index a843a5b..735e1ef 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java @@ -66,685 +66,685 @@ */ class FileOutputCommitterContainer extends OutputCommitterContainer { - private static final String TEMP_DIR_NAME = "_temporary"; - private static final String LOGS_DIR_NAME = "_logs"; - - private static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class); - private final boolean dynamicPartitioningUsed; - private boolean partitionsDiscovered; - - private Map> partitionsDiscoveredByPath; - private Map contextDiscoveredByPath; - private final HCatStorageHandler cachedStorageHandler; - - HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor(); - - private String ptnRootLocation = null; - - private OutputJobInfo jobInfo = null; - - /** - * @param context current JobContext - * @param baseCommitter OutputCommitter to contain - * @throws IOException - */ - public FileOutputCommitterContainer(JobContext context, - org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { - super(context, baseCommitter); - jobInfo = HCatOutputFormat.getJobInfo(context); - dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); - - this.partitionsDiscovered = !dynamicPartitioningUsed; - cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + private static final String TEMP_DIR_NAME = "_temporary"; + private static final String LOGS_DIR_NAME = "_logs"; + + private 
static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class); + private final boolean dynamicPartitioningUsed; + private boolean partitionsDiscovered; + + private Map> partitionsDiscoveredByPath; + private Map contextDiscoveredByPath; + private final HCatStorageHandler cachedStorageHandler; + + HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor(); + + private String ptnRootLocation = null; + + private OutputJobInfo jobInfo = null; + + /** + * @param context current JobContext + * @param baseCommitter OutputCommitter to contain + * @throws IOException + */ + public FileOutputCommitterContainer(JobContext context, + org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { + super(context, baseCommitter); + jobInfo = HCatOutputFormat.getJobInfo(context); + dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); + + this.partitionsDiscovered = !dynamicPartitioningUsed; + cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + } + + @Override + public void abortTask(TaskAttemptContext context) throws IOException { + if (!dynamicPartitioningUsed) { + getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); } - - @Override - public void abortTask(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + } + + @Override + public void commitTask(TaskAttemptContext context) throws IOException { + if (!dynamicPartitioningUsed) { + //See HCATALOG-499 + FileOutputFormatContainer.setWorkOutputPath(context); + getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); } - - @Override - public void commitTask(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - //See HCATALOG-499 - FileOutputFormatContainer.setWorkOutputPath(context); - getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } + } + + @Override + public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { + if (!dynamicPartitioningUsed) { + return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); + } else { + // called explicitly through FileRecordWriterContainer.close() if dynamic - return false by default + return false; } + } - @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); - } else { - // called explicitly through FileRecordWriterContainer.close() if dynamic - return false by default - return false; - } + @Override + public void setupJob(JobContext context) throws IOException { + if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { + getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); } + // in dynamic usecase, called through FileRecordWriterContainer + } - @Override - public void setupJob(JobContext context) throws IOException { - if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { - getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); - } - // in dynamic usecase, called through FileRecordWriterContainer + @Override + public void setupTask(TaskAttemptContext context) throws IOException { + if (!dynamicPartitioningUsed) { + 
getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); } - - @Override - public void setupTask(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); + } + + @Override + public void abortJob(JobContext jobContext, State state) throws IOException { + try { + if (dynamicPartitioningUsed) { + discoverPartitions(jobContext); + } + org.apache.hadoop.mapred.JobContext mapRedJobContext = HCatMapRedUtil + .createJobContext(jobContext); + if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { + getBaseOutputCommitter().abortJob(mapRedJobContext, state); + } else if (dynamicPartitioningUsed) { + for (JobContext currContext : contextDiscoveredByPath.values()) { + try { + new JobConf(currContext.getConfiguration()) + .getOutputCommitter().abortJob(currContext, + state); + } catch (Exception e) { + throw new IOException(e); + } } + } + Path src; + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); + if (dynamicPartitioningUsed) { + src = new Path(getPartitionRootLocation(jobInfo.getLocation(), jobInfo.getTableInfo().getTable() + .getPartitionKeysSize())); + } else { + src = new Path(jobInfo.getLocation()); + } + FileSystem fs = src.getFileSystem(jobContext.getConfiguration()); + // Note fs.delete will fail on Windows. The reason is in OutputCommitter, + // Hadoop is still writing to _logs/history. On Linux, OS don't care file is still + // open and remove the directory anyway, but on Windows, OS refuse to remove a + // directory containing open files. So on Windows, we will leave output directory + // behind when job fail. User needs to remove the output directory manually + LOG.info("Job failed. Try cleaning up temporary directory [{}].", src); + fs.delete(src, true); + } finally { + cancelDelegationTokens(jobContext); } - - @Override - public void abortJob(JobContext jobContext, State state) throws IOException { - try { - if (dynamicPartitioningUsed) { - discoverPartitions(jobContext); - } - org.apache.hadoop.mapred.JobContext mapRedJobContext = HCatMapRedUtil - .createJobContext(jobContext); - if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { - getBaseOutputCommitter().abortJob(mapRedJobContext, state); - } else if (dynamicPartitioningUsed) { - for (JobContext currContext : contextDiscoveredByPath.values()) { - try { - new JobConf(currContext.getConfiguration()) - .getOutputCommitter().abortJob(currContext, - state); - } catch (Exception e) { - throw new IOException(e); - } - } - } - Path src; - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); - if (dynamicPartitioningUsed) { - src = new Path(getPartitionRootLocation(jobInfo.getLocation(), jobInfo.getTableInfo().getTable() - .getPartitionKeysSize())); - } else { - src = new Path(jobInfo.getLocation()); - } - FileSystem fs = src.getFileSystem(jobContext.getConfiguration()); - // Note fs.delete will fail on Windows. The reason is in OutputCommitter, - // Hadoop is still writing to _logs/history. On Linux, OS don't care file is still - // open and remove the directory anyway, but on Windows, OS refuse to remove a - // directory containing open files. So on Windows, we will leave output directory - // behind when job fail. User needs to remove the output directory manually - LOG.info("Job failed. 
Try cleaning up temporary directory [{}].", src); - fs.delete(src, true); - } finally { - cancelDelegationTokens(jobContext); + } + + public static final String SUCCEEDED_FILE_NAME = "_SUCCESS"; + static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = + "mapreduce.fileoutputcommitter.marksuccessfuljobs"; + + private static boolean getOutputDirMarking(Configuration conf) { + return conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, + false); + } + + @Override + public void commitJob(JobContext jobContext) throws IOException { + try { + if (dynamicPartitioningUsed) { + discoverPartitions(jobContext); + // Commit each partition so it gets moved out of the job work + // dir + for (JobContext context : contextDiscoveredByPath.values()) { + new JobConf(context.getConfiguration()) + .getOutputCommitter().commitJob(context); } - } - - public static final String SUCCEEDED_FILE_NAME = "_SUCCESS"; - static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = - "mapreduce.fileoutputcommitter.marksuccessfuljobs"; - - private static boolean getOutputDirMarking(Configuration conf) { - return conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, - false); - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - try { - if (dynamicPartitioningUsed) { - discoverPartitions(jobContext); - // Commit each partition so it gets moved out of the job work - // dir - for (JobContext context : contextDiscoveredByPath.values()) { - new JobConf(context.getConfiguration()) - .getOutputCommitter().commitJob(context); - } - } - if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { - getBaseOutputCommitter().commitJob( - HCatMapRedUtil.createJobContext(jobContext)); - } - registerPartitions(jobContext); - // create _SUCCESS FILE if so requested. - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); - if (getOutputDirMarking(jobContext.getConfiguration())) { - Path outputPath = new Path(jobInfo.getLocation()); - FileSystem fileSys = outputPath.getFileSystem(jobContext - .getConfiguration()); - // create a file in the folder to mark it - if (fileSys.exists(outputPath)) { - Path filePath = new Path(outputPath, - SUCCEEDED_FILE_NAME); - if (!fileSys.exists(filePath)) { // may have been - // created by - // baseCommitter.commitJob() - fileSys.create(filePath).close(); - } - } - } - } finally { - cancelDelegationTokens(jobContext); + } + if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { + getBaseOutputCommitter().commitJob( + HCatMapRedUtil.createJobContext(jobContext)); + } + registerPartitions(jobContext); + // create _SUCCESS FILE if so requested. 
+ OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); + if (getOutputDirMarking(jobContext.getConfiguration())) { + Path outputPath = new Path(jobInfo.getLocation()); + FileSystem fileSys = outputPath.getFileSystem(jobContext + .getConfiguration()); + // create a file in the folder to mark it + if (fileSys.exists(outputPath)) { + Path filePath = new Path(outputPath, + SUCCEEDED_FILE_NAME); + if (!fileSys.exists(filePath)) { // may have been + // created by + // baseCommitter.commitJob() + fileSys.create(filePath).close(); + } } + } + } finally { + cancelDelegationTokens(jobContext); } - - @Override - public void cleanupJob(JobContext context) throws IOException { - throw new IOException("The method cleanupJob is deprecated and should not be called."); - } - - private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) { - if (ptnRootLocation == null) { - // we only need to calculate it once, it'll be the same for other partitions in this job. - Path ptnRoot = new Path(ptnLocn); - for (int i = 0; i < numPtnKeys; i++) { + } + + @Override + public void cleanupJob(JobContext context) throws IOException { + throw new IOException("The method cleanupJob is deprecated and should not be called."); + } + + private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) { + if (ptnRootLocation == null) { + // we only need to calculate it once, it'll be the same for other partitions in this job. + Path ptnRoot = new Path(ptnLocn); + for (int i = 0; i < numPtnKeys; i++) { // LOG.info("Getting parent of "+ptnRoot.getName()); - ptnRoot = ptnRoot.getParent(); - } - ptnRootLocation = ptnRoot.toString(); - } + ptnRoot = ptnRoot.getParent(); + } + ptnRootLocation = ptnRoot.toString(); + } // LOG.info("Returning final parent : "+ptnRootLocation); - return ptnRootLocation; + return ptnRootLocation; + } + + /** + * Generate partition metadata object to be used to add to metadata. + * @param context The job context. + * @param jobInfo The OutputJobInfo. + * @param partLocnRoot The table-equivalent location root of the partition + * (temporary dir if dynamic partition, table dir if static) + * @param partKVs The keyvalue pairs that form the partition + * @param outputSchema The output schema for the partition + * @param params The parameters to store inside the partition + * @param table The Table metadata object under which this Partition will reside + * @param fs FileSystem object to operate on the underlying filesystem + * @param grpName Group name that owns the table dir + * @param perms FsPermission that's the default permission of the table dir. + * @return Constructed Partition metadata object + * @throws java.io.IOException + */ + + private Partition constructPartition( + JobContext context, OutputJobInfo jobInfo, + String partLocnRoot, Map partKVs, + HCatSchema outputSchema, Map params, + Table table, FileSystem fs, + String grpName, FsPermission perms) throws IOException { + + Partition partition = new Partition(); + partition.setDbName(table.getDbName()); + partition.setTableName(table.getTableName()); + partition.setSd(new StorageDescriptor(table.getTTable().getSd())); + + List fields = new ArrayList(); + for (HCatFieldSchema fieldSchema : outputSchema.getFields()) { + fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema)); } - /** - * Generate partition metadata object to be used to add to metadata. - * @param context The job context. - * @param jobInfo The OutputJobInfo. 
- * @param partLocnRoot The table-equivalent location root of the partition - * (temporary dir if dynamic partition, table dir if static) - * @param partKVs The keyvalue pairs that form the partition - * @param outputSchema The output schema for the partition - * @param params The parameters to store inside the partition - * @param table The Table metadata object under which this Partition will reside - * @param fs FileSystem object to operate on the underlying filesystem - * @param grpName Group name that owns the table dir - * @param perms FsPermission that's the default permission of the table dir. - * @return Constructed Partition metadata object - * @throws java.io.IOException - */ - - private Partition constructPartition( - JobContext context, OutputJobInfo jobInfo, - String partLocnRoot, Map partKVs, - HCatSchema outputSchema, Map params, - Table table, FileSystem fs, - String grpName, FsPermission perms) throws IOException { - - Partition partition = new Partition(); - partition.setDbName(table.getDbName()); - partition.setTableName(table.getTableName()); - partition.setSd(new StorageDescriptor(table.getTTable().getSd())); - - List fields = new ArrayList(); - for (HCatFieldSchema fieldSchema : outputSchema.getFields()) { - fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema)); - } - - partition.getSd().setCols(fields); + partition.getSd().setCols(fields); - partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs)); + partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs)); - partition.setParameters(params); - - // Sets permissions and group name on partition dirs and files. - - Path partPath; - if (Boolean.valueOf((String)table.getProperty("EXTERNAL")) - && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { - // honor external table that specifies the location - partPath = new Path(jobInfo.getLocation()); - } else { - partPath = new Path(partLocnRoot); - int i = 0; - for (FieldSchema partKey : table.getPartitionKeys()) { - if (i++ != 0) { - applyGroupAndPerms(fs, partPath, perms, grpName, false); - } - partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); - } - } + partition.setParameters(params); - // Apply the group and permissions to the leaf partition and files. - // Need not bother in case of HDFS as permission is taken care of by setting UMask - if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) { - applyGroupAndPerms(fs, partPath, perms, grpName, true); - } + // Sets permissions and group name on partition dirs and files. 
- // Set the location in the StorageDescriptor - if (dynamicPartitioningUsed) { - String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs); - if (harProcessor.isEnabled()) { - harProcessor.exec(context, partition, partPath); - partition.getSd().setLocation( - harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination))); - } else { - partition.getSd().setLocation(dynamicPartitionDestination); - } - } else { - partition.getSd().setLocation(partPath.toString()); + Path partPath; + if (Boolean.valueOf((String)table.getProperty("EXTERNAL")) + && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { + // honor external table that specifies the location + partPath = new Path(jobInfo.getLocation()); + } else { + partPath = new Path(partLocnRoot); + int i = 0; + for (FieldSchema partKey : table.getPartitionKeys()) { + if (i++ != 0) { + applyGroupAndPerms(fs, partPath, perms, grpName, false); } - return partition; + partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); + } } - private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission, - String group, boolean recursive) - throws IOException { - fs.setPermission(dir, permission); - if (recursive) { - for (FileStatus fileStatus : fs.listStatus(dir)) { - if (fileStatus.isDir()) { - applyGroupAndPerms(fs, fileStatus.getPath(), permission, group, true); - } else { - fs.setPermission(fileStatus.getPath(), permission); - } - } - } + // Apply the group and permissions to the leaf partition and files. + // Need not bother in case of HDFS as permission is taken care of by setting UMask + if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) { + applyGroupAndPerms(fs, partPath, perms, grpName, true); } - private String getFinalDynamicPartitionDestination(Table table, Map partKVs) { - // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA -> - // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA - Path partPath = new Path(table.getTTable().getSd().getLocation()); - for (FieldSchema partKey : table.getPartitionKeys()) { - partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); - } - return partPath.toString(); + // Set the location in the StorageDescriptor + if (dynamicPartitioningUsed) { + String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs); + if (harProcessor.isEnabled()) { + harProcessor.exec(context, partition, partPath); + partition.getSd().setLocation( + harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination))); + } else { + partition.getSd().setLocation(dynamicPartitionDestination); + } + } else { + partition.getSd().setLocation(partPath.toString()); } - - private Map getStorerParameterMap(StorerInfo storer) { - Map params = new HashMap(); - - //Copy table level hcat.* keys to the partition - for (Entry entry : storer.getProperties().entrySet()) { - params.put(entry.getKey().toString(), entry.getValue().toString()); + return partition; + } + + private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission, + String group, boolean recursive) + throws IOException { + fs.setPermission(dir, permission); + if (recursive) { + for (FileStatus fileStatus : fs.listStatus(dir)) { + if (fileStatus.isDir()) { + applyGroupAndPerms(fs, fileStatus.getPath(), permission, group, true); + } else { + fs.setPermission(fileStatus.getPath(), permission); } - return params; + } 
} + } + + private String getFinalDynamicPartitionDestination(Table table, Map partKVs) { + // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA -> + // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA + Path partPath = new Path(table.getTTable().getSd().getLocation()); + for (FieldSchema partKey : table.getPartitionKeys()) { + partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); + } + return partPath.toString(); + } - private Path constructPartialPartPath(Path partialPath, String partKey, Map partKVs) { + private Map getStorerParameterMap(StorerInfo storer) { + Map params = new HashMap(); - StringBuilder sb = new StringBuilder(FileUtils.escapePathName(partKey)); - sb.append("="); - sb.append(FileUtils.escapePathName(partKVs.get(partKey))); - return new Path(partialPath, sb.toString()); + //Copy table level hcat.* keys to the partition + for (Entry entry : storer.getProperties().entrySet()) { + params.put(entry.getKey().toString(), entry.getValue().toString()); } - - /** - * Update table schema, adding new columns as added for the partition. - * @param client the client - * @param table the table - * @param partitionSchema the schema of the partition - * @throws java.io.IOException Signals that an I/O exception has occurred. - * @throws org.apache.hadoop.hive.metastore.api.InvalidOperationException the invalid operation exception - * @throws org.apache.hadoop.hive.metastore.api.MetaException the meta exception - * @throws org.apache.thrift.TException the t exception - */ - private void updateTableSchema(HiveMetaStoreClient client, Table table, - HCatSchema partitionSchema) throws IOException, InvalidOperationException, MetaException, TException { - - - List newColumns = HCatUtil.validatePartitionSchema(table, partitionSchema); - - if (newColumns.size() != 0) { - List tableColumns = new ArrayList(table.getTTable().getSd().getCols()); - tableColumns.addAll(newColumns); - - //Update table schema to add the newly added columns - table.getTTable().getSd().setCols(tableColumns); - client.alter_table(table.getDbName(), table.getTableName(), table.getTTable()); - } + return params; + } + + private Path constructPartialPartPath(Path partialPath, String partKey, Map partKVs) { + + StringBuilder sb = new StringBuilder(FileUtils.escapePathName(partKey)); + sb.append("="); + sb.append(FileUtils.escapePathName(partKVs.get(partKey))); + return new Path(partialPath, sb.toString()); + } + + /** + * Update table schema, adding new columns as added for the partition. + * @param client the client + * @param table the table + * @param partitionSchema the schema of the partition + * @throws java.io.IOException Signals that an I/O exception has occurred. 
+ * @throws org.apache.hadoop.hive.metastore.api.InvalidOperationException the invalid operation exception + * @throws org.apache.hadoop.hive.metastore.api.MetaException the meta exception + * @throws org.apache.thrift.TException the t exception + */ + private void updateTableSchema(HiveMetaStoreClient client, Table table, + HCatSchema partitionSchema) throws IOException, InvalidOperationException, MetaException, TException { + + + List newColumns = HCatUtil.validatePartitionSchema(table, partitionSchema); + + if (newColumns.size() != 0) { + List tableColumns = new ArrayList(table.getTTable().getSd().getCols()); + tableColumns.addAll(newColumns); + + //Update table schema to add the newly added columns + table.getTTable().getSd().setCols(tableColumns); + client.alter_table(table.getDbName(), table.getTableName(), table.getTTable()); } - - /** - * Move all of the files from the temp directory to the final location - * @param fs the output file system - * @param file the file to move - * @param srcDir the source directory - * @param destDir the target directory - * @param dryRun - a flag that simply tests if this move would succeed or not based - * on whether other files exist where we're trying to copy - * @throws java.io.IOException - */ - private void moveTaskOutputs(FileSystem fs, - Path file, - Path srcDir, - Path destDir, final boolean dryRun) throws IOException { - - if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME) || file.getName().equals(SUCCEEDED_FILE_NAME)) { - return; + } + + /** + * Move all of the files from the temp directory to the final location + * @param fs the output file system + * @param file the file to move + * @param srcDir the source directory + * @param destDir the target directory + * @param dryRun - a flag that simply tests if this move would succeed or not based + * on whether other files exist where we're trying to copy + * @throws java.io.IOException + */ + private void moveTaskOutputs(FileSystem fs, + Path file, + Path srcDir, + Path destDir, final boolean dryRun) throws IOException { + + if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME) || file.getName().equals(SUCCEEDED_FILE_NAME)) { + return; + } + final Path finalOutputPath = getFinalPath(file, srcDir, destDir); + if (fs.isFile(file)) { + if (dryRun){ + if(LOG.isDebugEnabled()) { + LOG.debug("Testing if moving file: [" + file + "] to [" + + finalOutputPath + "] would cause a problem"); + } + if (fs.exists(finalOutputPath)) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath + + ", duplicate publish not possible."); + } + } else { + if(LOG.isDebugEnabled()) { + LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]"); } - final Path finalOutputPath = getFinalPath(file, srcDir, destDir); - if (fs.isFile(file)) { - if (dryRun){ - if(LOG.isDebugEnabled()) { - LOG.debug("Testing if moving file: [" + file + "] to [" - + finalOutputPath + "] would cause a problem"); - } - if (fs.exists(finalOutputPath)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath - + ", duplicate publish not possible."); - } - } else { - if(LOG.isDebugEnabled()) { - LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]"); - } - // Make sure the parent directory exists. 
It is not an error - // to recreate an existing directory - fs.mkdirs(finalOutputPath.getParent()); - if (!fs.rename(file, finalOutputPath)) { - if (!fs.delete(finalOutputPath, true)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to delete existing path " + finalOutputPath); - } - if (!fs.rename(file, finalOutputPath)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to move output to " + finalOutputPath); - } - } + // Make sure the parent directory exists. It is not an error + // to recreate an existing directory + fs.mkdirs(finalOutputPath.getParent()); + if (!fs.rename(file, finalOutputPath)) { + if (!fs.delete(finalOutputPath, true)) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to delete existing path " + finalOutputPath); + } + if (!fs.rename(file, finalOutputPath)) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to move output to " + finalOutputPath); + } + } + } + } else if(fs.getFileStatus(file).isDir()) { + FileStatus[] children = fs.listStatus(file); + FileStatus firstChild = null; + if (children != null) { + int index=0; + while (index < children.length) { + if (!children[index].getPath().getName().equals(TEMP_DIR_NAME) && !children[index].getPath().getName().equals(LOGS_DIR_NAME) && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) { + firstChild = children[index]; + break; + } + index++; + } + } + if(firstChild!=null && firstChild.isDir()) { + // If the first child is directory, then rest would be directory too according to HCatalog dir structure + // recurse in that case + for (FileStatus child : children) { + moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); + } + } else { + + if (!dryRun) { + if (dynamicPartitioningUsed) { + // Optimization: if the first child is file, we have reached the leaf directory, move the parent directory itself + // instead of moving each file under the directory. See HCATALOG-538 + + final Path parentDir = finalOutputPath.getParent(); + // Create the directory + Path placeholder = new Path(parentDir, "_placeholder"); + if (fs.mkdirs(parentDir)) { + // It is weired but we need a placeholder, + // otherwise rename cannot move file to the right place + fs.create(placeholder).close(); } - } else if(fs.getFileStatus(file).isDir()) { - FileStatus[] children = fs.listStatus(file); - FileStatus firstChild = null; - if (children != null) { - int index=0; - while (index < children.length) { - if (!children[index].getPath().getName().equals(TEMP_DIR_NAME) && !children[index].getPath().getName().equals(LOGS_DIR_NAME) && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) { - firstChild = children[index]; - break; - } - index++; - } + if (LOG.isDebugEnabled()) { + LOG.debug("Moving directory: " + file + " to " + parentDir); } - if(firstChild!=null && firstChild.isDir()) { - // If the first child is directory, then rest would be directory too according to HCatalog dir structure - // recurse in that case - for (FileStatus child : children) { - moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); - } - } else { - - if (!dryRun) { - if (dynamicPartitioningUsed) { - // Optimization: if the first child is file, we have reached the leaf directory, move the parent directory itself - // instead of moving each file under the directory. 
See HCATALOG-538 - - final Path parentDir = finalOutputPath.getParent(); - // Create the directory - Path placeholder = new Path(parentDir, "_placeholder"); - if (fs.mkdirs(parentDir)) { - // It is weired but we need a placeholder, - // otherwise rename cannot move file to the right place - fs.create(placeholder).close(); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Moving directory: " + file + " to " + parentDir); - } - if (!fs.rename(file, parentDir)) { - final String msg = "Failed to move file: " + file + " to " + parentDir; - LOG.error(msg); - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); - } - fs.delete(placeholder, false); - } else { - // In case of no partition we have to move each file - for (FileStatus child : children) { - moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); - } - } - } else { - if(fs.exists(finalOutputPath)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath - + ", duplicate publish not possible."); - } - } + if (!fs.rename(file, parentDir)) { + final String msg = "Failed to move file: " + file + " to " + parentDir; + LOG.error(msg); + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); } + fs.delete(placeholder, false); + } else { + // In case of no partition we have to move each file + for (FileStatus child : children) { + moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); + } + } } else { - // Should never happen - final String msg = "Unknown file type being asked to be moved, erroring out"; - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); + if(fs.exists(finalOutputPath)) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath + + ", duplicate publish not possible."); + } } + } + } else { + // Should never happen + final String msg = "Unknown file type being asked to be moved, erroring out"; + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); } - - /** - * Find the final name of a given output file, given the output directory - * and the work directory. - * @param file the file to move - * @param src the source directory - * @param dest the target directory - * @return the final path for the specific output file - * @throws java.io.IOException - */ - private Path getFinalPath(Path file, Path src, - Path dest) throws IOException { - URI taskOutputUri = file.toUri(); - URI relativePath = src.toUri().relativize(taskOutputUri); - if (taskOutputUri == relativePath) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Can not get the relative path: base = " + - src + " child = " + file); + } + + /** + * Find the final name of a given output file, given the output directory + * and the work directory. 
+ * @param file the file to move + * @param src the source directory + * @param dest the target directory + * @return the final path for the specific output file + * @throws java.io.IOException + */ + private Path getFinalPath(Path file, Path src, + Path dest) throws IOException { + URI taskOutputUri = file.toUri(); + URI relativePath = src.toUri().relativize(taskOutputUri); + if (taskOutputUri == relativePath) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Can not get the relative path: base = " + + src + " child = " + file); + } + if (relativePath.getPath().length() > 0) { + return new Path(dest, relativePath.getPath()); + } else { + return dest; + } + } + + /** + * Run to discover dynamic partitions available + */ + private void discoverPartitions(JobContext context) throws IOException { + if (!partitionsDiscovered) { + // LOG.info("discover ptns called"); + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); + + harProcessor.setEnabled(jobInfo.getHarRequested()); + + List dynamicPartCols = jobInfo.getPosOfDynPartCols(); + int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); + + Path loadPath = new Path(jobInfo.getLocation()); + FileSystem fs = loadPath.getFileSystem(context.getConfiguration()); + + // construct a path pattern (e.g., /*/*) to find all dynamically generated paths + String dynPathSpec = loadPath.toUri().getPath(); + dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*"); + + // LOG.info("Searching for "+dynPathSpec); + Path pathPattern = new Path(dynPathSpec); + FileStatus[] status = fs.globStatus(pathPattern); + + partitionsDiscoveredByPath = new LinkedHashMap>(); + contextDiscoveredByPath = new LinkedHashMap(); + + + if (status.length == 0) { + // LOG.warn("No partition found genereated by dynamic partitioning in [" + // +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize() + // +"], dynSpec["+dynPathSpec+"]"); + } else { + if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) { + this.partitionsDiscovered = true; + throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, + "Number of dynamic partitions being created " + + "exceeds configured max allowable partitions[" + + maxDynamicPartitions + + "], increase parameter [" + + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname + + "] if needed."); } - if (relativePath.getPath().length() > 0) { - return new Path(dest, relativePath.getPath()); - } else { - return dest; + + for (FileStatus st : status) { + LinkedHashMap fullPartSpec = new LinkedHashMap(); + Warehouse.makeSpecFromName(fullPartSpec, st.getPath()); + partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec); + JobConf jobConf = (JobConf)context.getConfiguration(); + JobContext currContext = HCatMapRedUtil.createJobContext( + jobConf, + context.getJobID(), + InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf, + ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))); + HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec); + contextDiscoveredByPath.put(st.getPath().toString(), currContext); } - } + } - /** - * Run to discover dynamic partitions available - */ - private void discoverPartitions(JobContext context) throws IOException { - if (!partitionsDiscovered) { - // LOG.info("discover ptns called"); - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); - - harProcessor.setEnabled(jobInfo.getHarRequested()); - - List dynamicPartCols = jobInfo.getPosOfDynPartCols(); - int 
maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); - - Path loadPath = new Path(jobInfo.getLocation()); - FileSystem fs = loadPath.getFileSystem(context.getConfiguration()); - - // construct a path pattern (e.g., /*/*) to find all dynamically generated paths - String dynPathSpec = loadPath.toUri().getPath(); - dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*"); - - // LOG.info("Searching for "+dynPathSpec); - Path pathPattern = new Path(dynPathSpec); - FileStatus[] status = fs.globStatus(pathPattern); - - partitionsDiscoveredByPath = new LinkedHashMap>(); - contextDiscoveredByPath = new LinkedHashMap(); - - - if (status.length == 0) { - // LOG.warn("No partition found genereated by dynamic partitioning in [" - // +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize() - // +"], dynSpec["+dynPathSpec+"]"); - } else { - if ((maxDynamicPartitions != -1) && (status.length > maxDynamicPartitions)) { - this.partitionsDiscovered = true; - throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, - "Number of dynamic partitions being created " - + "exceeds configured max allowable partitions[" - + maxDynamicPartitions - + "], increase parameter [" - + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname - + "] if needed."); - } - - for (FileStatus st : status) { - LinkedHashMap fullPartSpec = new LinkedHashMap(); - Warehouse.makeSpecFromName(fullPartSpec, st.getPath()); - partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec); - JobConf jobConf = (JobConf)context.getConfiguration(); - JobContext currContext = HCatMapRedUtil.createJobContext( - jobConf, - context.getJobID(), - InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf, - ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))); - HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec); - contextDiscoveredByPath.put(st.getPath().toString(), currContext); - } - } + // for (Entry> spec : partitionsDiscoveredByPath.entrySet()){ + // LOG.info("Partition "+ spec.getKey()); + // for (Entry e : spec.getValue().entrySet()){ + // LOG.info(e.getKey() + "=>" +e.getValue()); + // } + // } - // for (Entry> spec : partitionsDiscoveredByPath.entrySet()){ - // LOG.info("Partition "+ spec.getKey()); - // for (Entry e : spec.getValue().entrySet()){ - // LOG.info(e.getKey() + "=>" +e.getValue()); - // } - // } + this.partitionsDiscovered = true; + } + } - this.partitionsDiscovered = true; - } + private void registerPartitions(JobContext context) throws IOException{ + if (dynamicPartitioningUsed){ + discoverPartitions(context); + } + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); + Configuration conf = context.getConfiguration(); + Table table = new Table(jobInfo.getTableInfo().getTable()); + Path tblPath = new Path(table.getTTable().getSd().getLocation()); + FileSystem fs = tblPath.getFileSystem(conf); + + if( table.getPartitionKeys().size() == 0 ) { + //Move data from temp directory the actual table directory + //No metastore operation required. 
+ Path src = new Path(jobInfo.getLocation()); + moveTaskOutputs(fs, src, src, tblPath, false); + fs.delete(src, true); + return; } - private void registerPartitions(JobContext context) throws IOException{ - if (dynamicPartitioningUsed){ - discoverPartitions(context); + HiveMetaStoreClient client = null; + HCatTableInfo tableInfo = jobInfo.getTableInfo(); + List partitionsAdded = new ArrayList(); + try { + HiveConf hiveConf = HCatUtil.getHiveConf(conf); + client = HCatUtil.getHiveClient(hiveConf); + StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),table.getParameters()); + + FileStatus tblStat = fs.getFileStatus(tblPath); + String grpName = tblStat.getGroup(); + FsPermission perms = tblStat.getPermission(); + + List partitionsToAdd = new ArrayList(); + if (!dynamicPartitioningUsed){ + partitionsToAdd.add( + constructPartition( + context,jobInfo, + tblPath.toString(), jobInfo.getPartitionValues() + ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) + ,table, fs + ,grpName,perms)); + }else{ + for (Entry> entry : partitionsDiscoveredByPath.entrySet()){ + partitionsToAdd.add( + constructPartition( + context,jobInfo, + getPartitionRootLocation(entry.getKey(),entry.getValue().size()), entry.getValue() + ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) + ,table, fs + ,grpName,perms)); } - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); - Configuration conf = context.getConfiguration(); - Table table = new Table(jobInfo.getTableInfo().getTable()); - Path tblPath = new Path(table.getTTable().getSd().getLocation()); - FileSystem fs = tblPath.getFileSystem(conf); - - if( table.getPartitionKeys().size() == 0 ) { - //Move data from temp directory the actual table directory - //No metastore operation required. 
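// A much-simplified sketch of the unpartitioned fast path above: move the job's scratch
// output into the table directory, refuse to clobber existing data, then drop the scratch
// directory. The real moveTaskOutputs() is recursive, supports a dry-run pass, and skips
// _temporary/_logs/_SUCCESS; none of that is shown here.
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class UnpartitionedPublishSketch {
  static void publish(FileSystem fs, Path scratchDir, Path tableDir) throws IOException {
    for (FileStatus st : fs.listStatus(scratchDir)) {
      Path dest = new Path(tableDir, st.getPath().getName());
      if (fs.exists(dest)) {
        throw new IOException("Data already exists in " + dest + ", duplicate publish not possible.");
      }
      if (!fs.rename(st.getPath(), dest)) {
        throw new IOException("Failed to move output to " + dest);
      }
    }
    fs.delete(scratchDir, true);   // same cleanup step as registerPartitions() above
  }
}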
- Path src = new Path(jobInfo.getLocation()); - moveTaskOutputs(fs, src, src, tblPath, false); - fs.delete(src, true); - return; - } - - HiveMetaStoreClient client = null; - HCatTableInfo tableInfo = jobInfo.getTableInfo(); - List partitionsAdded = new ArrayList(); + } + + ArrayList> ptnInfos = new ArrayList>(); + for(Partition ptn : partitionsToAdd){ + ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn)); + } + + //Publish the new partition(s) + if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())){ + + Path src = new Path(ptnRootLocation); + // check here for each dir we're copying out, to see if it + // already exists, error out if so + moveTaskOutputs(fs, src, src, tblPath, true); + moveTaskOutputs(fs, src, src, tblPath, false); + fs.delete(src, true); try { - HiveConf hiveConf = HCatUtil.getHiveConf(conf); - client = HCatUtil.getHiveClient(hiveConf); - StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),table.getParameters()); - - FileStatus tblStat = fs.getFileStatus(tblPath); - String grpName = tblStat.getGroup(); - FsPermission perms = tblStat.getPermission(); - - List partitionsToAdd = new ArrayList(); - if (!dynamicPartitioningUsed){ - partitionsToAdd.add( - constructPartition( - context,jobInfo, - tblPath.toString(), jobInfo.getPartitionValues() - ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) - ,table, fs - ,grpName,perms)); - }else{ - for (Entry> entry : partitionsDiscoveredByPath.entrySet()){ - partitionsToAdd.add( - constructPartition( - context,jobInfo, - getPartitionRootLocation(entry.getKey(),entry.getValue().size()), entry.getValue() - ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) - ,table, fs - ,grpName,perms)); - } + updateTableSchema(client, table, jobInfo.getOutputSchema()); + LOG.info("HAR is being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos); + client.add_partitions(partitionsToAdd); + partitionsAdded = partitionsToAdd; + } catch (Exception e){ + // There was an error adding partitions : rollback fs copy and rethrow + for (Partition p : partitionsToAdd){ + Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation()))); + if (fs.exists(ptnPath)){ + fs.delete(ptnPath,true); } - - ArrayList> ptnInfos = new ArrayList>(); - for(Partition ptn : partitionsToAdd){ - ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn)); - } - - //Publish the new partition(s) - if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())){ - - Path src = new Path(ptnRootLocation); - // check here for each dir we're copying out, to see if it - // already exists, error out if so - moveTaskOutputs(fs, src, src, tblPath, true); - moveTaskOutputs(fs, src, src, tblPath, false); - fs.delete(src, true); - try { - updateTableSchema(client, table, jobInfo.getOutputSchema()); - LOG.info("HAR is being used. 
The table {} has new partitions {}.", table.getTableName(), ptnInfos); - client.add_partitions(partitionsToAdd); - partitionsAdded = partitionsToAdd; - } catch (Exception e){ - // There was an error adding partitions : rollback fs copy and rethrow - for (Partition p : partitionsToAdd){ - Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation()))); - if (fs.exists(ptnPath)){ - fs.delete(ptnPath,true); - } - } - throw e; - } - - }else{ - // no harProcessor, regular operation - updateTableSchema(client, table, jobInfo.getOutputSchema()); - LOG.info("HAR not is not being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos); - if (dynamicPartitioningUsed && (partitionsToAdd.size()>0)){ - Path src = new Path(ptnRootLocation); - moveTaskOutputs(fs, src, src, tblPath, true); - moveTaskOutputs(fs, src, src, tblPath, false); - fs.delete(src, true); - } - client.add_partitions(partitionsToAdd); - partitionsAdded = partitionsToAdd; - } - } catch (Exception e) { - if (partitionsAdded.size() > 0) { - try { - // baseCommitter.cleanupJob failed, try to clean up the - // metastore - for (Partition p : partitionsAdded) { - client.dropPartition(tableInfo.getDatabaseName(), - tableInfo.getTableName(), p.getValues()); - } - } catch (Exception te) { - // Keep cause as the original exception - throw new HCatException( - ErrorType.ERROR_PUBLISHING_PARTITION, e); - } - } - if (e instanceof HCatException) { - throw (HCatException) e; - } else { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } - } finally { - HCatUtil.closeHiveClientQuietly(client); + } + throw e; } - } - private void cancelDelegationTokens(JobContext context) throws IOException{ - LOG.info("Cancelling deletgation token for the job."); - HiveMetaStoreClient client = null; + }else{ + // no harProcessor, regular operation + updateTableSchema(client, table, jobInfo.getOutputSchema()); + LOG.info("HAR not is not being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos); + if (dynamicPartitioningUsed && (partitionsToAdd.size()>0)){ + Path src = new Path(ptnRootLocation); + moveTaskOutputs(fs, src, src, tblPath, true); + moveTaskOutputs(fs, src, src, tblPath, false); + fs.delete(src, true); + } + client.add_partitions(partitionsToAdd); + partitionsAdded = partitionsToAdd; + } + } catch (Exception e) { + if (partitionsAdded.size() > 0) { try { - HiveConf hiveConf = HCatUtil - .getHiveConf(context.getConfiguration()); - client = HCatUtil.getHiveClient(hiveConf); - // cancel the deleg. tokens that were acquired for this job now that - // we are done - we should cancel if the tokens were acquired by - // HCatOutputFormat and not if they were supplied by Oozie. 
- // In the latter case the HCAT_KEY_TOKEN_SIGNATURE property in - // the conf will not be set - String tokenStrForm = client.getTokenStrForm(); - if (tokenStrForm != null - && context.getConfiguration().get( - HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - client.cancelDelegationToken(tokenStrForm); - } - } catch (MetaException e) { - LOG.warn("MetaException while cancelling delegation token.", e); - } catch (TException e) { - LOG.warn("TException while cancelling delegation token.", e); - } finally { - HCatUtil.closeHiveClientQuietly(client); + // baseCommitter.cleanupJob failed, try to clean up the + // metastore + for (Partition p : partitionsAdded) { + client.dropPartition(tableInfo.getDatabaseName(), + tableInfo.getTableName(), p.getValues()); + } + } catch (Exception te) { + // Keep cause as the original exception + throw new HCatException( + ErrorType.ERROR_PUBLISHING_PARTITION, e); } + } + if (e instanceof HCatException) { + throw (HCatException) e; + } else { + throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); + } + } finally { + HCatUtil.closeHiveClientQuietly(client); + } + } + + private void cancelDelegationTokens(JobContext context) throws IOException{ + LOG.info("Cancelling deletgation token for the job."); + HiveMetaStoreClient client = null; + try { + HiveConf hiveConf = HCatUtil + .getHiveConf(context.getConfiguration()); + client = HCatUtil.getHiveClient(hiveConf); + // cancel the deleg. tokens that were acquired for this job now that + // we are done - we should cancel if the tokens were acquired by + // HCatOutputFormat and not if they were supplied by Oozie. + // In the latter case the HCAT_KEY_TOKEN_SIGNATURE property in + // the conf will not be set + String tokenStrForm = client.getTokenStrForm(); + if (tokenStrForm != null + && context.getConfiguration().get( + HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + client.cancelDelegationToken(tokenStrForm); + } + } catch (MetaException e) { + LOG.warn("MetaException while cancelling delegation token.", e); + } catch (TException e) { + LOG.warn("TException while cancelling delegation token.", e); + } finally { + HCatUtil.closeHiveClientQuietly(client); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java index fb306d8..d9d0f85 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java @@ -59,194 +59,194 @@ */ class FileOutputFormatContainer extends OutputFormatContainer { - private static final PathFilter hiddenFileFilter = new PathFilter() { - public boolean accept(Path p) { - String name = p.getName(); - return !name.startsWith("_") && !name.startsWith("."); - } - }; - - /** - * @param of base OutputFormat to contain - */ - public FileOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? 
super Writable> of) { - super(of); + private static final PathFilter hiddenFileFilter = new PathFilter() { + public boolean accept(Path p) { + String name = p.getName(); + return !name.startsWith("_") && !name.startsWith("."); } - - @Override - public RecordWriter, HCatRecord> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - //this needs to be manually set, under normal circumstances MR Task does this - setWorkOutputPath(context); - - //Configure the output key and value classes. - // This is required for writing null as key for file based tables. - context.getConfiguration().set("mapred.output.key.class", - NullWritable.class.getName()); - String jobInfoString = context.getConfiguration().get( - HCatConstants.HCAT_KEY_OUTPUT_INFO); - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil - .deserialize(jobInfoString); - StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( - context.getConfiguration(), storeInfo); - Class serde = storageHandler.getSerDeClass(); - SerDe sd = (SerDe) ReflectionUtils.newInstance(serde, - context.getConfiguration()); - context.getConfiguration().set("mapred.output.value.class", - sd.getSerializedClass().getName()); - - RecordWriter, HCatRecord> rw; - if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){ - // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null. - // (That's because records can't be written until the values of the dynamic partitions are deduced. - // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.) - rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter)null,context); - } else { - Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); - Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part")); - - rw = new FileRecordWriterContainer( - getBaseOutputFormat().getRecordWriter( - parentDir.getFileSystem(context.getConfiguration()), - new JobConf(context.getConfiguration()), - childPath.toString(), - InternalUtil.createReporter(context)), - context); - } - return rw; + }; + + /** + * @param of base OutputFormat to contain + */ + public FileOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { + super(of); + } + + @Override + public RecordWriter, HCatRecord> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { + //this needs to be manually set, under normal circumstances MR Task does this + setWorkOutputPath(context); + + //Configure the output key and value classes. + // This is required for writing null as key for file based tables. 
+ context.getConfiguration().set("mapred.output.key.class", + NullWritable.class.getName()); + String jobInfoString = context.getConfiguration().get( + HCatConstants.HCAT_KEY_OUTPUT_INFO); + OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil + .deserialize(jobInfoString); + StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( + context.getConfiguration(), storeInfo); + Class serde = storageHandler.getSerDeClass(); + SerDe sd = (SerDe) ReflectionUtils.newInstance(serde, + context.getConfiguration()); + context.getConfiguration().set("mapred.output.value.class", + sd.getSerializedClass().getName()); + + RecordWriter, HCatRecord> rw; + if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){ + // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null. + // (That's because records can't be written until the values of the dynamic partitions are deduced. + // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.) + rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter)null,context); + } else { + Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); + Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part")); + + rw = new FileRecordWriterContainer( + getBaseOutputFormat().getRecordWriter( + parentDir.getFileSystem(context.getConfiguration()), + new JobConf(context.getConfiguration()), + childPath.toString(), + InternalUtil.createReporter(context)), + context); + } + return rw; + } + + @Override + public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); + HiveMetaStoreClient client = null; + try { + HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); + client = HCatUtil.getHiveClient(hiveConf); + handleDuplicatePublish(context, + jobInfo, + client, + new Table(jobInfo.getTableInfo().getTable())); + } catch (MetaException e) { + throw new IOException(e); + } catch (TException e) { + throw new IOException(e); + } finally { + HCatUtil.closeHiveClientQuietly(client); } - @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); - HiveMetaStoreClient client = null; - try { - HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); - client = HCatUtil.getHiveClient(hiveConf); - handleDuplicatePublish(context, - jobInfo, - client, - new Table(jobInfo.getTableInfo().getTable())); - } catch (MetaException e) { - throw new IOException(e); - } catch (TException e) { - throw new IOException(e); - } finally { - HCatUtil.closeHiveClientQuietly(client); + if (!jobInfo.isDynamicPartitioningUsed()) { + JobConf jobConf = new JobConf(context.getConfiguration()); + getBaseOutputFormat().checkOutputSpecs(null, jobConf); + //checkoutputspecs might've set some properties we need to have context reflect that + HCatUtil.copyConf(jobConf, context.getConfiguration()); + } + } + + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { + //this needs to be manually set, under normal circumstances MR Task does this + setWorkOutputPath(context); + return new FileOutputCommitterContainer(context, + 
HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed() ? + null : + new JobConf(context.getConfiguration()).getOutputCommitter()); + } + + /** + * Handles duplicate publish of partition. Fails if partition already exists. + * For non partitioned tables, fails if files are present in table directory. + * For dynamic partitioned publish, does nothing - check would need to be done at recordwriter time + * @param context the job + * @param outputInfo the output info + * @param client the metastore client + * @param table the table being written to + * @throws IOException + * @throws org.apache.hadoop.hive.metastore.api.MetaException + * @throws org.apache.thrift.TException + */ + private static void handleDuplicatePublish(JobContext context, OutputJobInfo outputInfo, + HiveMetaStoreClient client, Table table) throws IOException, MetaException, TException, NoSuchObjectException { + + /* + * For fully specified ptn, follow strict checks for existence of partitions in metadata + * For unpartitioned tables, follow filechecks + * For partially specified tables: + * This would then need filechecks at the start of a ptn write, + * Doing metadata checks can get potentially very expensive (fat conf) if + * there are a large number of partitions that match the partial specifications + */ + + if (table.getPartitionKeys().size() > 0) { + if (!outputInfo.isDynamicPartitioningUsed()) { + List partitionValues = getPartitionValueList( + table, outputInfo.getPartitionValues()); + // fully-specified partition + List currentParts = client.listPartitionNames(outputInfo.getDatabaseName(), + outputInfo.getTableName(), partitionValues, (short) 1); + + if (currentParts.size() > 0) { + throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION); } + } + } else { + List partitionValues = getPartitionValueList( + table, outputInfo.getPartitionValues()); + // non-partitioned table - if (!jobInfo.isDynamicPartitioningUsed()) { - JobConf jobConf = new JobConf(context.getConfiguration()); - getBaseOutputFormat().checkOutputSpecs(null, jobConf); - //checkoutputspecs might've set some properties we need to have context reflect that - HCatUtil.copyConf(jobConf, context.getConfiguration()); - } - } + Path tablePath = new Path(table.getTTable().getSd().getLocation()); + FileSystem fs = tablePath.getFileSystem(context.getConfiguration()); - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { - //this needs to be manually set, under normal circumstances MR Task does this - setWorkOutputPath(context); - return new FileOutputCommitterContainer(context, - HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed() ? - null : - new JobConf(context.getConfiguration()).getOutputCommitter()); - } + if (fs.exists(tablePath)) { + FileStatus[] status = fs.globStatus(new Path(tablePath, "*"), hiddenFileFilter); - /** - * Handles duplicate publish of partition. Fails if partition already exists. - * For non partitioned tables, fails if files are present in table directory. 
- * For dynamic partitioned publish, does nothing - check would need to be done at recordwriter time - * @param context the job - * @param outputInfo the output info - * @param client the metastore client - * @param table the table being written to - * @throws IOException - * @throws org.apache.hadoop.hive.metastore.api.MetaException - * @throws org.apache.thrift.TException - */ - private static void handleDuplicatePublish(JobContext context, OutputJobInfo outputInfo, - HiveMetaStoreClient client, Table table) throws IOException, MetaException, TException, NoSuchObjectException { - - /* - * For fully specified ptn, follow strict checks for existence of partitions in metadata - * For unpartitioned tables, follow filechecks - * For partially specified tables: - * This would then need filechecks at the start of a ptn write, - * Doing metadata checks can get potentially very expensive (fat conf) if - * there are a large number of partitions that match the partial specifications - */ - - if (table.getPartitionKeys().size() > 0) { - if (!outputInfo.isDynamicPartitioningUsed()) { - List partitionValues = getPartitionValueList( - table, outputInfo.getPartitionValues()); - // fully-specified partition - List currentParts = client.listPartitionNames(outputInfo.getDatabaseName(), - outputInfo.getTableName(), partitionValues, (short) 1); - - if (currentParts.size() > 0) { - throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION); - } - } - } else { - List partitionValues = getPartitionValueList( - table, outputInfo.getPartitionValues()); - // non-partitioned table - - Path tablePath = new Path(table.getTTable().getSd().getLocation()); - FileSystem fs = tablePath.getFileSystem(context.getConfiguration()); - - if (fs.exists(tablePath)) { - FileStatus[] status = fs.globStatus(new Path(tablePath, "*"), hiddenFileFilter); - - if (status.length > 0) { - throw new HCatException(ErrorType.ERROR_NON_EMPTY_TABLE, - table.getDbName() + "." + table.getTableName()); - } - } + if (status.length > 0) { + throw new HCatException(ErrorType.ERROR_NON_EMPTY_TABLE, + table.getDbName() + "." + table.getTableName()); } + } + } + } + + /** + * Convert the partition value map to a value list in the partition key order. + * @param table the table being written to + * @param valueMap the partition value map + * @return the partition value list + * @throws java.io.IOException + */ + static List getPartitionValueList(Table table, Map valueMap) throws IOException { + + if (valueMap.size() != table.getPartitionKeys().size()) { + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, + "Table " + + table.getTableName() + " has " + + table.getPartitionKeys().size() + " partition keys, got " + + valueMap.size()); } - /** - * Convert the partition value map to a value list in the partition key order. 
- * @param table the table being written to - * @param valueMap the partition value map - * @return the partition value list - * @throws java.io.IOException - */ - static List getPartitionValueList(Table table, Map valueMap) throws IOException { - - if (valueMap.size() != table.getPartitionKeys().size()) { - throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, - "Table " - + table.getTableName() + " has " + - table.getPartitionKeys().size() + " partition keys, got " + - valueMap.size()); - } - - List values = new ArrayList(); - - for (FieldSchema schema : table.getPartitionKeys()) { - String value = valueMap.get(schema.getName().toLowerCase()); + List values = new ArrayList(); - if (value == null) { - throw new HCatException(ErrorType.ERROR_MISSING_PARTITION_KEY, - "Key " + schema.getName() + " of table " + table.getTableName()); - } + for (FieldSchema schema : table.getPartitionKeys()) { + String value = valueMap.get(schema.getName().toLowerCase()); - values.add(value); - } + if (value == null) { + throw new HCatException(ErrorType.ERROR_MISSING_PARTITION_KEY, + "Key " + schema.getName() + " of table " + table.getTableName()); + } - return values; + values.add(value); } - static void setWorkOutputPath(TaskAttemptContext context) throws IOException { - String outputPath = context.getConfiguration().get("mapred.output.dir"); - //we need to do this to get the task path and set it for mapred implementation - //since it can't be done automatically because of mapreduce->mapred abstraction - if (outputPath != null) - context.getConfiguration().set("mapred.work.output.dir", - new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString()); - } + return values; + } + + static void setWorkOutputPath(TaskAttemptContext context) throws IOException { + String outputPath = context.getConfiguration().get("mapred.output.dir"); + //we need to do this to get the task path and set it for mapred implementation + //since it can't be done automatically because of mapreduce->mapred abstraction + if (outputPath != null) + context.getConfiguration().set("mapred.work.output.dir", + new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString()); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileRecordWriterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileRecordWriterContainer.java index 58c137d..90ffe99 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileRecordWriterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileRecordWriterContainer.java @@ -51,216 +51,216 @@ */ class FileRecordWriterContainer extends RecordWriterContainer { - private final HCatStorageHandler storageHandler; - private final SerDe serDe; - private final ObjectInspector objectInspector; - - private boolean dynamicPartitioningUsed = false; - - private final Map, ? 
super Writable>> baseDynamicWriters; - private final Map baseDynamicSerDe; - private final Map baseDynamicCommitters; - private final Map dynamicContexts; - private final Map dynamicObjectInspectors; - private Map dynamicOutputJobInfo; - - - private final List partColsToDel; - private final List dynamicPartCols; - private int maxDynamicPartitions; - - private OutputJobInfo jobInfo; - private TaskAttemptContext context; - - /** - * @param baseWriter RecordWriter to contain - * @param context current TaskAttemptContext - * @throws IOException - * @throws InterruptedException - */ - public FileRecordWriterContainer(org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseWriter, - TaskAttemptContext context) throws IOException, InterruptedException { - super(context, baseWriter); - this.context = context; - jobInfo = HCatOutputFormat.getJobInfo(context); - - storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); - objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); - try { - InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); - } catch (SerDeException e) { - throw new IOException("Failed to inialize SerDe", e); - } - - // If partition columns occur in data, we want to remove them. - partColsToDel = jobInfo.getPosOfPartCols(); - dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); - dynamicPartCols = jobInfo.getPosOfDynPartCols(); - maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); - - if ((partColsToDel == null) || (dynamicPartitioningUsed && (dynamicPartCols == null))) { - throw new HCatException("It seems that setSchema() is not called on " + - "HCatOutputFormat. Please make sure that method is called."); - } + private final HCatStorageHandler storageHandler; + private final SerDe serDe; + private final ObjectInspector objectInspector; + + private boolean dynamicPartitioningUsed = false; + + private final Map, ? super Writable>> baseDynamicWriters; + private final Map baseDynamicSerDe; + private final Map baseDynamicCommitters; + private final Map dynamicContexts; + private final Map dynamicObjectInspectors; + private Map dynamicOutputJobInfo; + + + private final List partColsToDel; + private final List dynamicPartCols; + private int maxDynamicPartitions; + + private OutputJobInfo jobInfo; + private TaskAttemptContext context; + + /** + * @param baseWriter RecordWriter to contain + * @param context current TaskAttemptContext + * @throws IOException + * @throws InterruptedException + */ + public FileRecordWriterContainer(org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseWriter, + TaskAttemptContext context) throws IOException, InterruptedException { + super(context, baseWriter); + this.context = context; + jobInfo = HCatOutputFormat.getJobInfo(context); + + storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); + objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); + try { + InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); + } catch (SerDeException e) { + throw new IOException("Failed to inialize SerDe", e); + } + // If partition columns occur in data, we want to remove them. 
+ partColsToDel = jobInfo.getPosOfPartCols(); + dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); + dynamicPartCols = jobInfo.getPosOfDynPartCols(); + maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); - if (!dynamicPartitioningUsed) { - this.baseDynamicSerDe = null; - this.baseDynamicWriters = null; - this.baseDynamicCommitters = null; - this.dynamicContexts = null; - this.dynamicObjectInspectors = null; - this.dynamicOutputJobInfo = null; - } else { - this.baseDynamicSerDe = new HashMap(); - this.baseDynamicWriters = new HashMap, ? super Writable>>(); - this.baseDynamicCommitters = new HashMap(); - this.dynamicContexts = new HashMap(); - this.dynamicObjectInspectors = new HashMap(); - this.dynamicOutputJobInfo = new HashMap(); - } + if ((partColsToDel == null) || (dynamicPartitioningUsed && (dynamicPartCols == null))) { + throw new HCatException("It seems that setSchema() is not called on " + + "HCatOutputFormat. Please make sure that method is called."); } - /** - * @return the storagehandler - */ - public HCatStorageHandler getStorageHandler() { - return storageHandler; - } - @Override - public void close(TaskAttemptContext context) throws IOException, - InterruptedException { - Reporter reporter = InternalUtil.createReporter(context); - if (dynamicPartitioningUsed) { - for (org.apache.hadoop.mapred.RecordWriter, ? super Writable> bwriter : baseDynamicWriters.values()) { - //We are in RecordWriter.close() make sense that the context would be TaskInputOutput - bwriter.close(reporter); - } - for (Map.Entry entry : baseDynamicCommitters.entrySet()) { - org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey()); - OutputCommitter baseOutputCommitter = entry.getValue(); - if (baseOutputCommitter.needsTaskCommit(currContext)) { - baseOutputCommitter.commitTask(currContext); - } - } - } else { - getBaseRecordWriter().close(reporter); - } + if (!dynamicPartitioningUsed) { + this.baseDynamicSerDe = null; + this.baseDynamicWriters = null; + this.baseDynamicCommitters = null; + this.dynamicContexts = null; + this.dynamicObjectInspectors = null; + this.dynamicOutputJobInfo = null; + } else { + this.baseDynamicSerDe = new HashMap(); + this.baseDynamicWriters = new HashMap, ? 
super Writable>>(); + this.baseDynamicCommitters = new HashMap(); + this.dynamicContexts = new HashMap(); + this.dynamicObjectInspectors = new HashMap(); + this.dynamicOutputJobInfo = new HashMap(); } - - @Override - public void write(WritableComparable key, HCatRecord value) throws IOException, - InterruptedException { - - org.apache.hadoop.mapred.RecordWriter localWriter; - ObjectInspector localObjectInspector; - SerDe localSerDe; - OutputJobInfo localJobInfo = null; - - if (dynamicPartitioningUsed) { - // calculate which writer to use from the remaining values - this needs to be done before we delete cols - List dynamicPartValues = new ArrayList(); - for (Integer colToAppend : dynamicPartCols) { - dynamicPartValues.add(value.get(colToAppend).toString()); - } - - String dynKey = dynamicPartValues.toString(); - if (!baseDynamicWriters.containsKey(dynKey)) { - if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) { - throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, - "Number of dynamic partitions being created " - + "exceeds configured max allowable partitions[" - + maxDynamicPartitions - + "], increase parameter [" - + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname - + "] if needed."); - } - - org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil.createTaskAttemptContext(context); - configureDynamicStorageHandler(currTaskContext, dynamicPartValues); - localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext); - - //setup serDe - SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), currTaskContext.getJobConf()); - try { - InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo); - } catch (SerDeException e) { - throw new IOException("Failed to initialize SerDe", e); - } - - //create base OutputFormat - org.apache.hadoop.mapred.OutputFormat baseOF = - ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf()); - - //We are skipping calling checkOutputSpecs() for each partition - //As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition - //See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance - //In general this should be ok for most FileOutputFormat implementations - //but may become an issue for cases when the method is used to perform other setup tasks - - //get Output Committer - org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf().getOutputCommitter(); - //create currJobContext the latest so it gets all the config changes - org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext); - //setupJob() - baseOutputCommitter.setupJob(currJobContext); - //recreate to refresh jobConf of currTask context - currTaskContext = - HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(), - currTaskContext.getTaskAttemptID(), - currTaskContext.getProgressible()); - //set temp location - currTaskContext.getConfiguration().set("mapred.work.output.dir", - new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath().toString()); - //setupTask() - baseOutputCommitter.setupTask(currTaskContext); - - Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir")); - Path childPath = new Path(parentDir,FileOutputFormat.getUniqueFile(currTaskContext, "part", "")); - - 
org.apache.hadoop.mapred.RecordWriter baseRecordWriter = - baseOF.getRecordWriter( - parentDir.getFileSystem(currTaskContext.getConfiguration()), - currTaskContext.getJobConf(), - childPath.toString(), - InternalUtil.createReporter(currTaskContext)); - - baseDynamicWriters.put(dynKey, baseRecordWriter); - baseDynamicSerDe.put(dynKey, currSerDe); - baseDynamicCommitters.put(dynKey, baseOutputCommitter); - dynamicContexts.put(dynKey, currTaskContext); - dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema())); - dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey))); - } - - localJobInfo = dynamicOutputJobInfo.get(dynKey); - localWriter = baseDynamicWriters.get(dynKey); - localSerDe = baseDynamicSerDe.get(dynKey); - localObjectInspector = dynamicObjectInspectors.get(dynKey); - } else { - localJobInfo = jobInfo; - localWriter = getBaseRecordWriter(); - localSerDe = serDe; - localObjectInspector = objectInspector; + } + + /** + * @return the storagehandler + */ + public HCatStorageHandler getStorageHandler() { + return storageHandler; + } + + @Override + public void close(TaskAttemptContext context) throws IOException, + InterruptedException { + Reporter reporter = InternalUtil.createReporter(context); + if (dynamicPartitioningUsed) { + for (org.apache.hadoop.mapred.RecordWriter, ? super Writable> bwriter : baseDynamicWriters.values()) { + //We are in RecordWriter.close() make sense that the context would be TaskInputOutput + bwriter.close(reporter); + } + for (Map.Entry entry : baseDynamicCommitters.entrySet()) { + org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey()); + OutputCommitter baseOutputCommitter = entry.getValue(); + if (baseOutputCommitter.needsTaskCommit(currContext)) { + baseOutputCommitter.commitTask(currContext); } - - for (Integer colToDel : partColsToDel) { - value.remove(colToDel); + } + } else { + getBaseRecordWriter().close(reporter); + } + } + + @Override + public void write(WritableComparable key, HCatRecord value) throws IOException, + InterruptedException { + + org.apache.hadoop.mapred.RecordWriter localWriter; + ObjectInspector localObjectInspector; + SerDe localSerDe; + OutputJobInfo localJobInfo = null; + + if (dynamicPartitioningUsed) { + // calculate which writer to use from the remaining values - this needs to be done before we delete cols + List dynamicPartValues = new ArrayList(); + for (Integer colToAppend : dynamicPartCols) { + dynamicPartValues.add(value.get(colToAppend).toString()); + } + + String dynKey = dynamicPartValues.toString(); + if (!baseDynamicWriters.containsKey(dynKey)) { + if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) { + throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, + "Number of dynamic partitions being created " + + "exceeds configured max allowable partitions[" + + maxDynamicPartitions + + "], increase parameter [" + + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname + + "] if needed."); } + org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil.createTaskAttemptContext(context); + configureDynamicStorageHandler(currTaskContext, dynamicPartValues); + localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext); - //The key given by user is ignored + //setup serDe + SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), currTaskContext.getJobConf()); try { - localWriter.write(NullWritable.get(), 
localSerDe.serialize(value.getAll(), localObjectInspector)); + InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo); } catch (SerDeException e) { - throw new IOException("Failed to serialize object", e); + throw new IOException("Failed to initialize SerDe", e); } + + //create base OutputFormat + org.apache.hadoop.mapred.OutputFormat baseOF = + ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf()); + + //We are skipping calling checkOutputSpecs() for each partition + //As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition + //See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance + //In general this should be ok for most FileOutputFormat implementations + //but may become an issue for cases when the method is used to perform other setup tasks + + //get Output Committer + org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf().getOutputCommitter(); + //create currJobContext the latest so it gets all the config changes + org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext); + //setupJob() + baseOutputCommitter.setupJob(currJobContext); + //recreate to refresh jobConf of currTask context + currTaskContext = + HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(), + currTaskContext.getTaskAttemptID(), + currTaskContext.getProgressible()); + //set temp location + currTaskContext.getConfiguration().set("mapred.work.output.dir", + new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath().toString()); + //setupTask() + baseOutputCommitter.setupTask(currTaskContext); + + Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir")); + Path childPath = new Path(parentDir,FileOutputFormat.getUniqueFile(currTaskContext, "part", "")); + + org.apache.hadoop.mapred.RecordWriter baseRecordWriter = + baseOF.getRecordWriter( + parentDir.getFileSystem(currTaskContext.getConfiguration()), + currTaskContext.getJobConf(), + childPath.toString(), + InternalUtil.createReporter(currTaskContext)); + + baseDynamicWriters.put(dynKey, baseRecordWriter); + baseDynamicSerDe.put(dynKey, currSerDe); + baseDynamicCommitters.put(dynKey, baseOutputCommitter); + dynamicContexts.put(dynKey, currTaskContext); + dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema())); + dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey))); + } + + localJobInfo = dynamicOutputJobInfo.get(dynKey); + localWriter = baseDynamicWriters.get(dynKey); + localSerDe = baseDynamicSerDe.get(dynKey); + localObjectInspector = dynamicObjectInspectors.get(dynKey); + } else { + localJobInfo = jobInfo; + localWriter = getBaseRecordWriter(); + localSerDe = serDe; + localObjectInspector = objectInspector; } - protected void configureDynamicStorageHandler(JobContext context, List dynamicPartVals) throws IOException { - HCatOutputFormat.configureOutputStorageHandler(context, dynamicPartVals); + for (Integer colToDel : partColsToDel) { + value.remove(colToDel); } + + //The key given by user is ignored + try { + localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector)); + } catch (SerDeException e) { + throw new IOException("Failed to serialize object", e); + } + } + + protected void 
configureDynamicStorageHandler(JobContext context, List dynamicPartVals) throws IOException { + HCatOutputFormat.configureOutputStorageHandler(context, dynamicPartVals); + } + } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java index 09739a1..860025e 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java @@ -46,141 +46,141 @@ */ public class FosterStorageHandler extends HCatStorageHandler { - public Configuration conf; - /** The directory under which data is initially written for a partitioned table */ - protected static final String DYNTEMP_DIR_NAME = "_DYN"; - - /** The directory under which data is initially written for a non partitioned table */ - protected static final String TEMP_DIR_NAME = "_TEMP"; - - private Class ifClass; - private Class ofClass; - private Class serDeClass; - - public FosterStorageHandler(String ifName, String ofName, String serdeName) throws ClassNotFoundException { - this((Class) Class.forName(ifName), - (Class) Class.forName(ofName), - (Class) Class.forName(serdeName)); - } - - public FosterStorageHandler(Class ifClass, - Class ofClass, - Class serDeClass) { - this.ifClass = ifClass; - this.ofClass = ofClass; - this.serDeClass = serDeClass; - } - - @Override - public Class getInputFormatClass() { - return ifClass; //To change body of overridden methods use File | Settings | File Templates. - } - - @Override - public Class getOutputFormatClass() { - return ofClass; //To change body of overridden methods use File | Settings | File Templates. - } - - @Override - public Class getSerDeClass() { - return serDeClass; //To change body of implemented methods use File | Settings | File Templates. - } - - @Override - public HiveMetaHook getMetaHook() { - return null; - } - - @Override - public void configureInputJobProperties(TableDesc tableDesc, - Map jobProperties) { - - } - - @Override - public void configureOutputJobProperties(TableDesc tableDesc, - Map jobProperties) { - try { - OutputJobInfo jobInfo = (OutputJobInfo) - HCatUtil.deserialize(tableDesc.getJobProperties().get( - HCatConstants.HCAT_KEY_OUTPUT_INFO)); - String parentPath = jobInfo.getTableInfo().getTableLocation(); - String dynHash = tableDesc.getJobProperties().get( - HCatConstants.HCAT_DYNAMIC_PTN_JOBID); - - // For dynamic partitioned writes without all keyvalues specified, - // we create a temp dir for the associated write job - if (dynHash != null) { - parentPath = new Path(parentPath, - DYNTEMP_DIR_NAME + dynHash).toString(); - } - - String outputLocation; - - if ((dynHash == null) - && Boolean.valueOf((String)tableDesc.getProperties().get("EXTERNAL")) - && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { - // honor custom location for external table apart from what metadata specifies - // only if we're not using dynamic partitioning - see HIVE-5011 - outputLocation = jobInfo.getLocation(); - } else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) { - // For non-partitioned tables, we send them to the temp dir - outputLocation = TEMP_DIR_NAME; - } else { - List cols = new ArrayList(); - List values = new ArrayList(); - - //Get the output location in the order partition keys are defined for the table. - for (String name : - jobInfo.getTableInfo(). 
- getPartitionColumns().getFieldNames()) { - String value = jobInfo.getPartitionValues().get(name); - cols.add(name); - values.add(value); - } - outputLocation = FileUtils.makePartName(cols, values); - } - - jobInfo.setLocation(new Path(parentPath, outputLocation).toString()); - - //only set output dir if partition is fully materialized - if (jobInfo.getPartitionValues().size() - == jobInfo.getTableInfo().getPartitionColumns().size()) { - jobProperties.put("mapred.output.dir", jobInfo.getLocation()); - } - - //TODO find a better home for this, RCFile specifc - jobProperties.put(RCFile.COLUMN_NUMBER_CONF_STR, - Integer.toOctalString( - jobInfo.getOutputSchema().getFields().size())); - jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(jobInfo)); - } catch (IOException e) { - throw new IllegalStateException("Failed to set output path", e); + public Configuration conf; + /** The directory under which data is initially written for a partitioned table */ + protected static final String DYNTEMP_DIR_NAME = "_DYN"; + + /** The directory under which data is initially written for a non partitioned table */ + protected static final String TEMP_DIR_NAME = "_TEMP"; + + private Class ifClass; + private Class ofClass; + private Class serDeClass; + + public FosterStorageHandler(String ifName, String ofName, String serdeName) throws ClassNotFoundException { + this((Class) Class.forName(ifName), + (Class) Class.forName(ofName), + (Class) Class.forName(serdeName)); + } + + public FosterStorageHandler(Class ifClass, + Class ofClass, + Class serDeClass) { + this.ifClass = ifClass; + this.ofClass = ofClass; + this.serDeClass = serDeClass; + } + + @Override + public Class getInputFormatClass() { + return ifClass; //To change body of overridden methods use File | Settings | File Templates. + } + + @Override + public Class getOutputFormatClass() { + return ofClass; //To change body of overridden methods use File | Settings | File Templates. + } + + @Override + public Class getSerDeClass() { + return serDeClass; //To change body of implemented methods use File | Settings | File Templates. 
+ } + + @Override + public HiveMetaHook getMetaHook() { + return null; + } + + @Override + public void configureInputJobProperties(TableDesc tableDesc, + Map jobProperties) { + + } + + @Override + public void configureOutputJobProperties(TableDesc tableDesc, + Map jobProperties) { + try { + OutputJobInfo jobInfo = (OutputJobInfo) + HCatUtil.deserialize(tableDesc.getJobProperties().get( + HCatConstants.HCAT_KEY_OUTPUT_INFO)); + String parentPath = jobInfo.getTableInfo().getTableLocation(); + String dynHash = tableDesc.getJobProperties().get( + HCatConstants.HCAT_DYNAMIC_PTN_JOBID); + + // For dynamic partitioned writes without all keyvalues specified, + // we create a temp dir for the associated write job + if (dynHash != null) { + parentPath = new Path(parentPath, + DYNTEMP_DIR_NAME + dynHash).toString(); + } + + String outputLocation; + + if ((dynHash == null) + && Boolean.valueOf((String)tableDesc.getProperties().get("EXTERNAL")) + && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { + // honor custom location for external table apart from what metadata specifies + // only if we're not using dynamic partitioning - see HIVE-5011 + outputLocation = jobInfo.getLocation(); + } else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) { + // For non-partitioned tables, we send them to the temp dir + outputLocation = TEMP_DIR_NAME; + } else { + List cols = new ArrayList(); + List values = new ArrayList(); + + //Get the output location in the order partition keys are defined for the table. + for (String name : + jobInfo.getTableInfo(). + getPartitionColumns().getFieldNames()) { + String value = jobInfo.getPartitionValues().get(name); + cols.add(name); + values.add(value); } - + outputLocation = FileUtils.makePartName(cols, values); + } + + jobInfo.setLocation(new Path(parentPath, outputLocation).toString()); + + //only set output dir if partition is fully materialized + if (jobInfo.getPartitionValues().size() + == jobInfo.getTableInfo().getPartitionColumns().size()) { + jobProperties.put("mapred.output.dir", jobInfo.getLocation()); + } + + //TODO find a better home for this, RCFile specifc + jobProperties.put(RCFile.COLUMN_NUMBER_CONF_STR, + Integer.toOctalString( + jobInfo.getOutputSchema().getFields().size())); + jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, + HCatUtil.serialize(jobInfo)); + } catch (IOException e) { + throw new IllegalStateException("Failed to set output path", e); } - @Override - OutputFormatContainer getOutputFormatContainer( - org.apache.hadoop.mapred.OutputFormat outputFormat) { - return new FileOutputFormatContainer(outputFormat); - } - - @Override - public Configuration getConf() { - return conf; - } - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - - @Override - public HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException { - return new DefaultHiveAuthorizationProvider(); - } + } + + @Override + OutputFormatContainer getOutputFormatContainer( + org.apache.hadoop.mapred.OutputFormat outputFormat) { + return new FileOutputFormatContainer(outputFormat); + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException { + return new DefaultHiveAuthorizationProvider(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java 
b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java index 0e3938e..e9e1897 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java @@ -48,286 +48,286 @@ import org.apache.hive.hcatalog.data.schema.HCatSchema; public abstract class HCatBaseInputFormat - extends InputFormat { - - /** - * get the schema for the HCatRecord data returned by HCatInputFormat. - * - * @param context the jobContext - * @throws IllegalArgumentException - */ - private Class inputFileFormatClass; - - // TODO needs to go in InitializeInput? as part of InputJobInfo - private static HCatSchema getOutputSchema(Configuration conf) - throws IOException { - String os = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); - if (os == null) { - return getTableSchema(conf); - } else { - return (HCatSchema) HCatUtil.deserialize(os); - } - } - - /** - * Set the schema for the HCatRecord data returned by HCatInputFormat. - * @param job the job object - * @param hcatSchema the schema to use as the consolidated schema - */ - public static void setOutputSchema(Job job, HCatSchema hcatSchema) - throws IOException { - job.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, - HCatUtil.serialize(hcatSchema)); + extends InputFormat { + + /** + * get the schema for the HCatRecord data returned by HCatInputFormat. + * + * @param context the jobContext + * @throws IllegalArgumentException + */ + private Class inputFileFormatClass; + + // TODO needs to go in InitializeInput? as part of InputJobInfo + private static HCatSchema getOutputSchema(Configuration conf) + throws IOException { + String os = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); + if (os == null) { + return getTableSchema(conf); + } else { + return (HCatSchema) HCatUtil.deserialize(os); } - - protected static org.apache.hadoop.mapred.InputFormat - getMapRedInputFormat(JobConf job, Class inputFormatClass) throws IOException { - return ( - org.apache.hadoop.mapred.InputFormat) - ReflectionUtils.newInstance(inputFormatClass, job); + } + + /** + * Set the schema for the HCatRecord data returned by HCatInputFormat. + * @param job the job object + * @param hcatSchema the schema to use as the consolidated schema + */ + public static void setOutputSchema(Job job, HCatSchema hcatSchema) + throws IOException { + job.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, + HCatUtil.serialize(hcatSchema)); + } + + protected static org.apache.hadoop.mapred.InputFormat + getMapRedInputFormat(JobConf job, Class inputFormatClass) throws IOException { + return ( + org.apache.hadoop.mapred.InputFormat) + ReflectionUtils.newInstance(inputFormatClass, job); + } + + /** + * Logically split the set of input files for the job. Returns the + * underlying InputFormat's splits + * @param jobContext the job context object + * @return the splits, an HCatInputSplit wrapper over the storage + * handler InputSplits + * @throws IOException or InterruptedException + */ + @Override + public List getSplits(JobContext jobContext) + throws IOException, InterruptedException { + Configuration conf = jobContext.getConfiguration(); + + //Get the job info from the configuration, + //throws exception if not initialized + InputJobInfo inputJobInfo; + try { + inputJobInfo = getJobInfo(conf); + } catch (Exception e) { + throw new IOException(e); } - /** - * Logically split the set of input files for the job. 
Returns the - * underlying InputFormat's splits - * @param jobContext the job context object - * @return the splits, an HCatInputSplit wrapper over the storage - * handler InputSplits - * @throws IOException or InterruptedException - */ - @Override - public List getSplits(JobContext jobContext) - throws IOException, InterruptedException { - Configuration conf = jobContext.getConfiguration(); - - //Get the job info from the configuration, - //throws exception if not initialized - InputJobInfo inputJobInfo; - try { - inputJobInfo = getJobInfo(conf); - } catch (Exception e) { - throw new IOException(e); - } - - List splits = new ArrayList(); - List partitionInfoList = inputJobInfo.getPartitions(); - if (partitionInfoList == null) { - //No partitions match the specified partition filter - return splits; - } - - HCatStorageHandler storageHandler; - JobConf jobConf; - //For each matching partition, call getSplits on the underlying InputFormat - for (PartInfo partitionInfo : partitionInfoList) { - jobConf = HCatUtil.getJobConfFromContext(jobContext); - setInputPath(jobConf, partitionInfo.getLocation()); - Map jobProperties = partitionInfo.getJobProperties(); - - HCatSchema allCols = new HCatSchema(new LinkedList()); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getDataColumns().getFields()) - allCols.append(field); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getPartitionColumns().getFields()) - allCols.append(field); - - HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); - - storageHandler = HCatUtil.getStorageHandler( - jobConf, partitionInfo); - - //Get the input format - Class inputFormatClass = storageHandler.getInputFormatClass(); - org.apache.hadoop.mapred.InputFormat inputFormat = - getMapRedInputFormat(jobConf, inputFormatClass); - - //Call getSplit on the InputFormat, create an HCatSplit for each - //underlying split. When the desired number of input splits is missing, - //use a default number (denoted by zero). - //TODO(malewicz): Currently each partition is split independently into - //a desired number. However, we want the union of all partitions to be - //split into a desired number while maintaining balanced sizes of input - //splits. - int desiredNumSplits = - conf.getInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 0); - org.apache.hadoop.mapred.InputSplit[] baseSplits = - inputFormat.getSplits(jobConf, desiredNumSplits); - - for (org.apache.hadoop.mapred.InputSplit split : baseSplits) { - splits.add(new HCatSplit( - partitionInfo, - split, allCols)); - } - } - - return splits; + List splits = new ArrayList(); + List partitionInfoList = inputJobInfo.getPartitions(); + if (partitionInfoList == null) { + //No partitions match the specified partition filter + return splits; } - /** - * Create the RecordReader for the given InputSplit. Returns the underlying - * RecordReader if the required operations are supported and schema matches - * with HCatTable schema. Returns an HCatRecordReader if operations need to - * be implemented in HCat. 
- * @param split the split - * @param taskContext the task attempt context - * @return the record reader instance, either an HCatRecordReader(later) or - * the underlying storage handler's RecordReader - * @throws IOException or InterruptedException - */ - @Override - public RecordReader - createRecordReader(InputSplit split, - TaskAttemptContext taskContext) throws IOException, InterruptedException { - - HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); - PartInfo partitionInfo = hcatSplit.getPartitionInfo(); - JobContext jobContext = taskContext; - Configuration conf = jobContext.getConfiguration(); - - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( - conf, partitionInfo); - - JobConf jobConf = HCatUtil.getJobConfFromContext(jobContext); - Map jobProperties = partitionInfo.getJobProperties(); - HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); - - Map valuesNotInDataCols = getColValsNotInDataColumns( - getOutputSchema(conf), partitionInfo - ); - - return new HCatRecordReader(storageHandler, valuesNotInDataCols); + HCatStorageHandler storageHandler; + JobConf jobConf; + //For each matching partition, call getSplits on the underlying InputFormat + for (PartInfo partitionInfo : partitionInfoList) { + jobConf = HCatUtil.getJobConfFromContext(jobContext); + setInputPath(jobConf, partitionInfo.getLocation()); + Map jobProperties = partitionInfo.getJobProperties(); + + HCatSchema allCols = new HCatSchema(new LinkedList()); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getDataColumns().getFields()) + allCols.append(field); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getPartitionColumns().getFields()) + allCols.append(field); + + HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); + + storageHandler = HCatUtil.getStorageHandler( + jobConf, partitionInfo); + + //Get the input format + Class inputFormatClass = storageHandler.getInputFormatClass(); + org.apache.hadoop.mapred.InputFormat inputFormat = + getMapRedInputFormat(jobConf, inputFormatClass); + + //Call getSplit on the InputFormat, create an HCatSplit for each + //underlying split. When the desired number of input splits is missing, + //use a default number (denoted by zero). + //TODO(malewicz): Currently each partition is split independently into + //a desired number. However, we want the union of all partitions to be + //split into a desired number while maintaining balanced sizes of input + //splits. 
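
getSplits() fans out over every matching partition and wraps each underlying split in an HCatSplit; the per-partition split count can be hinted through HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS (0 keeps the underlying InputFormat's default), and setOutputSchema() lets the reader project a subset of columns, with partition columns filled in from the partition values rather than the data files. A read-side sketch with hypothetical table and column names:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class HCatReadSetup {
  public static Job configure(Configuration conf) throws Exception {
    // optional hint: ask each partition's InputFormat for roughly 16 splits
    conf.setInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 16);
    Job job = new Job(conf, "hcat-read");
    HCatInputFormat.setInput(job.getConfiguration(), "default", "page_views");
    job.setInputFormatClass(HCatInputFormat.class);

    // optional projection: "ds" is a partition column, so the record reader
    // supplies its value from the partition rather than the stored data
    HCatSchema tableSchema = HCatInputFormat.getTableSchema(job.getConfiguration());
    List<HCatFieldSchema> projected = new ArrayList<HCatFieldSchema>();
    projected.add(tableSchema.get("user"));
    projected.add(tableSchema.get("ds"));
    HCatInputFormat.setOutputSchema(job, new HCatSchema(projected));
    return job;
  }
}
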
+ int desiredNumSplits = + conf.getInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 0); + org.apache.hadoop.mapred.InputSplit[] baseSplits = + inputFormat.getSplits(jobConf, desiredNumSplits); + + for (org.apache.hadoop.mapred.InputSplit split : baseSplits) { + splits.add(new HCatSplit( + partitionInfo, + split, allCols)); + } } - - /** - * gets values for fields requested by output schema which will not be in the data - */ - private static Map getColValsNotInDataColumns(HCatSchema outputSchema, - PartInfo partInfo) { - HCatSchema dataSchema = partInfo.getPartitionSchema(); - Map vals = new HashMap(); - for (String fieldName : outputSchema.getFieldNames()) { - if (dataSchema.getPosition(fieldName) == null) { - // this entry of output is not present in the output schema - // so, we first check the table schema to see if it is a part col - - if (partInfo.getPartitionValues().containsKey(fieldName)) { - vals.put(fieldName, partInfo.getPartitionValues().get(fieldName)); - } else { - vals.put(fieldName, null); - } - } + return splits; + } + + /** + * Create the RecordReader for the given InputSplit. Returns the underlying + * RecordReader if the required operations are supported and schema matches + * with HCatTable schema. Returns an HCatRecordReader if operations need to + * be implemented in HCat. + * @param split the split + * @param taskContext the task attempt context + * @return the record reader instance, either an HCatRecordReader(later) or + * the underlying storage handler's RecordReader + * @throws IOException or InterruptedException + */ + @Override + public RecordReader + createRecordReader(InputSplit split, + TaskAttemptContext taskContext) throws IOException, InterruptedException { + + HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); + PartInfo partitionInfo = hcatSplit.getPartitionInfo(); + JobContext jobContext = taskContext; + Configuration conf = jobContext.getConfiguration(); + + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( + conf, partitionInfo); + + JobConf jobConf = HCatUtil.getJobConfFromContext(jobContext); + Map jobProperties = partitionInfo.getJobProperties(); + HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); + + Map valuesNotInDataCols = getColValsNotInDataColumns( + getOutputSchema(conf), partitionInfo + ); + + return new HCatRecordReader(storageHandler, valuesNotInDataCols); + } + + + /** + * gets values for fields requested by output schema which will not be in the data + */ + private static Map getColValsNotInDataColumns(HCatSchema outputSchema, + PartInfo partInfo) { + HCatSchema dataSchema = partInfo.getPartitionSchema(); + Map vals = new HashMap(); + for (String fieldName : outputSchema.getFieldNames()) { + if (dataSchema.getPosition(fieldName) == null) { + // this entry of output is not present in the output schema + // so, we first check the table schema to see if it is a part col + + if (partInfo.getPartitionValues().containsKey(fieldName)) { + vals.put(fieldName, partInfo.getPartitionValues().get(fieldName)); + } else { + vals.put(fieldName, null); } - return vals; - } - - /** - * @see org.apache.hive.hcatalog.mapreduce.HCatBaseInputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) - * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} - */ - public static HCatSchema getTableSchema(JobContext context) - throws IOException { - return getTableSchema(context.getConfiguration()); + } } - - - /** - * Gets the HCatTable schema for the table specified in the 
HCatInputFormat.setInput call - * on the specified job context. This information is available only after HCatInputFormat.setInput - * has been called for a JobContext. - * @param conf the Configuration object - * @return the table schema - * @throws IOException if HCatInputFormat.setInput has not been called - * for the current context - */ - public static HCatSchema getTableSchema(Configuration conf) - throws IOException { - InputJobInfo inputJobInfo = getJobInfo(conf); - HCatSchema allCols = new HCatSchema(new LinkedList()); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getDataColumns().getFields()) - allCols.append(field); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getPartitionColumns().getFields()) - allCols.append(field); - return allCols; + return vals; + } + + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatBaseInputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) + * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} + */ + public static HCatSchema getTableSchema(JobContext context) + throws IOException { + return getTableSchema(context.getConfiguration()); + } + + + /** + * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput call + * on the specified job context. This information is available only after HCatInputFormat.setInput + * has been called for a JobContext. + * @param conf the Configuration object + * @return the table schema + * @throws IOException if HCatInputFormat.setInput has not been called + * for the current context + */ + public static HCatSchema getTableSchema(Configuration conf) + throws IOException { + InputJobInfo inputJobInfo = getJobInfo(conf); + HCatSchema allCols = new HCatSchema(new LinkedList()); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getDataColumns().getFields()) + allCols.append(field); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getPartitionColumns().getFields()) + allCols.append(field); + return allCols; + } + + /** + * Gets the InputJobInfo object by reading the Configuration and deserializing + * the string. If InputJobInfo is not present in the configuration, throws an + * exception since that means HCatInputFormat.setInput has not been called. + * @param conf the Configuration object + * @return the InputJobInfo object + * @throws IOException the exception + */ + private static InputJobInfo getJobInfo(Configuration conf) + throws IOException { + String jobString = conf.get( + HCatConstants.HCAT_KEY_JOB_INFO); + if (jobString == null) { + throw new IOException("job information not found in JobContext." + + " HCatInputFormat.setInput() not called?"); } - /** - * Gets the InputJobInfo object by reading the Configuration and deserializing - * the string. If InputJobInfo is not present in the configuration, throws an - * exception since that means HCatInputFormat.setInput has not been called. - * @param conf the Configuration object - * @return the InputJobInfo object - * @throws IOException the exception - */ - private static InputJobInfo getJobInfo(Configuration conf) - throws IOException { - String jobString = conf.get( - HCatConstants.HCAT_KEY_JOB_INFO); - if (jobString == null) { - throw new IOException("job information not found in JobContext." 
- + " HCatInputFormat.setInput() not called?"); + return (InputJobInfo) HCatUtil.deserialize(jobString); + } + + private void setInputPath(JobConf jobConf, String location) + throws IOException { + + // ideally we should just call FileInputFormat.setInputPaths() here - but + // that won't work since FileInputFormat.setInputPaths() needs + // a Job object instead of a JobContext which we are handed here + + int length = location.length(); + int curlyOpen = 0; + int pathStart = 0; + boolean globPattern = false; + List pathStrings = new ArrayList(); + + for (int i = 0; i < length; i++) { + char ch = location.charAt(i); + switch (ch) { + case '{': { + curlyOpen++; + if (!globPattern) { + globPattern = true; } - - return (InputJobInfo) HCatUtil.deserialize(jobString); - } - - private void setInputPath(JobConf jobConf, String location) - throws IOException { - - // ideally we should just call FileInputFormat.setInputPaths() here - but - // that won't work since FileInputFormat.setInputPaths() needs - // a Job object instead of a JobContext which we are handed here - - int length = location.length(); - int curlyOpen = 0; - int pathStart = 0; - boolean globPattern = false; - List pathStrings = new ArrayList(); - - for (int i = 0; i < length; i++) { - char ch = location.charAt(i); - switch (ch) { - case '{': { - curlyOpen++; - if (!globPattern) { - globPattern = true; - } - break; - } - case '}': { - curlyOpen--; - if (curlyOpen == 0 && globPattern) { - globPattern = false; - } - break; - } - case ',': { - if (!globPattern) { - pathStrings.add(location.substring(pathStart, i)); - pathStart = i + 1; - } - break; - } - } + break; + } + case '}': { + curlyOpen--; + if (curlyOpen == 0 && globPattern) { + globPattern = false; } - pathStrings.add(location.substring(pathStart, length)); - - Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0])); - String separator = ""; - StringBuilder str = new StringBuilder(); - - for (Path path : paths) { - FileSystem fs = path.getFileSystem(jobConf); - final String qualifiedPath = fs.makeQualified(path).toString(); - str.append(separator) - .append(StringUtils.escapeString(qualifiedPath)); - separator = StringUtils.COMMA_STR; + break; + } + case ',': { + if (!globPattern) { + pathStrings.add(location.substring(pathStart, i)); + pathStart = i + 1; } - - jobConf.set("mapred.input.dir", str.toString()); + break; + } + } } + pathStrings.add(location.substring(pathStart, length)); + + Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0])); + String separator = ""; + StringBuilder str = new StringBuilder(); + + for (Path path : paths) { + FileSystem fs = path.getFileSystem(jobConf); + final String qualifiedPath = fs.makeQualified(path).toString(); + str.append(separator) + .append(StringUtils.escapeString(qualifiedPath)); + separator = StringUtils.COMMA_STR; + } + + jobConf.set("mapred.input.dir", str.toString()); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java index 23b5c8f..ffec3ca 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java @@ -41,116 +41,116 @@ // static final private Log LOG = LogFactory.getLog(HCatBaseOutputFormat.class); - /** - * @see 
org.apache.hive.hcatalog.mapreduce.HCatBaseOutputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) - * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} - */ - public static HCatSchema getTableSchema(JobContext context) throws IOException { - return getTableSchema(context.getConfiguration()); + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatBaseOutputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) + * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} + */ + public static HCatSchema getTableSchema(JobContext context) throws IOException { + return getTableSchema(context.getConfiguration()); + } + + /** + * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call + * on the specified job context. + * @param conf the Configuration object + * @return the table schema + * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context + */ + public static HCatSchema getTableSchema(Configuration conf) throws IOException { + OutputJobInfo jobInfo = getJobInfo(conf); + return jobInfo.getTableInfo().getDataColumns(); + } + + /** + * Check for validity of the output-specification for the job. + * @param context information about the job + * @throws IOException when output should not be attempted + */ + @Override + public void checkOutputSpecs(JobContext context + ) throws IOException, InterruptedException { + getOutputFormat(context).checkOutputSpecs(context); + } + + /** + * Gets the output format instance. + * @param context the job context + * @return the output format instance + * @throws IOException + */ + protected OutputFormat, HCatRecord> getOutputFormat(JobContext context) throws IOException { + OutputJobInfo jobInfo = getJobInfo(context); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + //why do we need this? + configureOutputStorageHandler(context); + return storageHandler.getOutputFormatContainer(ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), context.getConfiguration())); + } + + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatBaseOutputFormat#getJobInfo(org.apache.hadoop.conf.Configuration) + * @deprecated use {@link #getJobInfo(org.apache.hadoop.conf.Configuration)} + */ + public static OutputJobInfo getJobInfo(JobContext jobContext) throws IOException { + return getJobInfo(jobContext.getConfiguration()); + } + + /** + * Gets the HCatOuputJobInfo object by reading the Configuration and deserializing + * the string. If InputJobInfo is not present in the configuration, throws an + * exception since that means HCatOutputFormat.setOutput has not been called. + * @param conf the job Configuration object + * @return the OutputJobInfo object + * @throws IOException the IO exception + */ + public static OutputJobInfo getJobInfo(Configuration conf) throws IOException { + String jobString = conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + if (jobString == null) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED); } - /** - * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call - * on the specified job context. 
- * @param conf the Configuration object - * @return the table schema - * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context - */ - public static HCatSchema getTableSchema(Configuration conf) throws IOException { - OutputJobInfo jobInfo = getJobInfo(conf); - return jobInfo.getTableInfo().getDataColumns(); - } - - /** - * Check for validity of the output-specification for the job. - * @param context information about the job - * @throws IOException when output should not be attempted - */ - @Override - public void checkOutputSpecs(JobContext context - ) throws IOException, InterruptedException { - getOutputFormat(context).checkOutputSpecs(context); - } - - /** - * Gets the output format instance. - * @param context the job context - * @return the output format instance - * @throws IOException - */ - protected OutputFormat, HCatRecord> getOutputFormat(JobContext context) throws IOException { - OutputJobInfo jobInfo = getJobInfo(context); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - //why do we need this? - configureOutputStorageHandler(context); - return storageHandler.getOutputFormatContainer(ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), context.getConfiguration())); - } - - /** - * @see org.apache.hive.hcatalog.mapreduce.HCatBaseOutputFormat#getJobInfo(org.apache.hadoop.conf.Configuration) - * @deprecated use {@link #getJobInfo(org.apache.hadoop.conf.Configuration)} - */ - public static OutputJobInfo getJobInfo(JobContext jobContext) throws IOException { - return getJobInfo(jobContext.getConfiguration()); - } - - /** - * Gets the HCatOuputJobInfo object by reading the Configuration and deserializing - * the string. If InputJobInfo is not present in the configuration, throws an - * exception since that means HCatOutputFormat.setOutput has not been called. 
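
configureOutputStorageHandler() fills in the dynamic partition keys from values observed at write time; a key becomes dynamic simply because the client gave it no value in the partition spec passed to setOutput(). One way to set that up, sketched with hypothetical names and under the assumption that a null spec marks every partition key as dynamic:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class DynamicPartitionSetup {
  public static void configure(Job job) throws Exception {
    // no value supplied for the partition key(s), so they are resolved per
    // record when FileRecordWriterContainer.write() runs
    HCatOutputFormat.setOutput(job,
        OutputJobInfo.create("default", "page_views", null));

    // the schema handed to setSchema() must include the dynamic partition
    // column(s); setPartDetails() records their positions (posOfDynPartCols)
    // and the writer strips them from each record before serialization
    List<HCatFieldSchema> columns = new ArrayList<HCatFieldSchema>(
        HCatOutputFormat.getTableSchema(job.getConfiguration()).getFields());
    columns.addAll(HCatOutputFormat.getJobInfo(job.getConfiguration())
        .getTableInfo().getPartitionColumns().getFields());
    HCatOutputFormat.setSchema(job, new HCatSchema(columns));

    job.setOutputFormatClass(HCatOutputFormat.class);
  }
}
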
- * @param conf the job Configuration object - * @return the OutputJobInfo object - * @throws IOException the IO exception - */ - public static OutputJobInfo getJobInfo(Configuration conf) throws IOException { - String jobString = conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); - if (jobString == null) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED); + return (OutputJobInfo) HCatUtil.deserialize(jobString); + } + + /** + * Configure the output storage handler + * @param jobContext the job context + * @throws IOException + */ + @SuppressWarnings("unchecked") + static void configureOutputStorageHandler( + JobContext jobContext) throws IOException { + configureOutputStorageHandler(jobContext, (List) null); + } + + /** + * Configure the output storage handler with allowing specification of missing dynamic partvals + * @param jobContext the job context + * @param dynamicPartVals + * @throws IOException + */ + @SuppressWarnings("unchecked") + static void configureOutputStorageHandler( + JobContext jobContext, List dynamicPartVals) throws IOException { + Configuration conf = jobContext.getConfiguration(); + try { + OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, jobInfo.getTableInfo().getStorerInfo()); + + Map partitionValues = jobInfo.getPartitionValues(); + String location = jobInfo.getLocation(); + + if (dynamicPartVals != null) { + // dynamic part vals specified + List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); + if (dynamicPartVals.size() != dynamicPartKeys.size()) { + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, + "Unable to configure dynamic partitioning for storage handler, mismatch between" + + " number of partition values obtained[" + dynamicPartVals.size() + + "] and number of partition values required[" + dynamicPartKeys.size() + "]"); + } + for (int i = 0; i < dynamicPartKeys.size(); i++) { + partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i)); } - - return (OutputJobInfo) HCatUtil.deserialize(jobString); - } - - /** - * Configure the output storage handler - * @param jobContext the job context - * @throws IOException - */ - @SuppressWarnings("unchecked") - static void configureOutputStorageHandler( - JobContext jobContext) throws IOException { - configureOutputStorageHandler(jobContext, (List) null); - } - - /** - * Configure the output storage handler with allowing specification of missing dynamic partvals - * @param jobContext the job context - * @param dynamicPartVals - * @throws IOException - */ - @SuppressWarnings("unchecked") - static void configureOutputStorageHandler( - JobContext jobContext, List dynamicPartVals) throws IOException { - Configuration conf = jobContext.getConfiguration(); - try { - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, jobInfo.getTableInfo().getStorerInfo()); - - Map partitionValues = jobInfo.getPartitionValues(); - String location = jobInfo.getLocation(); - - if (dynamicPartVals != null) { - // dynamic part vals specified - List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); - if (dynamicPartVals.size() != dynamicPartKeys.size()) { - throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, - "Unable to configure dynamic partitioning for storage handler, mismatch between" - + " number of partition values obtained[" 
+ dynamicPartVals.size() - + "] and number of partition values required[" + dynamicPartKeys.size() + "]"); - } - for (int i = 0; i < dynamicPartKeys.size(); i++) { - partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i)); - } // // re-home location, now that we know the rest of the partvals // Table table = jobInfo.getTableInfo().getTable(); @@ -159,85 +159,85 @@ static void configureOutputStorageHandler( // for(FieldSchema schema : table.getPartitionKeys()) { // partitionCols.add(schema.getName()); // } - jobInfo.setPartitionValues(partitionValues); - } - - HCatUtil.configureOutputStorageHandler(storageHandler, conf, jobInfo); - } catch (Exception e) { - if (e instanceof HCatException) { - throw (HCatException) e; - } else { - throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e); - } - } + jobInfo.setPartitionValues(partitionValues); + } + + HCatUtil.configureOutputStorageHandler(storageHandler, conf, jobInfo); + } catch (Exception e) { + if (e instanceof HCatException) { + throw (HCatException) e; + } else { + throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e); + } } - - /** - * Configure the output storage handler, with allowing specification - * of partvals from which it picks the dynamic partvals - * @param context the job context - * @param jobInfo the output job info - * @param fullPartSpec - * @throws IOException - */ - - protected static void configureOutputStorageHandler( - JobContext context, OutputJobInfo jobInfo, - Map fullPartSpec) throws IOException { - List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); - if ((dynamicPartKeys == null) || (dynamicPartKeys.isEmpty())) { - configureOutputStorageHandler(context, (List) null); - } else { - List dynKeyVals = new ArrayList(); - for (String dynamicPartKey : dynamicPartKeys) { - dynKeyVals.add(fullPartSpec.get(dynamicPartKey)); - } - configureOutputStorageHandler(context, dynKeyVals); - } + } + + /** + * Configure the output storage handler, with allowing specification + * of partvals from which it picks the dynamic partvals + * @param context the job context + * @param jobInfo the output job info + * @param fullPartSpec + * @throws IOException + */ + + protected static void configureOutputStorageHandler( + JobContext context, OutputJobInfo jobInfo, + Map fullPartSpec) throws IOException { + List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); + if ((dynamicPartKeys == null) || (dynamicPartKeys.isEmpty())) { + configureOutputStorageHandler(context, (List) null); + } else { + List dynKeyVals = new ArrayList(); + for (String dynamicPartKey : dynamicPartKeys) { + dynKeyVals.add(fullPartSpec.get(dynamicPartKey)); + } + configureOutputStorageHandler(context, dynKeyVals); } + } - protected static void setPartDetails(OutputJobInfo jobInfo, final HCatSchema schema, - Map partMap) throws HCatException, IOException { - List posOfPartCols = new ArrayList(); - List posOfDynPartCols = new ArrayList(); + protected static void setPartDetails(OutputJobInfo jobInfo, final HCatSchema schema, + Map partMap) throws HCatException, IOException { + List posOfPartCols = new ArrayList(); + List posOfDynPartCols = new ArrayList(); - // If partition columns occur in data, we want to remove them. - // So, find out positions of partition columns in schema provided by user. - // We also need to update the output Schema with these deletions. + // If partition columns occur in data, we want to remove them. + // So, find out positions of partition columns in schema provided by user. 
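
Because setPartDetails() strips partition columns from the schema the storage handler sees, a task's records carry a partition value only when that key is dynamic; the key argument handed to the writer is ignored either way. A sketch for a hypothetical table page_views(user string, cnt int) partitioned by (ds string):

import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

public class EmittedRecords {
  // static partition (ds supplied to setOutput): only data columns in the record
  static HCatRecord staticPartitionRecord() {
    HCatRecord r = new DefaultHCatRecord(2);
    r.set(0, "alice");        // user
    r.set(1, 42);             // cnt
    return r;
  }

  // dynamic partition on ds: the value travels in the record at the position
  // noted in posOfDynPartCols, selects the target partition, and is removed
  // again before the base RecordWriter sees the record
  static HCatRecord dynamicPartitionRecord() {
    HCatRecord r = new DefaultHCatRecord(3);
    r.set(0, "alice");        // user
    r.set(1, 42);             // cnt
    r.set(2, "2013-01-01");   // ds
    return r;
  }
}
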
+ // We also need to update the output Schema with these deletions. - // Note that, output storage handlers never sees partition columns in data - // or schema. + // Note that, output storage handlers never sees partition columns in data + // or schema. - HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields()); - for (String partKey : partMap.keySet()) { - Integer idx; - if ((idx = schema.getPosition(partKey)) != null) { - posOfPartCols.add(idx); - schemaWithoutParts.remove(schema.get(partKey)); - } - } + HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields()); + for (String partKey : partMap.keySet()) { + Integer idx; + if ((idx = schema.getPosition(partKey)) != null) { + posOfPartCols.add(idx); + schemaWithoutParts.remove(schema.get(partKey)); + } + } - // Also, if dynamic partitioning is being used, we want to - // set appropriate list of columns for the columns to be dynamically specified. - // These would be partition keys too, so would also need to be removed from - // output schema and partcols - - if (jobInfo.isDynamicPartitioningUsed()) { - for (String partKey : jobInfo.getDynamicPartitioningKeys()) { - Integer idx; - if ((idx = schema.getPosition(partKey)) != null) { - posOfPartCols.add(idx); - posOfDynPartCols.add(idx); - schemaWithoutParts.remove(schema.get(partKey)); - } - } + // Also, if dynamic partitioning is being used, we want to + // set appropriate list of columns for the columns to be dynamically specified. + // These would be partition keys too, so would also need to be removed from + // output schema and partcols + + if (jobInfo.isDynamicPartitioningUsed()) { + for (String partKey : jobInfo.getDynamicPartitioningKeys()) { + Integer idx; + if ((idx = schema.getPosition(partKey)) != null) { + posOfPartCols.add(idx); + posOfDynPartCols.add(idx); + schemaWithoutParts.remove(schema.get(partKey)); } - - HCatUtil.validatePartitionSchema( - new Table(jobInfo.getTableInfo().getTable()), schemaWithoutParts); - jobInfo.setPosOfPartCols(posOfPartCols); - jobInfo.setPosOfDynPartCols(posOfDynPartCols); - jobInfo.setOutputSchema(schemaWithoutParts); + } } + + HCatUtil.validatePartitionSchema( + new Table(jobInfo.getTableInfo().getTable()), schemaWithoutParts); + jobInfo.setPosOfPartCols(posOfPartCols); + jobInfo.setPosOfDynPartCols(posOfDynPartCols); + jobInfo.setOutputSchema(schemaWithoutParts); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatInputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatInputFormat.java index 0bcb133..2f24124 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatInputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatInputFormat.java @@ -35,102 +35,102 @@ @InterfaceStability.Evolving public class HCatInputFormat extends HCatBaseInputFormat { - private Configuration conf; - private InputJobInfo inputJobInfo; + private Configuration conf; + private InputJobInfo inputJobInfo; - /** - * @deprecated as of release 0.5, and will be removed in a future release - */ - @Deprecated - public static void setInput(Job job, InputJobInfo inputJobInfo) throws IOException { - setInput(job.getConfiguration(), inputJobInfo); - } + /** + * @deprecated as of release 0.5, and will be removed in a future release + */ + @Deprecated + public static void setInput(Job job, InputJobInfo inputJobInfo) throws IOException { + setInput(job.getConfiguration(), inputJobInfo); + } - /** - * @deprecated as of release 0.5, and will be removed 
in a future release - */ - @Deprecated - public static void setInput(Configuration conf, InputJobInfo inputJobInfo) throws IOException { - setInput(conf, inputJobInfo.getDatabaseName(), inputJobInfo.getTableName()) - .setFilter(inputJobInfo.getFilter()) - .setProperties(inputJobInfo.getProperties()); - } + /** + * @deprecated as of release 0.5, and will be removed in a future release + */ + @Deprecated + public static void setInput(Configuration conf, InputJobInfo inputJobInfo) throws IOException { + setInput(conf, inputJobInfo.getDatabaseName(), inputJobInfo.getTableName()) + .setFilter(inputJobInfo.getFilter()) + .setProperties(inputJobInfo.getProperties()); + } - /** - * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String)} - */ - public static HCatInputFormat setInput(Job job, String dbName, String tableName) throws IOException { - return setInput(job.getConfiguration(), dbName, tableName); - } + /** + * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String)} + */ + public static HCatInputFormat setInput(Job job, String dbName, String tableName) throws IOException { + return setInput(job.getConfiguration(), dbName, tableName); + } - /** - * Set inputs to use for the job. This queries the metastore with the given input - * specification and serializes matching partitions into the job conf for use by MR tasks. - * @param conf the job configuration - * @param dbName database name, which if null 'default' is used - * @param tableName table name - * @throws IOException on all errors - */ - public static HCatInputFormat setInput(Configuration conf, String dbName, String tableName) - throws IOException { + /** + * Set inputs to use for the job. This queries the metastore with the given input + * specification and serializes matching partitions into the job conf for use by MR tasks. + * @param conf the job configuration + * @param dbName database name, which if null 'default' is used + * @param tableName table name + * @throws IOException on all errors + */ + public static HCatInputFormat setInput(Configuration conf, String dbName, String tableName) + throws IOException { - Preconditions.checkNotNull(conf, "required argument 'conf' is null"); - Preconditions.checkNotNull(tableName, "required argument 'tableName' is null"); + Preconditions.checkNotNull(conf, "required argument 'conf' is null"); + Preconditions.checkNotNull(tableName, "required argument 'tableName' is null"); - HCatInputFormat hCatInputFormat = new HCatInputFormat(); - hCatInputFormat.conf = conf; - hCatInputFormat.inputJobInfo = InputJobInfo.create(dbName, tableName, null, null); + HCatInputFormat hCatInputFormat = new HCatInputFormat(); + hCatInputFormat.conf = conf; + hCatInputFormat.inputJobInfo = InputJobInfo.create(dbName, tableName, null, null); - try { - InitializeInput.setInput(conf, hCatInputFormat.inputJobInfo); - } catch (Exception e) { - throw new IOException(e); - } - - return hCatInputFormat; + try { + InitializeInput.setInput(conf, hCatInputFormat.inputJobInfo); + } catch (Exception e) { + throw new IOException(e); } - /** - * Set a filter on the input table. 
- * @param filter the filter specification, which may be null - * @return this - * @throws IOException on all errors - */ - public HCatInputFormat setFilter(String filter) throws IOException { - // null filters are supported to simplify client code - if (filter != null) { - inputJobInfo = InputJobInfo.create( - inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName(), - filter, - inputJobInfo.getProperties()); - try { - InitializeInput.setInput(conf, inputJobInfo); - } catch (Exception e) { - throw new IOException(e); - } - } - return this; + return hCatInputFormat; + } + + /** + * Set a filter on the input table. + * @param filter the filter specification, which may be null + * @return this + * @throws IOException on all errors + */ + public HCatInputFormat setFilter(String filter) throws IOException { + // null filters are supported to simplify client code + if (filter != null) { + inputJobInfo = InputJobInfo.create( + inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName(), + filter, + inputJobInfo.getProperties()); + try { + InitializeInput.setInput(conf, inputJobInfo); + } catch (Exception e) { + throw new IOException(e); + } } + return this; + } - /** - * Set properties for the input format. - * @param properties properties for the input specification - * @return this - * @throws IOException on all errors - */ - public HCatInputFormat setProperties(Properties properties) throws IOException { - Preconditions.checkNotNull(properties, "required argument 'properties' is null"); - inputJobInfo = InputJobInfo.create( - inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName(), - inputJobInfo.getFilter(), - properties); - try { - InitializeInput.setInput(conf, inputJobInfo); - } catch (Exception e) { - throw new IOException(e); - } - return this; + /** + * Set properties for the input format. 
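Taken together, setInput(), setFilter() and setProperties() above form a small builder chain for configuring a read job. The driver sketch below shows that chain in use; the job name, the database/table pair ("default", "web_logs") and the partition filter are made-up examples, and the mapper/output setup is deliberately elided.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

public class ReadJobDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat-read-example");
    job.setJarByClass(ReadJobDriver.class);

    // Queries the metastore for partitions of default.web_logs matching the filter and
    // serializes the matching partition info into the job configuration (see setInput above).
    HCatInputFormat.setInput(job.getConfiguration(), "default", "web_logs")
        .setFilter("ds=\"2013-01-01\"")     // may be null; null filters are simply ignored
        .setProperties(new Properties());   // must be non-null (checked with Preconditions)

    job.setInputFormatClass(HCatInputFormat.class);
    // Mapper, reducer and output configuration are elided; map input values arrive as HCatRecord.
    // System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Because setFilter() accepts null while setProperties() rejects it, a caller with no extra input properties can simply leave the setProperties() call out of the chain.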
+ * @param properties properties for the input specification + * @return this + * @throws IOException on all errors + */ + public HCatInputFormat setProperties(Properties properties) throws IOException { + Preconditions.checkNotNull(properties, "required argument 'properties' is null"); + inputJobInfo = InputJobInfo.create( + inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName(), + inputJobInfo.getFilter(), + properties); + try { + InitializeInput.setInput(conf, inputJobInfo); + } catch (Exception e) { + throw new IOException(e); } + return this; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatMapRedUtil.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatMapRedUtil.java index a808565..4057a3e 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatMapRedUtil.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatMapRedUtil.java @@ -30,27 +30,27 @@ public class HCatMapRedUtil { - public static TaskAttemptContext createTaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext context) { - return createTaskAttemptContext(new JobConf(context.getConfiguration()), - org.apache.hadoop.mapred.TaskAttemptID.forName(context.getTaskAttemptID().toString()), - Reporter.NULL); - } - - public static org.apache.hadoop.mapreduce.TaskAttemptContext createTaskAttemptContext(Configuration conf, org.apache.hadoop.mapreduce.TaskAttemptID id) { - return ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,id); - } - - public static TaskAttemptContext createTaskAttemptContext(JobConf conf, TaskAttemptID id, Progressable progressable) { - return ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, id, (Reporter) progressable); - } - - public static org.apache.hadoop.mapred.JobContext createJobContext(org.apache.hadoop.mapreduce.JobContext context) { - return createJobContext((JobConf)context.getConfiguration(), - context.getJobID(), - Reporter.NULL); - } - - public static JobContext createJobContext(JobConf conf, org.apache.hadoop.mapreduce.JobID id, Progressable progressable) { - return ShimLoader.getHadoopShims().getHCatShim().createJobContext(conf, id, (Reporter) progressable); - } + public static TaskAttemptContext createTaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext context) { + return createTaskAttemptContext(new JobConf(context.getConfiguration()), + org.apache.hadoop.mapred.TaskAttemptID.forName(context.getTaskAttemptID().toString()), + Reporter.NULL); + } + + public static org.apache.hadoop.mapreduce.TaskAttemptContext createTaskAttemptContext(Configuration conf, org.apache.hadoop.mapreduce.TaskAttemptID id) { + return ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,id); + } + + public static TaskAttemptContext createTaskAttemptContext(JobConf conf, TaskAttemptID id, Progressable progressable) { + return ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, id, (Reporter) progressable); + } + + public static org.apache.hadoop.mapred.JobContext createJobContext(org.apache.hadoop.mapreduce.JobContext context) { + return createJobContext((JobConf)context.getConfiguration(), + context.getJobID(), + Reporter.NULL); + } + + public static JobContext createJobContext(JobConf conf, org.apache.hadoop.mapreduce.JobID id, Progressable progressable) { + return ShimLoader.getHadoopShims().getHCatShim().createJobContext(conf, id, (Reporter) progressable); + } } diff --git 
a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java index 1a4a8ee..24417d2 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java @@ -53,228 +53,228 @@ * should be given as null. The value is the HCatRecord to write.*/ public class HCatOutputFormat extends HCatBaseOutputFormat { - static final private Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); - - private static int maxDynamicPartitions; - private static boolean harRequested; - - /** - * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo) - */ - public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException { - setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo); - } - - /** - * Set the information about the output to write for the job. This queries the metadata server - * to find the StorageHandler to use for the table. It throws an error if the - * partition is already published. - * @param conf the Configuration object - * @param credentials the Credentials object - * @param outputJobInfo the table output information for the job - * @throws IOException the exception in communicating with the metadata server - */ - @SuppressWarnings("unchecked") - public static void setOutput(Configuration conf, Credentials credentials, - OutputJobInfo outputJobInfo) throws IOException { - HiveMetaStoreClient client = null; - - try { - - HiveConf hiveConf = HCatUtil.getHiveConf(conf); - client = HCatUtil.getHiveClient(hiveConf); - Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), - outputJobInfo.getTableName()); - - List indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE); - - for (String indexName : indexList) { - Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName); - if (!index.isDeferredRebuild()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported"); - } - } - StorageDescriptor sd = table.getTTable().getSd(); + static final private Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); + + private static int maxDynamicPartitions; + private static boolean harRequested; + + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo) + */ + public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException { + setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo); + } + + /** + * Set the information about the output to write for the job. This queries the metadata server + * to find the StorageHandler to use for the table. It throws an error if the + * partition is already published. 
+ * @param conf the Configuration object + * @param credentials the Credentials object + * @param outputJobInfo the table output information for the job + * @throws IOException the exception in communicating with the metadata server + */ + @SuppressWarnings("unchecked") + public static void setOutput(Configuration conf, Credentials credentials, + OutputJobInfo outputJobInfo) throws IOException { + HiveMetaStoreClient client = null; + + try { + + HiveConf hiveConf = HCatUtil.getHiveConf(conf); + client = HCatUtil.getHiveClient(hiveConf); + Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), + outputJobInfo.getTableName()); + + List indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE); + + for (String indexName : indexList) { + Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName); + if (!index.isDeferredRebuild()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported"); + } + } + StorageDescriptor sd = table.getTTable().getSd(); + + if (sd.isCompressed()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported"); + } + + if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported"); + } + + if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported"); + } + + if (table.getTTable().getPartitionKeysSize() == 0) { + if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) { + // attempt made to save partition values in non-partitioned table - throw error. + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, + "Partition values specified for non-partitioned table"); + } + // non-partitioned table + outputJobInfo.setPartitionValues(new HashMap()); + + } else { + // partitioned table, we expect partition values + // convert user specified map to have lower case key names + Map valueMap = new HashMap(); + if (outputJobInfo.getPartitionValues() != null) { + for (Map.Entry entry : outputJobInfo.getPartitionValues().entrySet()) { + valueMap.put(entry.getKey().toLowerCase(), entry.getValue()); + } + } - if (sd.isCompressed()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported"); + if ((outputJobInfo.getPartitionValues() == null) + || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) { + // dynamic partition usecase - partition values were null, or not all were specified + // need to figure out which keys are not specified. 
+ List dynamicPartitioningKeys = new ArrayList(); + boolean firstItem = true; + for (FieldSchema fs : table.getPartitionKeys()) { + if (!valueMap.containsKey(fs.getName().toLowerCase())) { + dynamicPartitioningKeys.add(fs.getName().toLowerCase()); } + } - if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported"); - } + if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) { + // If this isn't equal, then bogus key values have been inserted, error out. + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified"); + } - if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported"); - } - - if (table.getTTable().getPartitionKeysSize() == 0) { - if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) { - // attempt made to save partition values in non-partitioned table - throw error. - throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, - "Partition values specified for non-partitioned table"); - } - // non-partitioned table - outputJobInfo.setPartitionValues(new HashMap()); - - } else { - // partitioned table, we expect partition values - // convert user specified map to have lower case key names - Map valueMap = new HashMap(); - if (outputJobInfo.getPartitionValues() != null) { - for (Map.Entry entry : outputJobInfo.getPartitionValues().entrySet()) { - valueMap.put(entry.getKey().toLowerCase(), entry.getValue()); - } - } - - if ((outputJobInfo.getPartitionValues() == null) - || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) { - // dynamic partition usecase - partition values were null, or not all were specified - // need to figure out which keys are not specified. - List dynamicPartitioningKeys = new ArrayList(); - boolean firstItem = true; - for (FieldSchema fs : table.getPartitionKeys()) { - if (!valueMap.containsKey(fs.getName().toLowerCase())) { - dynamicPartitioningKeys.add(fs.getName().toLowerCase()); - } - } - - if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) { - // If this isn't equal, then bogus key values have been inserted, error out. 
- throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified"); - } - - outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys); - String dynHash; - if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) { - dynHash = String.valueOf(Math.random()); + outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys); + String dynHash; + if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) { + dynHash = String.valueOf(Math.random()); // LOG.info("New dynHash : ["+dynHash+"]"); // }else{ // LOG.info("Old dynHash : ["+dynHash+"]"); - } - conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash); - - } - - outputJobInfo.setPartitionValues(valueMap); - } - - // To get around hbase failure on single node, see BUG-4383 - conf.set("dfs.client.read.shortcircuit", "false"); - HCatSchema tableSchema = HCatUtil.extractSchema(table); - StorerInfo storerInfo = - InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters()); - - List partitionCols = new ArrayList(); - for (FieldSchema schema : table.getPartitionKeys()) { - partitionCols.add(schema.getName()); - } - - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); - - //Serialize the output info into the configuration - outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); - outputJobInfo.setOutputSchema(tableSchema); - harRequested = getHarRequested(hiveConf); - outputJobInfo.setHarRequested(harRequested); - maxDynamicPartitions = getMaxDynamicPartitions(hiveConf); - outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions); + } + conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash); - HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo); - - Path tblPath = new Path(table.getTTable().getSd().getLocation()); - - /* Set the umask in conf such that files/dirs get created with table-dir - * permissions. Following three assumptions are made: - * 1. Actual files/dirs creation is done by RecordWriter of underlying - * output format. It is assumed that they use default permissions while creation. - * 2. Default Permissions = FsPermission.getDefault() = 777. - * 3. UMask is honored by underlying filesystem. - */ - - FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask( - tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission())); - - if (Security.getInstance().isSecurityEnabled()) { - Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested); - } - } catch (Exception e) { - if (e instanceof HCatException) { - throw (HCatException) e; - } else { - throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); - } - } finally { - HCatUtil.closeHiveClientQuietly(client); } - } - - /** - * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setSchema(org.apache.hadoop.conf.Configuration, org.apache.hive.hcatalog.data.schema.HCatSchema) - */ - public static void setSchema(final Job job, final HCatSchema schema) throws IOException { - setSchema(job.getConfiguration(), schema); - } - /** - * Set the schema for the data being written out to the partition. The - * table schema is used by default for the partition if this is not called. 
- * @param conf the job Configuration object - * @param schema the schema for the data - * @throws IOException - */ - public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException { - OutputJobInfo jobInfo = getJobInfo(conf); - Map partMap = jobInfo.getPartitionValues(); - setPartDetails(jobInfo, schema, partMap); - conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo)); + outputJobInfo.setPartitionValues(valueMap); + } + + // To get around hbase failure on single node, see BUG-4383 + conf.set("dfs.client.read.shortcircuit", "false"); + HCatSchema tableSchema = HCatUtil.extractSchema(table); + StorerInfo storerInfo = + InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters()); + + List partitionCols = new ArrayList(); + for (FieldSchema schema : table.getPartitionKeys()) { + partitionCols.add(schema.getName()); + } + + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); + + //Serialize the output info into the configuration + outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); + outputJobInfo.setOutputSchema(tableSchema); + harRequested = getHarRequested(hiveConf); + outputJobInfo.setHarRequested(harRequested); + maxDynamicPartitions = getMaxDynamicPartitions(hiveConf); + outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions); + + HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo); + + Path tblPath = new Path(table.getTTable().getSd().getLocation()); + + /* Set the umask in conf such that files/dirs get created with table-dir + * permissions. Following three assumptions are made: + * 1. Actual files/dirs creation is done by RecordWriter of underlying + * output format. It is assumed that they use default permissions while creation. + * 2. Default Permissions = FsPermission.getDefault() = 777. + * 3. UMask is honored by underlying filesystem. + */ + + FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask( + tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission())); + + if (Security.getInstance().isSecurityEnabled()) { + Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested); + } + } catch (Exception e) { + if (e instanceof HCatException) { + throw (HCatException) e; + } else { + throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); + } + } finally { + HCatUtil.closeHiveClientQuietly(client); } - - /** - * Get the record writer for the job. This uses the StorageHandler's default - * OutputFormat to get the record writer. - * @param context the information about the current task - * @return a RecordWriter to write the output for the job - * @throws IOException - * @throws InterruptedException - */ - @Override - public RecordWriter, HCatRecord> - getRecordWriter(TaskAttemptContext context) - throws IOException, InterruptedException { - return getOutputFormat(context).getRecordWriter(context); + } + + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setSchema(org.apache.hadoop.conf.Configuration, org.apache.hive.hcatalog.data.schema.HCatSchema) + */ + public static void setSchema(final Job job, final HCatSchema schema) throws IOException { + setSchema(job.getConfiguration(), schema); + } + + /** + * Set the schema for the data being written out to the partition. The + * table schema is used by default for the partition if this is not called. 
+ * @param conf the job Configuration object + * @param schema the schema for the data + * @throws IOException + */ + public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException { + OutputJobInfo jobInfo = getJobInfo(conf); + Map partMap = jobInfo.getPartitionValues(); + setPartDetails(jobInfo, schema, partMap); + conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo)); + } + + /** + * Get the record writer for the job. This uses the StorageHandler's default + * OutputFormat to get the record writer. + * @param context the information about the current task + * @return a RecordWriter to write the output for the job + * @throws IOException + * @throws InterruptedException + */ + @Override + public RecordWriter, HCatRecord> + getRecordWriter(TaskAttemptContext context) + throws IOException, InterruptedException { + return getOutputFormat(context).getRecordWriter(context); + } + + + /** + * Get the output committer for this output format. This is responsible + * for ensuring the output is committed correctly. + * @param context the task context + * @return an output committer + * @throws IOException + * @throws InterruptedException + */ + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context + ) throws IOException, InterruptedException { + return getOutputFormat(context).getOutputCommitter(context); + } + + private static int getMaxDynamicPartitions(HiveConf hConf) { + // by default the bounds checking for maximum number of + // dynamic partitions is disabled (-1) + int maxDynamicPartitions = -1; + + if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { + maxDynamicPartitions = hConf.getIntVar( + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); } + return maxDynamicPartitions; + } - /** - * Get the output committer for this output format. This is responsible - * for ensuring the output is committed correctly. 
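As a counterpart on the write side, the sketch below exercises setOutput() and the optional setSchema() described above. The table "page_views" and its partition keys (ds, region) are hypothetical, and OutputJobInfo.create(dbName, tableName, partitionValues) is assumed to be the usual factory for the job info object; it is not shown in this hunk. Leaving a partition key out of the value map is what drives the dynamic-partitioning branch in setOutput() above.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class WriteJobDriver {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration(), "hcat-write-example");
    job.setJarByClass(WriteJobDriver.class);

    // "page_views" is assumed to be partitioned by (ds, region). ds gets a static value;
    // region is omitted, so setOutput() records it as a dynamic partition key
    // (the key-derivation loop above fills in exactly the keys missing from this map).
    Map<String, String> partitionValues = new HashMap<String, String>();
    partitionValues.put("ds", "2013-01-01");

    HCatOutputFormat.setOutput(job,
        OutputJobInfo.create("default", "page_views", partitionValues));

    // setSchema() is optional: if it is never called, the table schema captured by
    // setOutput() is used for the partition, per the javadoc above.
    // HCatOutputFormat.setSchema(job, narrowedSchema);

    job.setOutputFormatClass(HCatOutputFormat.class);
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    // Mapper/reducer producing DefaultHCatRecord values are elided.
  }
}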
- * @param context the task context - * @return an output committer - * @throws IOException - * @throws InterruptedException - */ - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context - ) throws IOException, InterruptedException { - return getOutputFormat(context).getOutputCommitter(context); - } - - private static int getMaxDynamicPartitions(HiveConf hConf) { - // by default the bounds checking for maximum number of - // dynamic partitions is disabled (-1) - int maxDynamicPartitions = -1; - - if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { - maxDynamicPartitions = hConf.getIntVar( - HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); - } - - return maxDynamicPartitions; - } - - private static boolean getHarRequested(HiveConf hConf) { - return hConf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED); - } + private static boolean getHarRequested(HiveConf hConf) { + return hConf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java index 8637110..cb2f3da 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java @@ -46,240 +46,240 @@ */ class HCatRecordReader extends RecordReader { - private static final Logger LOG = LoggerFactory.getLogger(HCatRecordReader.class); + private static final Logger LOG = LoggerFactory.getLogger(HCatRecordReader.class); - private InputErrorTracker errorTracker; + private InputErrorTracker errorTracker; - WritableComparable currentKey; - Writable currentValue; - HCatRecord currentHCatRecord; + WritableComparable currentKey; + Writable currentValue; + HCatRecord currentHCatRecord; - /** The underlying record reader to delegate to. */ - private org.apache.hadoop.mapred.RecordReader baseRecordReader; + /** The underlying record reader to delegate to. */ + private org.apache.hadoop.mapred.RecordReader baseRecordReader; - /** The storage handler used */ - private final HCatStorageHandler storageHandler; + /** The storage handler used */ + private final HCatStorageHandler storageHandler; - private Deserializer deserializer; + private Deserializer deserializer; - private Map valuesNotInDataCols; + private Map valuesNotInDataCols; - private HCatSchema outputSchema = null; - private HCatSchema dataSchema = null; + private HCatSchema outputSchema = null; + private HCatSchema dataSchema = null; - /** - * Instantiates a new hcat record reader. - */ - public HCatRecordReader(HCatStorageHandler storageHandler, - Map valuesNotInDataCols) { - this.storageHandler = storageHandler; - this.valuesNotInDataCols = valuesNotInDataCols; - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#initialize( - * org.apache.hadoop.mapreduce.InputSplit, - * org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public void initialize(org.apache.hadoop.mapreduce.InputSplit split, - TaskAttemptContext taskContext) throws IOException, InterruptedException { - - HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); + /** + * Instantiates a new hcat record reader. 
+ */ + public HCatRecordReader(HCatStorageHandler storageHandler, + Map valuesNotInDataCols) { + this.storageHandler = storageHandler; + this.valuesNotInDataCols = valuesNotInDataCols; + } - baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext); - createDeserializer(hcatSplit, storageHandler, taskContext); - - // Pull the output schema out of the TaskAttemptContext - outputSchema = (HCatSchema) HCatUtil.deserialize( - taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA)); - - if (outputSchema == null) { - outputSchema = hcatSplit.getTableSchema(); - } + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#initialize( + * org.apache.hadoop.mapreduce.InputSplit, + * org.apache.hadoop.mapreduce.TaskAttemptContext) + */ + @Override + public void initialize(org.apache.hadoop.mapreduce.InputSplit split, + TaskAttemptContext taskContext) throws IOException, InterruptedException { - // Pull the table schema out of the Split info - // TODO This should be passed in the TaskAttemptContext instead - dataSchema = hcatSplit.getDataSchema(); - - errorTracker = new InputErrorTracker(taskContext.getConfiguration()); - } - - private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit, - HCatStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException { - - JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext); - HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf); - org.apache.hadoop.mapred.InputFormat inputFormat = - HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass()); - return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf, - InternalUtil.createReporter(taskContext)); - } + HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); - private void createDeserializer(HCatSplit hcatSplit, HCatStorageHandler storageHandler, - TaskAttemptContext taskContext) throws IOException { + baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext); + createDeserializer(hcatSplit, storageHandler, taskContext); - deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), - taskContext.getConfiguration()); + // Pull the output schema out of the TaskAttemptContext + outputSchema = (HCatSchema) HCatUtil.deserialize( + taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA)); - try { - InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(), - hcatSplit.getPartitionInfo().getTableInfo(), - hcatSplit.getPartitionInfo().getPartitionSchema()); - } catch (SerDeException e) { - throw new IOException("Failed initializing deserializer " - + storageHandler.getSerDeClass().getName(), e); - } + if (outputSchema == null) { + outputSchema = hcatSplit.getTableSchema(); } - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey() - */ - @Override - public WritableComparable getCurrentKey() - throws IOException, InterruptedException { - return currentKey; + // Pull the table schema out of the Split info + // TODO This should be passed in the TaskAttemptContext instead + dataSchema = hcatSplit.getDataSchema(); + + errorTracker = new InputErrorTracker(taskContext.getConfiguration()); + } + + private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit, + HCatStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException { + + JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext); + 
HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf); + org.apache.hadoop.mapred.InputFormat inputFormat = + HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass()); + return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf, + InternalUtil.createReporter(taskContext)); + } + + private void createDeserializer(HCatSplit hcatSplit, HCatStorageHandler storageHandler, + TaskAttemptContext taskContext) throws IOException { + + deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), + taskContext.getConfiguration()); + + try { + InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(), + hcatSplit.getPartitionInfo().getTableInfo(), + hcatSplit.getPartitionInfo().getPartitionSchema()); + } catch (SerDeException e) { + throw new IOException("Failed initializing deserializer " + + storageHandler.getSerDeClass().getName(), e); } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue() - */ - @Override - public HCatRecord getCurrentValue() throws IOException, InterruptedException { - return currentHCatRecord; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey() + */ + @Override + public WritableComparable getCurrentKey() + throws IOException, InterruptedException { + return currentKey; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue() + */ + @Override + public HCatRecord getCurrentValue() throws IOException, InterruptedException { + return currentHCatRecord; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#getProgress() + */ + @Override + public float getProgress() { + try { + return baseRecordReader.getProgress(); + } catch (IOException e) { + LOG.warn("Exception in HCatRecord reader", e); } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#getProgress() - */ - @Override - public float getProgress() { - try { - return baseRecordReader.getProgress(); - } catch (IOException e) { - LOG.warn("Exception in HCatRecord reader", e); - } - return 0.0f; // errored + return 0.0f; // errored + } + + /** + * Check if the wrapped RecordReader has another record, and if so convert it into an + * HCatRecord. We both check for records and convert here so a configurable percent of + * bad records can be tolerated. + * + * @return if there is a next record + * @throws IOException on error + * @throws InterruptedException on error + */ + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { + if (currentKey == null) { + currentKey = baseRecordReader.createKey(); + currentValue = baseRecordReader.createValue(); } - /** - * Check if the wrapped RecordReader has another record, and if so convert it into an - * HCatRecord. We both check for records and convert here so a configurable percent of - * bad records can be tolerated. 
- * - * @return if there is a next record - * @throws IOException on error - * @throws InterruptedException on error - */ - @Override - public boolean nextKeyValue() throws IOException, InterruptedException { - if (currentKey == null) { - currentKey = baseRecordReader.createKey(); - currentValue = baseRecordReader.createValue(); - } - - while (baseRecordReader.next(currentKey, currentValue)) { - HCatRecord r = null; - Throwable t = null; - - errorTracker.incRecords(); - - try { - Object o = deserializer.deserialize(currentValue); - r = new LazyHCatRecord(o, deserializer.getObjectInspector()); - } catch (Throwable throwable) { - t = throwable; - } - - if (r == null) { - errorTracker.incErrors(t); - continue; - } - - DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size()); - int i = 0; - for (String fieldName : outputSchema.getFieldNames()) { - if (dataSchema.getPosition(fieldName) != null) { - dr.set(i, r.get(fieldName, dataSchema)); - } else { - dr.set(i, valuesNotInDataCols.get(fieldName)); - } - i++; - } - - currentHCatRecord = dr; - return true; + while (baseRecordReader.next(currentKey, currentValue)) { + HCatRecord r = null; + Throwable t = null; + + errorTracker.incRecords(); + + try { + Object o = deserializer.deserialize(currentValue); + r = new LazyHCatRecord(o, deserializer.getObjectInspector()); + } catch (Throwable throwable) { + t = throwable; + } + + if (r == null) { + errorTracker.incErrors(t); + continue; + } + + DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size()); + int i = 0; + for (String fieldName : outputSchema.getFieldNames()) { + if (dataSchema.getPosition(fieldName) != null) { + dr.set(i, r.get(fieldName, dataSchema)); + } else { + dr.set(i, valuesNotInDataCols.get(fieldName)); } + i++; + } - return false; + currentHCatRecord = dr; + return true; } - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#close() - */ - @Override - public void close() throws IOException { - baseRecordReader.close(); + return false; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#close() + */ + @Override + public void close() throws IOException { + baseRecordReader.close(); + } + + /** + * Tracks number of of errors in input and throws a Runtime exception + * if the rate of errors crosses a limit. + *
+ * The intention is to skip over very rare file corruption or incorrect + * input, but catch programmer errors (incorrect format, or incorrect + * deserializers etc). + * + * This class was largely copied from Elephant-Bird (thanks @rangadi!) + * https://github.com/kevinweil/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/mapreduce/input/LzoRecordReader.java + */ + static class InputErrorTracker { + long numRecords; + long numErrors; + + double errorThreshold; // max fraction of errors allowed + long minErrors; // throw error only after this many errors + + InputErrorTracker(Configuration conf) { + errorThreshold = conf.getFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, + HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT); + minErrors = conf.getLong(HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_KEY, + HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_DEFAULT); + numRecords = 0; + numErrors = 0; } - /** - * Tracks number of of errors in input and throws a Runtime exception - * if the rate of errors crosses a limit. - *
- * The intention is to skip over very rare file corruption or incorrect - * input, but catch programmer errors (incorrect format, or incorrect - * deserializers etc). - * - * This class was largely copied from Elephant-Bird (thanks @rangadi!) - * https://github.com/kevinweil/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/mapreduce/input/LzoRecordReader.java - */ - static class InputErrorTracker { - long numRecords; - long numErrors; - - double errorThreshold; // max fraction of errors allowed - long minErrors; // throw error only after this many errors - - InputErrorTracker(Configuration conf) { - errorThreshold = conf.getFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, - HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT); - minErrors = conf.getLong(HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_KEY, - HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_DEFAULT); - numRecords = 0; - numErrors = 0; - } - - void incRecords() { - numRecords++; - } + void incRecords() { + numRecords++; + } - void incErrors(Throwable cause) { - numErrors++; - if (numErrors > numRecords) { - // incorrect use of this class - throw new RuntimeException("Forgot to invoke incRecords()?"); - } - - if (cause == null) { - cause = new Exception("Unknown error"); - } - - if (errorThreshold <= 0) { // no errors are tolerated - throw new RuntimeException("error while reading input records", cause); - } - - LOG.warn("Error while reading an input record (" - + numErrors + " out of " + numRecords + " so far ): ", cause); - - double errRate = numErrors / (double) numRecords; - - // will always excuse the first error. We can decide if single - // error crosses threshold inside close() if we want to. - if (numErrors >= minErrors && errRate > errorThreshold) { - LOG.error(numErrors + " out of " + numRecords - + " crosses configured threshold (" + errorThreshold + ")"); - throw new RuntimeException("error rate while reading input records crossed threshold", cause); - } - } + void incErrors(Throwable cause) { + numErrors++; + if (numErrors > numRecords) { + // incorrect use of this class + throw new RuntimeException("Forgot to invoke incRecords()?"); + } + + if (cause == null) { + cause = new Exception("Unknown error"); + } + + if (errorThreshold <= 0) { // no errors are tolerated + throw new RuntimeException("error while reading input records", cause); + } + + LOG.warn("Error while reading an input record (" + + numErrors + " out of " + numRecords + " so far ): ", cause); + + double errRate = numErrors / (double) numRecords; + + // will always excuse the first error. We can decide if single + // error crosses threshold inside close() if we want to. 
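The tolerance implemented here is driven entirely by two configuration entries read in the InputErrorTracker constructor. A small configuration sketch (the numeric values are arbitrary, only the constant names come from the code above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.common.HCatConstants;

public class BadRecordTolerance {
  // Typical use: configure(job.getConfiguration()) before submitting a read job.
  public static void configure(Configuration conf) {
    // Tolerate up to 0.1% corrupt records...
    conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, 0.001f);
    // ...but only start enforcing that rate once 100 bad records have been seen.
    conf.setLong(HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_KEY, 100L);
    // With these settings the 100th error aborts the task only if fewer than
    // 100 / 0.001 = 100,000 records have been read so far; a threshold <= 0
    // keeps the fail-fast behaviour and aborts on the first error.
  }
}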
+ if (numErrors >= minErrors && errRate > errorThreshold) { + LOG.error(numErrors + " out of " + numRecords + + " crosses configured threshold (" + errorThreshold + ")"); + throw new RuntimeException("error rate while reading input records crossed threshold", cause); + } } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java index 6cbe268..d3d5a0f 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java @@ -34,153 +34,153 @@ /** The HCatSplit wrapper around the InputSplit returned by the underlying InputFormat */ public class HCatSplit extends InputSplit - implements Writable, org.apache.hadoop.mapred.InputSplit { - - private static final Logger LOG = LoggerFactory.getLogger(HCatSplit.class); - /** The partition info for the split. */ - private PartInfo partitionInfo; - - /** The split returned by the underlying InputFormat split. */ - private org.apache.hadoop.mapred.InputSplit baseMapRedSplit; - - /** The schema for the HCatTable */ - private HCatSchema tableSchema; - - private HiveConf hiveConf; - - /** - * Instantiates a new hcat split. - */ - public HCatSplit() { - } - - /** - * Instantiates a new hcat split. - * - * @param partitionInfo the partition info - * @param baseMapRedSplit the base mapred split - * @param tableSchema the table level schema - */ - public HCatSplit(PartInfo partitionInfo, - org.apache.hadoop.mapred.InputSplit baseMapRedSplit, - HCatSchema tableSchema) { - - this.partitionInfo = partitionInfo; - // dataSchema can be obtained from partitionInfo.getPartitionSchema() - this.baseMapRedSplit = baseMapRedSplit; - this.tableSchema = tableSchema; - } - - /** - * Gets the partition info. - * @return the partitionInfo - */ - public PartInfo getPartitionInfo() { - return partitionInfo; + implements Writable, org.apache.hadoop.mapred.InputSplit { + + private static final Logger LOG = LoggerFactory.getLogger(HCatSplit.class); + /** The partition info for the split. */ + private PartInfo partitionInfo; + + /** The split returned by the underlying InputFormat split. */ + private org.apache.hadoop.mapred.InputSplit baseMapRedSplit; + + /** The schema for the HCatTable */ + private HCatSchema tableSchema; + + private HiveConf hiveConf; + + /** + * Instantiates a new hcat split. + */ + public HCatSplit() { + } + + /** + * Instantiates a new hcat split. + * + * @param partitionInfo the partition info + * @param baseMapRedSplit the base mapred split + * @param tableSchema the table level schema + */ + public HCatSplit(PartInfo partitionInfo, + org.apache.hadoop.mapred.InputSplit baseMapRedSplit, + HCatSchema tableSchema) { + + this.partitionInfo = partitionInfo; + // dataSchema can be obtained from partitionInfo.getPartitionSchema() + this.baseMapRedSplit = baseMapRedSplit; + this.tableSchema = tableSchema; + } + + /** + * Gets the partition info. + * @return the partitionInfo + */ + public PartInfo getPartitionInfo() { + return partitionInfo; + } + + /** + * Gets the underlying InputSplit. + * @return the baseMapRedSplit + */ + public org.apache.hadoop.mapred.InputSplit getBaseSplit() { + return baseMapRedSplit; + } + + /** + * Gets the data schema. + * @return the table schema + */ + public HCatSchema getDataSchema() { + return this.partitionInfo.getPartitionSchema(); + } + + /** + * Gets the table schema. 
+ * @return the table schema + */ + public HCatSchema getTableSchema() { + return this.tableSchema; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.InputSplit#getLength() + */ + @Override + public long getLength() { + try { + return baseMapRedSplit.getLength(); + } catch (IOException e) { + LOG.warn("Exception in HCatSplit", e); } - - /** - * Gets the underlying InputSplit. - * @return the baseMapRedSplit - */ - public org.apache.hadoop.mapred.InputSplit getBaseSplit() { - return baseMapRedSplit; + return 0; // we errored + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.InputSplit#getLocations() + */ + @Override + public String[] getLocations() { + try { + return baseMapRedSplit.getLocations(); + } catch (IOException e) { + LOG.warn("Exception in HCatSplit", e); } - - /** - * Gets the data schema. - * @return the table schema - */ - public HCatSchema getDataSchema() { - return this.partitionInfo.getPartitionSchema(); + return new String[0]; // we errored + } + + /* (non-Javadoc) + * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput) + */ + @SuppressWarnings("unchecked") + @Override + public void readFields(DataInput input) throws IOException { + String partitionInfoString = WritableUtils.readString(input); + partitionInfo = (PartInfo) HCatUtil.deserialize(partitionInfoString); + + String baseSplitClassName = WritableUtils.readString(input); + org.apache.hadoop.mapred.InputSplit split; + try { + Class splitClass = + (Class) Class.forName(baseSplitClassName); + + //Class.forName().newInstance() does not work if the underlying + //InputSplit has package visibility + Constructor + constructor = + splitClass.getDeclaredConstructor(new Class[]{}); + constructor.setAccessible(true); + + split = constructor.newInstance(); + // read baseSplit from input + ((Writable) split).readFields(input); + this.baseMapRedSplit = split; + } catch (Exception e) { + throw new IOException("Exception from " + baseSplitClassName, e); } - /** - * Gets the table schema. 
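The readFields() implementation above relies on a small reflection idiom: because some InputSplit implementations are package-private, Class.newInstance() cannot be used, so the declared no-arg constructor is looked up and made accessible first. A stand-alone illustration of that idiom, using a hypothetical package-private class rather than a real split type:

import java.lang.reflect.Constructor;

public class ReflectiveInstantiation {

  // Package-private nested type standing in for an InputSplit that is not public.
  static class PackagePrivateSplit {
    PackagePrivateSplit() {}                                  // default-visibility constructor
    @Override public String toString() { return "split"; }
  }

  public static void main(String[] args) throws Exception {
    // In HCatSplit the class name arrives off the wire; here it is derived from the literal.
    Class<?> splitClass = Class.forName(PackagePrivateSplit.class.getName());

    Constructor<?> constructor = splitClass.getDeclaredConstructor();
    constructor.setAccessible(true);   // needed when the caller sits in another package, as in HCatSplit
    Object split = constructor.newInstance();

    System.out.println(split);         // prints "split"
  }
}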
- * @return the table schema - */ - public HCatSchema getTableSchema() { - return this.tableSchema; - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.InputSplit#getLength() - */ - @Override - public long getLength() { - try { - return baseMapRedSplit.getLength(); - } catch (IOException e) { - LOG.warn("Exception in HCatSplit", e); - } - return 0; // we errored - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.InputSplit#getLocations() - */ - @Override - public String[] getLocations() { - try { - return baseMapRedSplit.getLocations(); - } catch (IOException e) { - LOG.warn("Exception in HCatSplit", e); - } - return new String[0]; // we errored - } - - /* (non-Javadoc) - * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput) - */ - @SuppressWarnings("unchecked") - @Override - public void readFields(DataInput input) throws IOException { - String partitionInfoString = WritableUtils.readString(input); - partitionInfo = (PartInfo) HCatUtil.deserialize(partitionInfoString); - - String baseSplitClassName = WritableUtils.readString(input); - org.apache.hadoop.mapred.InputSplit split; - try { - Class splitClass = - (Class) Class.forName(baseSplitClassName); - - //Class.forName().newInstance() does not work if the underlying - //InputSplit has package visibility - Constructor - constructor = - splitClass.getDeclaredConstructor(new Class[]{}); - constructor.setAccessible(true); - - split = constructor.newInstance(); - // read baseSplit from input - ((Writable) split).readFields(input); - this.baseMapRedSplit = split; - } catch (Exception e) { - throw new IOException("Exception from " + baseSplitClassName, e); - } - - String tableSchemaString = WritableUtils.readString(input); - tableSchema = (HCatSchema) HCatUtil.deserialize(tableSchemaString); - } - - /* (non-Javadoc) - * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput) - */ - @Override - public void write(DataOutput output) throws IOException { - String partitionInfoString = HCatUtil.serialize(partitionInfo); - - // write partitionInfo into output - WritableUtils.writeString(output, partitionInfoString); - - WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); - Writable baseSplitWritable = (Writable) baseMapRedSplit; - //write baseSplit into output - baseSplitWritable.write(output); - - //write the table schema into output - String tableSchemaString = HCatUtil.serialize(tableSchema); - WritableUtils.writeString(output, tableSchemaString); - } + String tableSchemaString = WritableUtils.readString(input); + tableSchema = (HCatSchema) HCatUtil.deserialize(tableSchemaString); + } + + /* (non-Javadoc) + * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput) + */ + @Override + public void write(DataOutput output) throws IOException { + String partitionInfoString = HCatUtil.serialize(partitionInfo); + + // write partitionInfo into output + WritableUtils.writeString(output, partitionInfoString); + + WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); + Writable baseSplitWritable = (Writable) baseMapRedSplit; + //write baseSplit into output + baseSplitWritable.write(output); + + //write the table schema into output + String tableSchemaString = HCatUtil.serialize(tableSchema); + WritableUtils.writeString(output, tableSchemaString); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatStorageHandler.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatStorageHandler.java index e45149d..9173133 100644 --- 
a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatStorageHandler.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatStorageHandler.java @@ -34,87 +34,87 @@ */ public abstract class HCatStorageHandler extends DefaultStorageHandler { - //TODO move this to HiveStorageHandler + //TODO move this to HiveStorageHandler - /** - * This method is called to allow the StorageHandlers the chance - * to populate the JobContext.getConfiguration() with properties that - * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). - * Key value pairs passed into jobProperties is guaranteed to be set in the job's - * configuration object. User's can retrieve "context" information from tableDesc. - * User's should avoid mutating tableDesc and only make changes in jobProperties. - * This method is expected to be idempotent such that a job called with the - * same tableDesc values should return the same key-value pairs in jobProperties. - * Any external state set by this method should remain the same if this method is - * called again. It is up to the user to determine how best guarantee this invariant. - * - * This method in particular is to create a configuration for input. - * @param tableDesc - * @param jobProperties - */ - public abstract void configureInputJobProperties(TableDesc tableDesc, Map jobProperties); + /** + * This method is called to allow the StorageHandlers the chance + * to populate the JobContext.getConfiguration() with properties that + * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). + * Key value pairs passed into jobProperties is guaranteed to be set in the job's + * configuration object. User's can retrieve "context" information from tableDesc. + * User's should avoid mutating tableDesc and only make changes in jobProperties. + * This method is expected to be idempotent such that a job called with the + * same tableDesc values should return the same key-value pairs in jobProperties. + * Any external state set by this method should remain the same if this method is + * called again. It is up to the user to determine how best guarantee this invariant. + * + * This method in particular is to create a configuration for input. + * @param tableDesc + * @param jobProperties + */ + public abstract void configureInputJobProperties(TableDesc tableDesc, Map jobProperties); - //TODO move this to HiveStorageHandler + //TODO move this to HiveStorageHandler - /** - * This method is called to allow the StorageHandlers the chance - * to populate the JobContext.getConfiguration() with properties that - * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). - * Key value pairs passed into jobProperties is guaranteed to be set in the job's - * configuration object. User's can retrieve "context" information from tableDesc. - * User's should avoid mutating tableDesc and only make changes in jobProperties. - * This method is expected to be idempotent such that a job called with the - * same tableDesc values should return the same key-value pairs in jobProperties. - * Any external state set by this method should remain the same if this method is - * called again. It is up to the user to determine how best guarantee this invariant. - * - * This method in particular is to create a configuration for output. 
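For concreteness, a skeletal subclass sketch of the contract spelled out in these javadocs: configureInputJobProperties()/configureOutputJobProperties() mutate only jobProperties, and the remaining abstract methods get minimal implementations. ExampleStorageHandler is hypothetical, and the Map<String, String> type arguments are assumed from the standard HiveStorageHandler contract, since the generic parameters are not visible in this hunk.

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hive.hcatalog.mapreduce.HCatStorageHandler;

public class ExampleStorageHandler extends HCatStorageHandler {

  private Configuration conf;

  @Override
  public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    // Only mutate jobProperties; tableDesc is treated as read-only context.
    jobProperties.put("example.read.marker", "true");
  }

  @Override
  public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    jobProperties.put("example.write.marker", "true");
  }

  @Override
  public HiveAuthorizationProvider getAuthorizationProvider() throws HiveException {
    return new DefaultHiveAuthorizationProvider();
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }
}

Keeping the two configure hooks idempotent (the same tableDesc always yielding the same jobProperties) matters because, as the javadoc notes, the framework may invoke them more than once for the same table.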
- * @param tableDesc - * @param jobProperties - */ - public abstract void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties); + /** + * This method is called to allow the StorageHandlers the chance + * to populate the JobContext.getConfiguration() with properties that + * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). + * Key value pairs passed into jobProperties is guaranteed to be set in the job's + * configuration object. User's can retrieve "context" information from tableDesc. + * User's should avoid mutating tableDesc and only make changes in jobProperties. + * This method is expected to be idempotent such that a job called with the + * same tableDesc values should return the same key-value pairs in jobProperties. + * Any external state set by this method should remain the same if this method is + * called again. It is up to the user to determine how best guarantee this invariant. + * + * This method in particular is to create a configuration for output. + * @param tableDesc + * @param jobProperties + */ + public abstract void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties); - /** - * - * - * @return authorization provider - * @throws HiveException - */ - public abstract HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException; + /** + * + * + * @return authorization provider + * @throws HiveException + */ + public abstract HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException; - /* - * (non-Javadoc) - * - * @see org.apache.hadoop.hive.ql.metadata.HiveStorageHandler# - * configureTableJobProperties(org.apache.hadoop.hive.ql.plan.TableDesc, - * java.util.Map) - */ - @Override - @Deprecated - public final void configureTableJobProperties(TableDesc tableDesc, - Map jobProperties) { - } + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.hive.ql.metadata.HiveStorageHandler# + * configureTableJobProperties(org.apache.hadoop.hive.ql.plan.TableDesc, + * java.util.Map) + */ + @Override + @Deprecated + public final void configureTableJobProperties(TableDesc tableDesc, + Map jobProperties) { + } - /* - * (non-Javadoc) - * - * @see org.apache.hadoop.conf.Configurable#getConf() - */ - @Override - public abstract Configuration getConf(); + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.conf.Configurable#getConf() + */ + @Override + public abstract Configuration getConf(); - /* - * (non-Javadoc) - * - * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf. - * Configuration) - */ - @Override - public abstract void setConf(Configuration conf); + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf. 
+ * Configuration) + */ + @Override + public abstract void setConf(Configuration conf); - OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) { - return new DefaultOutputFormatContainer(outputFormat); - } + OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) { + return new DefaultOutputFormatContainer(outputFormat); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java index 783d41a..13faf15 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java @@ -36,152 +36,152 @@ public class HCatTableInfo implements Serializable { - private static final long serialVersionUID = 1L; - - /** The db and table names */ - private final String databaseName; - private final String tableName; - - /** The table schema. */ - private final HCatSchema dataColumns; - private final HCatSchema partitionColumns; - - /** The table being written to */ - private final Table table; - - /** The storer info */ - private StorerInfo storerInfo; - - /** - * Initializes a new HCatTableInfo instance to be used with {@link HCatInputFormat} - * for reading data from a table. - * work with hadoop security, the kerberos principal name of the server - else null - * The principal name should be of the form: - * /_HOST@ like "hcat/_HOST@myrealm.com" - * The special string _HOST will be replaced automatically with the correct host name - * @param databaseName the db name - * @param tableName the table name - * @param dataColumns schema of columns which contain data - * @param partitionColumns schema of partition columns - * @param storerInfo information about storage descriptor - * @param table hive metastore table class - */ - HCatTableInfo( - String databaseName, - String tableName, - HCatSchema dataColumns, - HCatSchema partitionColumns, - StorerInfo storerInfo, - Table table) { - this.databaseName = (databaseName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; - this.tableName = tableName; - this.dataColumns = dataColumns; - this.table = table; - this.storerInfo = storerInfo; - this.partitionColumns = partitionColumns; - } - - /** - * Gets the value of databaseName - * @return the databaseName - */ - public String getDatabaseName() { - return databaseName; - } - - /** - * Gets the value of tableName - * @return the tableName - */ - public String getTableName() { - return tableName; - } - - /** - * @return return schema of data columns as defined in meta store - */ - public HCatSchema getDataColumns() { - return dataColumns; - } - - /** - * @return schema of partition columns - */ - public HCatSchema getPartitionColumns() { - return partitionColumns; - } - - /** - * @return the storerInfo - */ - public StorerInfo getStorerInfo() { - return storerInfo; - } - - public String getTableLocation() { - return table.getSd().getLocation(); - } - - /** - * minimize dependency on hive classes so this is package private - * this should eventually no longer be used - * @return hive metastore representation of table - */ - Table getTable() { - return table; - } - - /** - * create an HCatTableInfo instance from the supplied Hive Table instance - * @param table to create an instance from - * @return HCatTableInfo - * @throws IOException - */ - static HCatTableInfo valueOf(Table table) throws IOException { - // Explicitly use {@link org.apache.hadoop.hive.ql.metadata.Table} when getting the schema, - // but store @{link org.apache.hadoop.hive.metastore.api.Table} as this class is serialized - // into the job conf. - org.apache.hadoop.hive.ql.metadata.Table mTable = - new org.apache.hadoop.hive.ql.metadata.Table(table); - HCatSchema schema = HCatUtil.extractSchema(mTable); - StorerInfo storerInfo = - InternalUtil.extractStorerInfo(table.getSd(), table.getParameters()); - HCatSchema partitionColumns = HCatUtil.getPartitionColumns(mTable); - return new HCatTableInfo(table.getDbName(), table.getTableName(), schema, - partitionColumns, storerInfo, table); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - HCatTableInfo tableInfo = (HCatTableInfo) o; - - if (dataColumns != null ? !dataColumns.equals(tableInfo.dataColumns) : tableInfo.dataColumns != null) - return false; - if (databaseName != null ? !databaseName.equals(tableInfo.databaseName) : tableInfo.databaseName != null) - return false; - if (partitionColumns != null ? !partitionColumns.equals(tableInfo.partitionColumns) : tableInfo.partitionColumns != null) - return false; - if (storerInfo != null ? !storerInfo.equals(tableInfo.storerInfo) : tableInfo.storerInfo != null) return false; - if (table != null ? !table.equals(tableInfo.table) : tableInfo.table != null) return false; - if (tableName != null ? !tableName.equals(tableInfo.tableName) : tableInfo.tableName != null) return false; - - return true; - } - - - @Override - public int hashCode() { - int result = databaseName != null ? databaseName.hashCode() : 0; - result = 31 * result + (tableName != null ? tableName.hashCode() : 0); - result = 31 * result + (dataColumns != null ? dataColumns.hashCode() : 0); - result = 31 * result + (partitionColumns != null ? partitionColumns.hashCode() : 0); - result = 31 * result + (table != null ? table.hashCode() : 0); - result = 31 * result + (storerInfo != null ? 
storerInfo.hashCode() : 0); - return result; - } + private static final long serialVersionUID = 1L; + + /** The db and table names */ + private final String databaseName; + private final String tableName; + + /** The table schema. */ + private final HCatSchema dataColumns; + private final HCatSchema partitionColumns; + + /** The table being written to */ + private final Table table; + + /** The storer info */ + private StorerInfo storerInfo; + + /** + * Initializes a new HCatTableInfo instance to be used with {@link HCatInputFormat} + * for reading data from a table. + * work with hadoop security, the kerberos principal name of the server - else null + * The principal name should be of the form: + * /_HOST@ like "hcat/_HOST@myrealm.com" + * The special string _HOST will be replaced automatically with the correct host name + * @param databaseName the db name + * @param tableName the table name + * @param dataColumns schema of columns which contain data + * @param partitionColumns schema of partition columns + * @param storerInfo information about storage descriptor + * @param table hive metastore table class + */ + HCatTableInfo( + String databaseName, + String tableName, + HCatSchema dataColumns, + HCatSchema partitionColumns, + StorerInfo storerInfo, + Table table) { + this.databaseName = (databaseName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; + this.tableName = tableName; + this.dataColumns = dataColumns; + this.table = table; + this.storerInfo = storerInfo; + this.partitionColumns = partitionColumns; + } + + /** + * Gets the value of databaseName + * @return the databaseName + */ + public String getDatabaseName() { + return databaseName; + } + + /** + * Gets the value of tableName + * @return the tableName + */ + public String getTableName() { + return tableName; + } + + /** + * @return return schema of data columns as defined in meta store + */ + public HCatSchema getDataColumns() { + return dataColumns; + } + + /** + * @return schema of partition columns + */ + public HCatSchema getPartitionColumns() { + return partitionColumns; + } + + /** + * @return the storerInfo + */ + public StorerInfo getStorerInfo() { + return storerInfo; + } + + public String getTableLocation() { + return table.getSd().getLocation(); + } + + /** + * minimize dependency on hive classes so this is package private + * this should eventually no longer be used + * @return hive metastore representation of table + */ + Table getTable() { + return table; + } + + /** + * create an HCatTableInfo instance from the supplied Hive Table instance + * @param table to create an instance from + * @return HCatTableInfo + * @throws IOException + */ + static HCatTableInfo valueOf(Table table) throws IOException { + // Explicitly use {@link org.apache.hadoop.hive.ql.metadata.Table} when getting the schema, + // but store @{link org.apache.hadoop.hive.metastore.api.Table} as this class is serialized + // into the job conf. 
+ org.apache.hadoop.hive.ql.metadata.Table mTable = + new org.apache.hadoop.hive.ql.metadata.Table(table); + HCatSchema schema = HCatUtil.extractSchema(mTable); + StorerInfo storerInfo = + InternalUtil.extractStorerInfo(table.getSd(), table.getParameters()); + HCatSchema partitionColumns = HCatUtil.getPartitionColumns(mTable); + return new HCatTableInfo(table.getDbName(), table.getTableName(), schema, + partitionColumns, storerInfo, table); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + HCatTableInfo tableInfo = (HCatTableInfo) o; + + if (dataColumns != null ? !dataColumns.equals(tableInfo.dataColumns) : tableInfo.dataColumns != null) + return false; + if (databaseName != null ? !databaseName.equals(tableInfo.databaseName) : tableInfo.databaseName != null) + return false; + if (partitionColumns != null ? !partitionColumns.equals(tableInfo.partitionColumns) : tableInfo.partitionColumns != null) + return false; + if (storerInfo != null ? !storerInfo.equals(tableInfo.storerInfo) : tableInfo.storerInfo != null) return false; + if (table != null ? !table.equals(tableInfo.table) : tableInfo.table != null) return false; + if (tableName != null ? !tableName.equals(tableInfo.tableName) : tableInfo.tableName != null) return false; + + return true; + } + + + @Override + public int hashCode() { + int result = databaseName != null ? databaseName.hashCode() : 0; + result = 31 * result + (tableName != null ? tableName.hashCode() : 0); + result = 31 * result + (dataColumns != null ? dataColumns.hashCode() : 0); + result = 31 * result + (partitionColumns != null ? partitionColumns.hashCode() : 0); + result = 31 * result + (table != null ? table.hashCode() : 0); + result = 31 * result + (storerInfo != null ? storerInfo.hashCode() : 0); + return result; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java index fae5573..0d6e031 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java @@ -48,124 +48,124 @@ */ class InitializeInput { - private static final Logger LOG = LoggerFactory.getLogger(InitializeInput.class); - - /** - * @see org.apache.hive.hcatalog.mapreduce.InitializeInput#setInput(org.apache.hadoop.conf.Configuration, InputJobInfo) - */ - public static void setInput(Job job, InputJobInfo theirInputJobInfo) throws Exception { - setInput(job.getConfiguration(), theirInputJobInfo); - } - - /** - * Set the input to use for the Job. This queries the metadata server with the specified - * partition predicates, gets the matching partitions, and puts the information in the job - * configuration object. - * - * To ensure a known InputJobInfo state, only the database name, table name, filter, and - * properties are preserved. All other modification from the given InputJobInfo are discarded. 
- * - * After calling setInput, InputJobInfo can be retrieved from the job configuration as follows: - * {code} - * InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize( - * job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); - * {code} - * - * @param conf the job Configuration object - * @param theirInputJobInfo information on the Input to read - * @throws Exception - */ - public static void setInput(Configuration conf, - InputJobInfo theirInputJobInfo) throws Exception { - InputJobInfo inputJobInfo = InputJobInfo.create( - theirInputJobInfo.getDatabaseName(), - theirInputJobInfo.getTableName(), - theirInputJobInfo.getFilter(), - theirInputJobInfo.getProperties()); - conf.set( - HCatConstants.HCAT_KEY_JOB_INFO, - HCatUtil.serialize(getInputJobInfo(conf, inputJobInfo, null))); - } + private static final Logger LOG = LoggerFactory.getLogger(InitializeInput.class); + + /** + * @see org.apache.hive.hcatalog.mapreduce.InitializeInput#setInput(org.apache.hadoop.conf.Configuration, InputJobInfo) + */ + public static void setInput(Job job, InputJobInfo theirInputJobInfo) throws Exception { + setInput(job.getConfiguration(), theirInputJobInfo); + } + + /** + * Set the input to use for the Job. This queries the metadata server with the specified + * partition predicates, gets the matching partitions, and puts the information in the job + * configuration object. + * + * To ensure a known InputJobInfo state, only the database name, table name, filter, and + * properties are preserved. All other modification from the given InputJobInfo are discarded. + * + * After calling setInput, InputJobInfo can be retrieved from the job configuration as follows: + * {code} + * InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize( + * job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); + * {code} + * + * @param conf the job Configuration object + * @param theirInputJobInfo information on the Input to read + * @throws Exception + */ + public static void setInput(Configuration conf, + InputJobInfo theirInputJobInfo) throws Exception { + InputJobInfo inputJobInfo = InputJobInfo.create( + theirInputJobInfo.getDatabaseName(), + theirInputJobInfo.getTableName(), + theirInputJobInfo.getFilter(), + theirInputJobInfo.getProperties()); + conf.set( + HCatConstants.HCAT_KEY_JOB_INFO, + HCatUtil.serialize(getInputJobInfo(conf, inputJobInfo, null))); + } + + /** + * Returns the given InputJobInfo after populating with data queried from the metadata service. 
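Expanding the {code} fragment from the setInput javadoc above into a fuller sketch: once input has been configured, the populated InputJobInfo can be deserialized back out of the job configuration. The database and table names are placeholders, and the HCatInputFormat.setInput(Job, dbName, tableName) overload is assumed to be the public entry point that ultimately delegates to InitializeInput.setInput.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.common.HCatUtil;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.hive.hcatalog.mapreduce.InputJobInfo;

public class ReadSetupExample {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new Configuration(), "hcat-read-example");
    // Placeholder db/table; assumed public overload that delegates to InitializeInput.setInput().
    HCatInputFormat.setInput(job, "default", "page_views");

    // As described in the javadoc above: the fully populated InputJobInfo is now
    // serialized under HCAT_KEY_JOB_INFO in the job configuration.
    InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(
        job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));

    HCatSchema dataColumns = inputInfo.getTableInfo().getDataColumns();
    System.out.println("Reading " + inputInfo.getDatabaseName() + "."
        + inputInfo.getTableName() + " with columns " + dataColumns.getFieldNames());
  }
}
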
+ */ + private static InputJobInfo getInputJobInfo( + Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception { + HiveMetaStoreClient client = null; + HiveConf hiveConf = null; + try { + if (conf != null) { + hiveConf = HCatUtil.getHiveConf(conf); + } else { + hiveConf = new HiveConf(HCatInputFormat.class); + } + client = HCatUtil.getHiveClient(hiveConf); + Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName()); + + List partInfoList = new ArrayList(); + + inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); + if (table.getPartitionKeys().size() != 0) { + //Partitioned table + List parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName(), + inputJobInfo.getFilter(), + (short) -1); + + // Default to 100,000 partitions if hive.metastore.maxpartition is not defined + int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000); + if (parts != null && parts.size() > maxPart) { + throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size()); + } - /** - * Returns the given InputJobInfo after populating with data queried from the metadata service. - */ - private static InputJobInfo getInputJobInfo( - Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception { - HiveMetaStoreClient client = null; - HiveConf hiveConf = null; - try { - if (conf != null) { - hiveConf = HCatUtil.getHiveConf(conf); - } else { - hiveConf = new HiveConf(HCatInputFormat.class); - } - client = HCatUtil.getHiveClient(hiveConf); - Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName()); - - List partInfoList = new ArrayList(); - - inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); - if (table.getPartitionKeys().size() != 0) { - //Partitioned table - List parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName(), - inputJobInfo.getFilter(), - (short) -1); - - // Default to 100,000 partitions if hive.metastore.maxpartition is not defined - int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000); - if (parts != null && parts.size() > maxPart) { - throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size()); - } - - // populate partition info - for (Partition ptn : parts) { - HCatSchema schema = HCatUtil.extractSchema( - new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn)); - PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), - ptn.getParameters(), conf, inputJobInfo); - partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn)); - partInfoList.add(partInfo); - } - - } else { - //Non partitioned table - HCatSchema schema = HCatUtil.extractSchema(table); - PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(), - table.getParameters(), conf, inputJobInfo); - partInfo.setPartitionValues(new HashMap()); - partInfoList.add(partInfo); - } - inputJobInfo.setPartitions(partInfoList); - - return inputJobInfo; - } finally { - HCatUtil.closeHiveClientQuietly(client); + // populate partition info + for (Partition ptn : parts) { + HCatSchema schema = HCatUtil.extractSchema( + new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn)); + PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), + ptn.getParameters(), conf, inputJobInfo); + partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn)); + 
partInfoList.add(partInfo); } + } else { + //Non partitioned table + HCatSchema schema = HCatUtil.extractSchema(table); + PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(), + table.getParameters(), conf, inputJobInfo); + partInfo.setPartitionValues(new HashMap()); + partInfoList.add(partInfo); + } + inputJobInfo.setPartitions(partInfoList); + + return inputJobInfo; + } finally { + HCatUtil.closeHiveClientQuietly(client); } - private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd, - Map parameters, Configuration conf, - InputJobInfo inputJobInfo) throws IOException { + } - StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters); + private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd, + Map parameters, Configuration conf, + InputJobInfo inputJobInfo) throws IOException { - Properties hcatProperties = new Properties(); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); + StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters); - // copy the properties from storageHandler to jobProperties - Map jobProperties = HCatUtil.getInputJobProperties(storageHandler, inputJobInfo); + Properties hcatProperties = new Properties(); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); - for (String key : parameters.keySet()) { - hcatProperties.put(key, parameters.get(key)); - } - // FIXME - // Bloating partinfo with inputJobInfo is not good - return new PartInfo(schema, storageHandler, sd.getLocation(), - hcatProperties, jobProperties, inputJobInfo.getTableInfo()); + // copy the properties from storageHandler to jobProperties + Map jobProperties = HCatUtil.getInputJobProperties(storageHandler, inputJobInfo); + + for (String key : parameters.keySet()) { + hcatProperties.put(key, parameters.get(key)); } + // FIXME + // Bloating partinfo with inputJobInfo is not good + return new PartInfo(schema, storageHandler, sd.getLocation(), + hcatProperties, jobProperties, inputJobInfo.getTableInfo()); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java index 9478f31..9cfb1be 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java @@ -45,157 +45,157 @@ @InterfaceStability.Evolving public class InputJobInfo implements Serializable { - /** The serialization version */ - private static final long serialVersionUID = 1L; - - /** The db and table names. */ - private final String databaseName; - private final String tableName; - - /** meta information of the table to be read from */ - private HCatTableInfo tableInfo; - - /** The partition filter */ - private String filter; - - /** The list of partitions matching the filter. */ - transient private List partitions; - - /** implementation specific job properties */ - private Properties properties; - - /** - * Initializes a new InputJobInfo - * for reading data from a table. 
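The InputJobInfo factory described here takes the database, the table, an optional partition filter, and implementation-specific properties; per the setInput note above, only these four pieces survive into the job. A small sketch, with placeholder names and a hypothetical filter expression and property key:

import java.util.Properties;

import org.apache.hive.hcatalog.mapreduce.InputJobInfo;

public class InputJobInfoExample {
  public static InputJobInfo describeInput() {
    // Implementation-specific properties handed through to the storage handler.
    Properties props = new Properties();
    props.setProperty("example.read.hint", "sequential"); // hypothetical key

    // The filter follows the metastore's listPartitionsByFilter() syntax;
    // passing null selects all partitions.
    return InputJobInfo.create("default", "page_views", "ds=\"2013-01-01\"", props);
  }
}
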
- * @param databaseName the db name - * @param tableName the table name - * @param filter the partition filter - * @param properties implementation specific job properties - */ - public static InputJobInfo create(String databaseName, - String tableName, - String filter, - Properties properties) { - return new InputJobInfo(databaseName, tableName, filter, properties); - } - - /** - * Initializes a new InputJobInfo - * for reading data from a table. - * @param databaseName the db name - * @param tableName the table name - * @param filter the partition filter - */ - @Deprecated - public static InputJobInfo create(String databaseName, - String tableName, - String filter) { - return create(databaseName, tableName, filter, null); - } - - - private InputJobInfo(String databaseName, - String tableName, - String filter, - Properties properties) { - this.databaseName = (databaseName == null) ? - MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; - this.tableName = tableName; - this.filter = filter; - this.properties = properties == null ? new Properties() : properties; - } - - /** - * Gets the value of databaseName - * @return the databaseName - */ - public String getDatabaseName() { - return databaseName; - } - - /** - * Gets the value of tableName - * @return the tableName - */ - public String getTableName() { - return tableName; - } - - /** - * Gets the table's meta information - * @return the HCatTableInfo - */ - public HCatTableInfo getTableInfo() { - return tableInfo; - } - - /** - * set the tablInfo instance - * this should be the same instance - * determined by this object's DatabaseName and TableName - * @param tableInfo - */ - void setTableInfo(HCatTableInfo tableInfo) { - this.tableInfo = tableInfo; - } - - /** - * Gets the value of partition filter - * @return the filter string - */ - public String getFilter() { - return filter; - } - - /** - * @return partition info - */ - public List getPartitions() { - return partitions; - } - - /** - * @return partition info list - */ - void setPartitions(List partitions) { - this.partitions = partitions; - } - - /** - * Set/Get Property information to be passed down to *StorageHandler implementation - * put implementation specific storage handler configurations here - * @return the implementation specific job properties - */ - public Properties getProperties() { - return properties; - } - - /** - * Serialize this object, compressing the partitions which can exceed the - * allowed jobConf size. - * @see HCATALOG-453 - */ - private void writeObject(ObjectOutputStream oos) - throws IOException { - oos.defaultWriteObject(); - Deflater def = new Deflater(Deflater.BEST_COMPRESSION); - ObjectOutputStream partInfoWriter = - new ObjectOutputStream(new DeflaterOutputStream(oos, def)); - partInfoWriter.writeObject(partitions); - partInfoWriter.close(); - } - - /** - * Deserialize this object, decompressing the partitions which can exceed the - * allowed jobConf size. - * @see HCATALOG-453 - */ - @SuppressWarnings("unchecked") - private void readObject(ObjectInputStream ois) - throws IOException, ClassNotFoundException { - ois.defaultReadObject(); - ObjectInputStream partInfoReader = - new ObjectInputStream(new InflaterInputStream(ois)); - partitions = (List)partInfoReader.readObject(); - } + /** The serialization version */ + private static final long serialVersionUID = 1L; + + /** The db and table names. 
*/ + private final String databaseName; + private final String tableName; + + /** meta information of the table to be read from */ + private HCatTableInfo tableInfo; + + /** The partition filter */ + private String filter; + + /** The list of partitions matching the filter. */ + transient private List partitions; + + /** implementation specific job properties */ + private Properties properties; + + /** + * Initializes a new InputJobInfo + * for reading data from a table. + * @param databaseName the db name + * @param tableName the table name + * @param filter the partition filter + * @param properties implementation specific job properties + */ + public static InputJobInfo create(String databaseName, + String tableName, + String filter, + Properties properties) { + return new InputJobInfo(databaseName, tableName, filter, properties); + } + + /** + * Initializes a new InputJobInfo + * for reading data from a table. + * @param databaseName the db name + * @param tableName the table name + * @param filter the partition filter + */ + @Deprecated + public static InputJobInfo create(String databaseName, + String tableName, + String filter) { + return create(databaseName, tableName, filter, null); + } + + + private InputJobInfo(String databaseName, + String tableName, + String filter, + Properties properties) { + this.databaseName = (databaseName == null) ? + MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; + this.tableName = tableName; + this.filter = filter; + this.properties = properties == null ? new Properties() : properties; + } + + /** + * Gets the value of databaseName + * @return the databaseName + */ + public String getDatabaseName() { + return databaseName; + } + + /** + * Gets the value of tableName + * @return the tableName + */ + public String getTableName() { + return tableName; + } + + /** + * Gets the table's meta information + * @return the HCatTableInfo + */ + public HCatTableInfo getTableInfo() { + return tableInfo; + } + + /** + * set the tablInfo instance + * this should be the same instance + * determined by this object's DatabaseName and TableName + * @param tableInfo + */ + void setTableInfo(HCatTableInfo tableInfo) { + this.tableInfo = tableInfo; + } + + /** + * Gets the value of partition filter + * @return the filter string + */ + public String getFilter() { + return filter; + } + + /** + * @return partition info + */ + public List getPartitions() { + return partitions; + } + + /** + * @return partition info list + */ + void setPartitions(List partitions) { + this.partitions = partitions; + } + + /** + * Set/Get Property information to be passed down to *StorageHandler implementation + * put implementation specific storage handler configurations here + * @return the implementation specific job properties + */ + public Properties getProperties() { + return properties; + } + + /** + * Serialize this object, compressing the partitions which can exceed the + * allowed jobConf size. + * @see HCATALOG-453 + */ + private void writeObject(ObjectOutputStream oos) + throws IOException { + oos.defaultWriteObject(); + Deflater def = new Deflater(Deflater.BEST_COMPRESSION); + ObjectOutputStream partInfoWriter = + new ObjectOutputStream(new DeflaterOutputStream(oos, def)); + partInfoWriter.writeObject(partitions); + partInfoWriter.close(); + } + + /** + * Deserialize this object, decompressing the partitions which can exceed the + * allowed jobConf size. 
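The writeObject/readObject pair above compresses just the transient partitions field so that the serialized InputJobInfo stays within the allowed jobConf size (HCATALOG-453). The same pattern in isolation, with made-up class and field names:

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.List;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

public class CompressedPayload implements Serializable {
  private static final long serialVersionUID = 1L;

  private final String name;                  // handled by defaultWriteObject()
  private transient List<String> bulkyField;  // compressed by hand, like 'partitions' above

  public CompressedPayload(String name, List<String> bulkyField) {
    this.name = name;
    this.bulkyField = bulkyField;
  }

  private void writeObject(ObjectOutputStream oos) throws IOException {
    oos.defaultWriteObject();
    Deflater def = new Deflater(Deflater.BEST_COMPRESSION);
    ObjectOutputStream compressed =
        new ObjectOutputStream(new DeflaterOutputStream(oos, def));
    compressed.writeObject(bulkyField);
    compressed.close(); // finishes the deflater block, mirroring the code above
  }

  @SuppressWarnings("unchecked")
  private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
    ois.defaultReadObject();
    ObjectInputStream decompressed = new ObjectInputStream(new InflaterInputStream(ois));
    bulkyField = (List<String>) decompressed.readObject();
  }
}
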
+ * @see HCATALOG-453 + */ + @SuppressWarnings("unchecked") + private void readObject(ObjectInputStream ois) + throws IOException, ClassNotFoundException { + ois.defaultReadObject(); + ObjectInputStream partInfoReader = + new ObjectInputStream(new InflaterInputStream(ois)); + partitions = (List)partInfoReader.readObject(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java index 996b8f4..bb1cb18 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java @@ -57,162 +57,162 @@ import java.util.Properties; class InternalUtil { - private static final Logger LOG = LoggerFactory.getLogger(InternalUtil.class); + private static final Logger LOG = LoggerFactory.getLogger(InternalUtil.class); - static StorerInfo extractStorerInfo(StorageDescriptor sd, Map properties) throws IOException { - Properties hcatProperties = new Properties(); - for (String key : properties.keySet()) { - hcatProperties.put(key, properties.get(key)); - } - - // also populate with StorageDescriptor->SerDe.Parameters - for (Map.Entry param : - sd.getSerdeInfo().getParameters().entrySet()) { - hcatProperties.put(param.getKey(), param.getValue()); - } - - - return new StorerInfo( - sd.getInputFormat(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), - properties.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE), - hcatProperties); + static StorerInfo extractStorerInfo(StorageDescriptor sd, Map properties) throws IOException { + Properties hcatProperties = new Properties(); + for (String key : properties.keySet()) { + hcatProperties.put(key, properties.get(key)); } - static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema) throws IOException { - - if (outputSchema == null) { - throw new IOException("Invalid output schema specified"); - } + // also populate with StorageDescriptor->SerDe.Parameters + for (Map.Entry param : + sd.getSerdeInfo().getParameters().entrySet()) { + hcatProperties.put(param.getKey(), param.getValue()); + } - List fieldInspectors = new ArrayList(); - List fieldNames = new ArrayList(); - for (HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) { - TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString()); + return new StorerInfo( + sd.getInputFormat(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), + properties.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE), + hcatProperties); + } - fieldNames.add(hcatFieldSchema.getName()); - fieldInspectors.add(getObjectInspector(type)); - } + static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema) throws IOException { - StructObjectInspector structInspector = ObjectInspectorFactory. - getStandardStructObjectInspector(fieldNames, fieldInspectors); - return structInspector; + if (outputSchema == null) { + throw new IOException("Invalid output schema specified"); } - private static ObjectInspector getObjectInspector(TypeInfo type) throws IOException { - - switch (type.getCategory()) { - - case PRIMITIVE: - PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type; - return PrimitiveObjectInspectorFactory. 
- getPrimitiveJavaObjectInspector(primitiveType.getPrimitiveCategory()); + List fieldInspectors = new ArrayList(); + List fieldNames = new ArrayList(); - case MAP: - MapTypeInfo mapType = (MapTypeInfo) type; - MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector( - getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo())); - return mapInspector; + for (HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) { + TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString()); - case LIST: - ListTypeInfo listType = (ListTypeInfo) type; - ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector( - getObjectInspector(listType.getListElementTypeInfo())); - return listInspector; + fieldNames.add(hcatFieldSchema.getName()); + fieldInspectors.add(getObjectInspector(type)); + } - case STRUCT: - StructTypeInfo structType = (StructTypeInfo) type; - List fieldTypes = structType.getAllStructFieldTypeInfos(); + StructObjectInspector structInspector = ObjectInspectorFactory. + getStandardStructObjectInspector(fieldNames, fieldInspectors); + return structInspector; + } - List fieldInspectors = new ArrayList(); - for (TypeInfo fieldType : fieldTypes) { - fieldInspectors.add(getObjectInspector(fieldType)); - } + private static ObjectInspector getObjectInspector(TypeInfo type) throws IOException { - StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - structType.getAllStructFieldNames(), fieldInspectors); - return structInspector; + switch (type.getCategory()) { - default: - throw new IOException("Unknown field schema type"); - } - } + case PRIMITIVE: + PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type; + return PrimitiveObjectInspectorFactory. 
+ getPrimitiveJavaObjectInspector(primitiveType.getPrimitiveCategory()); - //TODO this has to find a better home, it's also hardcoded as default in hive would be nice - // if the default was decided by the serde - static void initializeOutputSerDe(SerDe serDe, Configuration conf, OutputJobInfo jobInfo) - throws SerDeException { - serDe.initialize(conf, getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema())); - } + case MAP: + MapTypeInfo mapType = (MapTypeInfo) type; + MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector( + getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo())); + return mapInspector; - static void initializeDeserializer(Deserializer deserializer, Configuration conf, - HCatTableInfo info, HCatSchema schema) throws SerDeException { - Properties props = getSerdeProperties(info, schema); - LOG.info("Initializing " + deserializer.getClass().getName() + " with properties " + props); - deserializer.initialize(conf, props); - } + case LIST: + ListTypeInfo listType = (ListTypeInfo) type; + ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector( + getObjectInspector(listType.getListElementTypeInfo())); + return listInspector; - private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s) - throws SerDeException { - Properties props = new Properties(); - List fields = HCatUtil.getFieldSchemaList(s.getFields()); - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, - MetaStoreUtils.getColumnNamesFromFieldSchema(fields)); - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, - MetaStoreUtils.getColumnTypesFromFieldSchema(fields)); + case STRUCT: + StructTypeInfo structType = (StructTypeInfo) type; + List fieldTypes = structType.getAllStructFieldTypeInfos(); - // setting these props to match LazySimpleSerde - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1"); + List fieldInspectors = new ArrayList(); + for (TypeInfo fieldType : fieldTypes) { + fieldInspectors.add(getObjectInspector(fieldType)); + } - //add props from params set in table schema - props.putAll(info.getStorerInfo().getProperties()); + StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector( + structType.getAllStructFieldNames(), fieldInspectors); + return structInspector; - return props; + default: + throw new IOException("Unknown field schema type"); } - - static Reporter createReporter(TaskAttemptContext context) { - return new ProgressReporter(context); + } + + //TODO this has to find a better home, it's also hardcoded as default in hive would be nice + // if the default was decided by the serde + static void initializeOutputSerDe(SerDe serDe, Configuration conf, OutputJobInfo jobInfo) + throws SerDeException { + serDe.initialize(conf, getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema())); + } + + static void initializeDeserializer(Deserializer deserializer, Configuration conf, + HCatTableInfo info, HCatSchema schema) throws SerDeException { + Properties props = getSerdeProperties(info, schema); + LOG.info("Initializing " + deserializer.getClass().getName() + " with properties " + props); + deserializer.initialize(conf, props); + } + + private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s) + throws 
SerDeException { + Properties props = new Properties(); + List fields = HCatUtil.getFieldSchemaList(s.getFields()); + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, + MetaStoreUtils.getColumnNamesFromFieldSchema(fields)); + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, + MetaStoreUtils.getColumnTypesFromFieldSchema(fields)); + + // setting these props to match LazySimpleSerde + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1"); + + //add props from params set in table schema + props.putAll(info.getStorerInfo().getProperties()); + + return props; + } + + static Reporter createReporter(TaskAttemptContext context) { + return new ProgressReporter(context); + } + + /** + * Casts an InputSplit into a HCatSplit, providing a useful error message if the cast fails. + * @param split the InputSplit + * @return the HCatSplit + * @throws IOException + */ + public static HCatSplit castToHCatSplit(InputSplit split) throws IOException { + if (split instanceof HCatSplit) { + return (HCatSplit) split; + } else { + throw new IOException("Split must be " + HCatSplit.class.getName() + + " but found " + split.getClass().getName()); } - - /** - * Casts an InputSplit into a HCatSplit, providing a useful error message if the cast fails. - * @param split the InputSplit - * @return the HCatSplit - * @throws IOException - */ - public static HCatSplit castToHCatSplit(InputSplit split) throws IOException { - if (split instanceof HCatSplit) { - return (HCatSplit) split; - } else { - throw new IOException("Split must be " + HCatSplit.class.getName() - + " but found " + split.getClass().getName()); - } + } + + + static Map createPtnKeyValueMap(Table table, Partition ptn) + throws IOException { + List values = ptn.getValues(); + if (values.size() != table.getPartitionKeys().size()) { + throw new IOException( + "Partition values in partition inconsistent with table definition, table " + + table.getTableName() + " has " + + table.getPartitionKeys().size() + + " partition keys, partition has " + values.size() + + "partition values"); } + Map ptnKeyValues = new HashMap(); - static Map createPtnKeyValueMap(Table table, Partition ptn) - throws IOException { - List values = ptn.getValues(); - if (values.size() != table.getPartitionKeys().size()) { - throw new IOException( - "Partition values in partition inconsistent with table definition, table " - + table.getTableName() + " has " - + table.getPartitionKeys().size() - + " partition keys, partition has " + values.size() - + "partition values"); - } - - Map ptnKeyValues = new HashMap(); - - int i = 0; - for (FieldSchema schema : table.getPartitionKeys()) { - // CONCERN : the way this mapping goes, the order *needs* to be - // preserved for table.getPartitionKeys() and ptn.getValues() - ptnKeyValues.put(schema.getName().toLowerCase(), values.get(i)); - i++; - } - - return ptnKeyValues; + int i = 0; + for (FieldSchema schema : table.getPartitionKeys()) { + // CONCERN : the way this mapping goes, the order *needs* to be + // preserved for table.getPartitionKeys() and ptn.getValues() + ptnKeyValues.put(schema.getName().toLowerCase(), values.get(i)); + i++; } + + return ptnKeyValues; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/MultiOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/MultiOutputFormat.java index 
a2c670f..ab0e1ab 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/MultiOutputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/MultiOutputFormat.java @@ -136,488 +136,488 @@ */ public class MultiOutputFormat extends OutputFormat { - private static final Logger LOGGER = LoggerFactory.getLogger(MultiOutputFormat.class.getName()); - private static final String MO_ALIASES = "mapreduce.multiout.aliases"; - private static final String MO_ALIAS = "mapreduce.multiout.alias"; - private static final String CONF_KEY_DELIM = "%%"; - private static final String CONF_VALUE_DELIM = ";;"; - private static final String COMMA_DELIM = ","; - private static final List configsToOverride = new ArrayList(); - private static final Map configsToMerge = new HashMap(); - - static { - configsToOverride.add("mapred.output.dir"); - configsToOverride.add(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( - HadoopShims.HCatHadoopShims.PropertyName.CACHE_SYMLINK)); - configsToMerge.put(JobContext.JOB_NAMENODES, COMMA_DELIM); - configsToMerge.put("tmpfiles", COMMA_DELIM); - configsToMerge.put("tmpjars", COMMA_DELIM); - configsToMerge.put("tmparchives", COMMA_DELIM); - configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( - HadoopShims.HCatHadoopShims.PropertyName.CACHE_ARCHIVES), COMMA_DELIM); - configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( - HadoopShims.HCatHadoopShims.PropertyName.CACHE_FILES), COMMA_DELIM); - String fileSep; - if (HCatUtil.isHadoop23()) { - fileSep = ","; + private static final Logger LOGGER = LoggerFactory.getLogger(MultiOutputFormat.class.getName()); + private static final String MO_ALIASES = "mapreduce.multiout.aliases"; + private static final String MO_ALIAS = "mapreduce.multiout.alias"; + private static final String CONF_KEY_DELIM = "%%"; + private static final String CONF_VALUE_DELIM = ";;"; + private static final String COMMA_DELIM = ","; + private static final List configsToOverride = new ArrayList(); + private static final Map configsToMerge = new HashMap(); + + static { + configsToOverride.add("mapred.output.dir"); + configsToOverride.add(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( + HadoopShims.HCatHadoopShims.PropertyName.CACHE_SYMLINK)); + configsToMerge.put(JobContext.JOB_NAMENODES, COMMA_DELIM); + configsToMerge.put("tmpfiles", COMMA_DELIM); + configsToMerge.put("tmpjars", COMMA_DELIM); + configsToMerge.put("tmparchives", COMMA_DELIM); + configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( + HadoopShims.HCatHadoopShims.PropertyName.CACHE_ARCHIVES), COMMA_DELIM); + configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( + HadoopShims.HCatHadoopShims.PropertyName.CACHE_FILES), COMMA_DELIM); + String fileSep; + if (HCatUtil.isHadoop23()) { + fileSep = ","; + } else { + fileSep = System.getProperty("path.separator"); + } + configsToMerge.put("mapred.job.classpath.archives", fileSep); + configsToMerge.put("mapred.job.classpath.files", fileSep); + } + + /** + * Get a JobConfigurer instance that will support configuration of the job + * for multiple output formats. 
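To show how the JobConfigurer introduced in this class is meant to be driven from a client job, the sketch below registers two standard Hadoop OutputFormats under aliases and configures each through its own Job copy. The aliases, output paths, and key/value classes are placeholders; inside the mapper or reducer, records are then routed with MultiOutputFormat.write(alias, key, value, context).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat;
import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer;

public class MultiOutputDriver {
  public static Job buildJob(Configuration conf) throws Exception {
    Job job = new Job(conf, "multi-output-example");
    job.setOutputFormatClass(MultiOutputFormat.class);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);

    // One alias per underlying OutputFormat; each alias gets its own Job copy,
    // which is where the per-format configuration (paths, codecs, ...) goes.
    configurer.addOutputFormat("text", TextOutputFormat.class, Text.class, Text.class);
    configurer.addOutputFormat("seq", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    TextOutputFormat.setOutputPath(configurer.getJob("text"), new Path("/tmp/out/text"));
    SequenceFileOutputFormat.setOutputPath(configurer.getJob("seq"), new Path("/tmp/out/seq"));

    // Must run after all aliases are added and before the job is submitted;
    // it stores the per-alias configuration deltas under mapreduce.multiout.*.
    configurer.configure();
    return job;
  }
}
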
+ * + * @param job the mapreduce job to be submitted + * @return JobConfigurer + */ + public static JobConfigurer createConfigurer(Job job) { + return JobConfigurer.create(job); + } + + /** + * Get the JobContext with the related OutputFormat configuration populated given the alias + * and the actual JobContext + * @param alias the name given to the OutputFormat configuration + * @param context the JobContext + * @return a copy of the JobContext with the alias configuration populated + */ + public static JobContext getJobContext(String alias, JobContext context) { + String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); + JobContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createJobContext( + context.getConfiguration(), context.getJobID()); + addToConfig(aliasConf, aliasContext.getConfiguration()); + return aliasContext; + } + + /** + * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias + * and the actual TaskAttemptContext + * @param alias the name given to the OutputFormat configuration + * @param context the Mapper or Reducer Context + * @return a copy of the TaskAttemptContext with the alias configuration populated + */ + public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) { + String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); + TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + context.getConfiguration(), context.getTaskAttemptID()); + addToConfig(aliasConf, aliasContext.getConfiguration()); + return aliasContext; + } + + /** + * Write the output key and value using the OutputFormat defined by the + * alias. + * + * @param alias the name given to the OutputFormat configuration + * @param key the output key to be written + * @param value the output value to be written + * @param context the Mapper or Reducer Context + * @throws IOException + * @throws InterruptedException + */ + public static void write(String alias, K key, V value, TaskInputOutputContext context) + throws IOException, InterruptedException { + KeyValue keyval = new KeyValue(key, value); + context.write(new Text(alias), keyval); + } + + @Override + public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { + for (String alias : getOutputFormatAliases(context)) { + LOGGER.debug("Calling checkOutputSpecs for alias: " + alias); + JobContext aliasContext = getJobContext(alias, context); + OutputFormat outputFormat = getOutputFormatInstance(aliasContext); + outputFormat.checkOutputSpecs(aliasContext); + // Copy credentials and any new config added back to JobContext + context.getCredentials().addAll(aliasContext.getCredentials()); + setAliasConf(alias, context, aliasContext); + } + } + + @Override + public RecordWriter getRecordWriter(TaskAttemptContext context) + throws IOException, + InterruptedException { + return new MultiRecordWriter(context); + } + + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, + InterruptedException { + return new MultiOutputCommitter(context); + } + + private static OutputFormat getOutputFormatInstance(JobContext context) { + OutputFormat outputFormat; + try { + outputFormat = ReflectionUtils.newInstance(context.getOutputFormatClass(), + context.getConfiguration()); + } catch (ClassNotFoundException e) { + throw new IllegalStateException(e); + } + return outputFormat; + } + + private static String[] 
getOutputFormatAliases(JobContext context) { + return context.getConfiguration().getStrings(MO_ALIASES); + } + + /** + * Compare the aliasContext with userJob and add the differing configuration + * as mapreduce.multiout.alias..conf to the userJob. + *

+ * Merge configs like tmpjars, tmpfiles, tmparchives, and + * mapreduce.job.hdfs-servers that are handled directly by the JobClient, and add + * them to userJob. + *

+ * Add mapred.output.dir config to userJob. + * + * @param alias alias name associated with a OutputFormat + * @param userJob reference to Job that the user is going to submit + * @param aliasContext JobContext populated with OutputFormat related + * configuration. + */ + private static void setAliasConf(String alias, JobContext userJob, JobContext aliasContext) { + Configuration userConf = userJob.getConfiguration(); + StringBuilder builder = new StringBuilder(); + for (Entry conf : aliasContext.getConfiguration()) { + String key = conf.getKey(); + String value = conf.getValue(); + String jobValue = userConf.getRaw(key); + if (jobValue == null || !jobValue.equals(value)) { + if (configsToMerge.containsKey(key)) { + String mergedValue = getMergedConfValue(jobValue, value, configsToMerge.get(key)); + userConf.set(key, mergedValue); } else { - fileSep = System.getProperty("path.separator"); + if (configsToOverride.contains(key)) { + userConf.set(key, value); + } + builder.append(key).append(CONF_KEY_DELIM).append(value) + .append(CONF_VALUE_DELIM); } - configsToMerge.put("mapred.job.classpath.archives", fileSep); - configsToMerge.put("mapred.job.classpath.files", fileSep); + } } - - /** - * Get a JobConfigurer instance that will support configuration of the job - * for multiple output formats. - * - * @param job the mapreduce job to be submitted - * @return JobConfigurer - */ - public static JobConfigurer createConfigurer(Job job) { - return JobConfigurer.create(job); + if (builder.length() > CONF_VALUE_DELIM.length()) { + builder.delete(builder.length() - CONF_VALUE_DELIM.length(), builder.length()); + userConf.set(getAliasConfName(alias), builder.toString()); } + } - /** - * Get the JobContext with the related OutputFormat configuration populated given the alias - * and the actual JobContext - * @param alias the name given to the OutputFormat configuration - * @param context the JobContext - * @return a copy of the JobContext with the alias configuration populated - */ - public static JobContext getJobContext(String alias, JobContext context) { - String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); - JobContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createJobContext( - context.getConfiguration(), context.getJobID()); - addToConfig(aliasConf, aliasContext.getConfiguration()); - return aliasContext; + private static String getMergedConfValue(String originalValues, String newValues, String separator) { + if (originalValues == null) { + return newValues; } - - /** - * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias - * and the actual TaskAttemptContext - * @param alias the name given to the OutputFormat configuration - * @param context the Mapper or Reducer Context - * @return a copy of the TaskAttemptContext with the alias configuration populated - */ - public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) { - String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); - TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - context.getConfiguration(), context.getTaskAttemptID()); - addToConfig(aliasConf, aliasContext.getConfiguration()); - return aliasContext; + Set mergedValues = new LinkedHashSet(); + mergedValues.addAll(Arrays.asList(StringUtils.split(originalValues, separator))); + mergedValues.addAll(Arrays.asList(StringUtils.split(newValues, separator))); + StringBuilder builder = new 
StringBuilder(originalValues.length() + newValues.length() + 2); + for (String value : mergedValues) { + builder.append(value).append(separator); } + return builder.substring(0, builder.length() - separator.length()); + } - /** - * Write the output key and value using the OutputFormat defined by the - * alias. - * - * @param alias the name given to the OutputFormat configuration - * @param key the output key to be written - * @param value the output value to be written - * @param context the Mapper or Reducer Context - * @throws IOException - * @throws InterruptedException - */ - public static void write(String alias, K key, V value, TaskInputOutputContext context) - throws IOException, InterruptedException { - KeyValue keyval = new KeyValue(key, value); - context.write(new Text(alias), keyval); - } + private static String getAliasConfName(String alias) { + return MO_ALIAS + "." + alias + ".conf"; + } - @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - for (String alias : getOutputFormatAliases(context)) { - LOGGER.debug("Calling checkOutputSpecs for alias: " + alias); - JobContext aliasContext = getJobContext(alias, context); - OutputFormat outputFormat = getOutputFormatInstance(aliasContext); - outputFormat.checkOutputSpecs(aliasContext); - // Copy credentials and any new config added back to JobContext - context.getCredentials().addAll(aliasContext.getCredentials()); - setAliasConf(alias, context, aliasContext); - } + private static void addToConfig(String aliasConf, Configuration conf) { + String[] config = aliasConf.split(CONF_KEY_DELIM + "|" + CONF_VALUE_DELIM); + for (int i = 0; i < config.length; i += 2) { + conf.set(config[i], config[i + 1]); } + } - @Override - public RecordWriter getRecordWriter(TaskAttemptContext context) - throws IOException, - InterruptedException { - return new MultiRecordWriter(context); - } + /** + * Class that supports configuration of the job for multiple output formats. + */ + public static class JobConfigurer { - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, - InterruptedException { - return new MultiOutputCommitter(context); - } + private final Job job; + private Map outputConfigs = new LinkedHashMap(); - private static OutputFormat getOutputFormatInstance(JobContext context) { - OutputFormat outputFormat; - try { - outputFormat = ReflectionUtils.newInstance(context.getOutputFormatClass(), - context.getConfiguration()); - } catch (ClassNotFoundException e) { - throw new IllegalStateException(e); - } - return outputFormat; + private JobConfigurer(Job job) { + this.job = job; } - private static String[] getOutputFormatAliases(JobContext context) { - return context.getConfiguration().getStrings(MO_ALIASES); + private static JobConfigurer create(Job job) { + JobConfigurer configurer = new JobConfigurer(job); + return configurer; } /** - * Compare the aliasContext with userJob and add the differing configuration - * as mapreduce.multiout.alias..conf to the userJob. - *

- * Merge config like tmpjars, tmpfile, tmparchives, - * mapreduce.job.hdfs-servers that are directly handled by JobClient and add - * them to userJob. - *

- * Add mapred.output.dir config to userJob. + * Add a OutputFormat configuration to the Job with a alias name. * - * @param alias alias name associated with a OutputFormat - * @param userJob reference to Job that the user is going to submit - * @param aliasContext JobContext populated with OutputFormat related - * configuration. + * @param alias the name to be given to the OutputFormat configuration + * @param outputFormatClass OutputFormat class + * @param keyClass the key class for the output data + * @param valueClass the value class for the output data + * @throws IOException */ - private static void setAliasConf(String alias, JobContext userJob, JobContext aliasContext) { - Configuration userConf = userJob.getConfiguration(); - StringBuilder builder = new StringBuilder(); - for (Entry conf : aliasContext.getConfiguration()) { - String key = conf.getKey(); - String value = conf.getValue(); - String jobValue = userConf.getRaw(key); - if (jobValue == null || !jobValue.equals(value)) { - if (configsToMerge.containsKey(key)) { - String mergedValue = getMergedConfValue(jobValue, value, configsToMerge.get(key)); - userConf.set(key, mergedValue); - } else { - if (configsToOverride.contains(key)) { - userConf.set(key, value); - } - builder.append(key).append(CONF_KEY_DELIM).append(value) - .append(CONF_VALUE_DELIM); - } - } - } - if (builder.length() > CONF_VALUE_DELIM.length()) { - builder.delete(builder.length() - CONF_VALUE_DELIM.length(), builder.length()); - userConf.set(getAliasConfName(alias), builder.toString()); - } + public void addOutputFormat(String alias, + Class outputFormatClass, + Class keyClass, Class valueClass) throws IOException { + Job copy = new Job(this.job.getConfiguration()); + outputConfigs.put(alias, copy); + copy.setOutputFormatClass(outputFormatClass); + copy.setOutputKeyClass(keyClass); + copy.setOutputValueClass(valueClass); } - private static String getMergedConfValue(String originalValues, String newValues, String separator) { - if (originalValues == null) { - return newValues; - } - Set mergedValues = new LinkedHashSet(); - mergedValues.addAll(Arrays.asList(StringUtils.split(originalValues, separator))); - mergedValues.addAll(Arrays.asList(StringUtils.split(newValues, separator))); - StringBuilder builder = new StringBuilder(originalValues.length() + newValues.length() + 2); - for (String value : mergedValues) { - builder.append(value).append(separator); - } - return builder.substring(0, builder.length() - separator.length()); - } - - private static String getAliasConfName(String alias) { - return MO_ALIAS + "." + alias + ".conf"; - } - - private static void addToConfig(String aliasConf, Configuration conf) { - String[] config = aliasConf.split(CONF_KEY_DELIM + "|" + CONF_VALUE_DELIM); - for (int i = 0; i < config.length; i += 2) { - conf.set(config[i], config[i + 1]); - } + /** + * Get the Job configuration for a OutputFormat defined by the alias + * name. The job returned by this method should be passed to the + * OutputFormat for any configuration instead of the Job that will be + * submitted to the JobClient. + * + * @param alias the name used for the OutputFormat during + * addOutputFormat + * @return Job + */ + public Job getJob(String alias) { + Job copy = outputConfigs.get(alias); + if (copy == null) { + throw new IllegalArgumentException("OutputFormat with alias " + alias + + " has not beed added"); + } + return copy; } /** - * Class that supports configuration of the job for multiple output formats. 
+ * Configure the job with the multiple output formats added. This method + * should be called after all the output formats have been added and + * configured and before the job submission. */ - public static class JobConfigurer { - - private final Job job; - private Map outputConfigs = new LinkedHashMap(); + public void configure() { + StringBuilder aliases = new StringBuilder(); + Configuration jobConf = job.getConfiguration(); + for (Entry entry : outputConfigs.entrySet()) { + // Copy credentials + job.getCredentials().addAll(entry.getValue().getCredentials()); + String alias = entry.getKey(); + aliases.append(alias).append(COMMA_DELIM); + // Store the differing configuration for each alias in the job + // as a setting. + setAliasConf(alias, job, entry.getValue()); + } + aliases.delete(aliases.length() - COMMA_DELIM.length(), aliases.length()); + jobConf.set(MO_ALIASES, aliases.toString()); + } - private JobConfigurer(Job job) { - this.job = job; - } + } - private static JobConfigurer create(Job job) { - JobConfigurer configurer = new JobConfigurer(job); - return configurer; - } + private static class KeyValue implements Writable { + private final K key; + private final V value; - /** - * Add a OutputFormat configuration to the Job with a alias name. - * - * @param alias the name to be given to the OutputFormat configuration - * @param outputFormatClass OutputFormat class - * @param keyClass the key class for the output data - * @param valueClass the value class for the output data - * @throws IOException - */ - public void addOutputFormat(String alias, - Class outputFormatClass, - Class keyClass, Class valueClass) throws IOException { - Job copy = new Job(this.job.getConfiguration()); - outputConfigs.put(alias, copy); - copy.setOutputFormatClass(outputFormatClass); - copy.setOutputKeyClass(keyClass); - copy.setOutputValueClass(valueClass); - } + public KeyValue(K key, V value) { + this.key = key; + this.value = value; + } - /** - * Get the Job configuration for a OutputFormat defined by the alias - * name. The job returned by this method should be passed to the - * OutputFormat for any configuration instead of the Job that will be - * submitted to the JobClient. - * - * @param alias the name used for the OutputFormat during - * addOutputFormat - * @return Job - */ - public Job getJob(String alias) { - Job copy = outputConfigs.get(alias); - if (copy == null) { - throw new IllegalArgumentException("OutputFormat with alias " + alias - + " has not beed added"); - } - return copy; - } + public K getKey() { + return key; + } - /** - * Configure the job with the multiple output formats added. This method - * should be called after all the output formats have been added and - * configured and before the job submission. - */ - public void configure() { - StringBuilder aliases = new StringBuilder(); - Configuration jobConf = job.getConfiguration(); - for (Entry entry : outputConfigs.entrySet()) { - // Copy credentials - job.getCredentials().addAll(entry.getValue().getCredentials()); - String alias = entry.getKey(); - aliases.append(alias).append(COMMA_DELIM); - // Store the differing configuration for each alias in the job - // as a setting. - setAliasConf(alias, job, entry.getValue()); - } - aliases.delete(aliases.length() - COMMA_DELIM.length(), aliases.length()); - jobConf.set(MO_ALIASES, aliases.toString()); - } + public V getValue() { + return value; + } + @Override + public void write(DataOutput out) throws IOException { + // Ignore. 
Not required as this will be never + // serialized/deserialized. } - private static class KeyValue implements Writable { - private final K key; - private final V value; + @Override + public void readFields(DataInput in) throws IOException { + // Ignore. Not required as this will be never + // serialized/deserialized. + } + } - public KeyValue(K key, V value) { - this.key = key; - this.value = value; - } + private static class MultiRecordWriter extends RecordWriter { - public K getKey() { - return key; - } + private final Map baseRecordWriters; - public V getValue() { - return value; - } + public MultiRecordWriter(TaskAttemptContext context) throws IOException, + InterruptedException { + baseRecordWriters = new LinkedHashMap(); + String[] aliases = getOutputFormatAliases(context); + for (String alias : aliases) { + LOGGER.info("Creating record writer for alias: " + alias); + TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); + Configuration aliasConf = aliasContext.getConfiguration(); + // Create output directory if not already created. + String outDir = aliasConf.get("mapred.output.dir"); + if (outDir != null) { + Path outputDir = new Path(outDir); + FileSystem fs = outputDir.getFileSystem(aliasConf); + if (!fs.exists(outputDir)) { + fs.mkdirs(outputDir); + } + } + OutputFormat outputFormat = getOutputFormatInstance(aliasContext); + baseRecordWriters.put(alias, + new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), + aliasContext)); + } + } - @Override - public void write(DataOutput out) throws IOException { - // Ignore. Not required as this will be never - // serialized/deserialized. - } + @Override + public void write(Writable key, Writable value) throws IOException, InterruptedException { + Text _key = (Text) key; + KeyValue _value = (KeyValue) value; + String alias = new String(_key.getBytes(), 0, _key.getLength()); + BaseRecordWriterContainer baseRWContainer = baseRecordWriters.get(alias); + if (baseRWContainer == null) { + throw new IllegalArgumentException("OutputFormat with alias " + alias + + " has not been added"); + } + baseRWContainer.getRecordWriter().write(_value.getKey(), _value.getValue()); + } - @Override - public void readFields(DataInput in) throws IOException { - // Ignore. Not required as this will be never - // serialized/deserialized. - } + @Override + public void close(TaskAttemptContext context) throws IOException, InterruptedException { + for (Entry entry : baseRecordWriters.entrySet()) { + BaseRecordWriterContainer baseRWContainer = entry.getValue(); + LOGGER.info("Closing record writer for alias: " + entry.getKey()); + baseRWContainer.getRecordWriter().close(baseRWContainer.getContext()); + } } - private static class MultiRecordWriter extends RecordWriter { - - private final Map baseRecordWriters; - - public MultiRecordWriter(TaskAttemptContext context) throws IOException, - InterruptedException { - baseRecordWriters = new LinkedHashMap(); - String[] aliases = getOutputFormatAliases(context); - for (String alias : aliases) { - LOGGER.info("Creating record writer for alias: " + alias); - TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); - Configuration aliasConf = aliasContext.getConfiguration(); - // Create output directory if not already created. 
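// Illustrative sketch (editor's addition, not part of this patch): driver-side
// use of the JobConfigurer API above. It assumes the enclosing class is
// HCatalog's MultiOutputFormat and that it exposes a static
// createConfigurer(Job) wrapper around JobConfigurer.create(); that helper is
// not visible in this hunk. Aliases, paths and key/value types are
// hypothetical, and the import package may be org.apache.hcatalog.mapreduce
// instead, depending on which namespace the class lives in.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat;
import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer;

public class MultiOutputDriverSketch {
  public static Job configureOutputs(Configuration conf) throws Exception {
    Job job = new Job(conf, "multi-output-example");
    job.setOutputFormatClass(MultiOutputFormat.class);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("text", TextOutputFormat.class, LongWritable.class, Text.class);
    configurer.addOutputFormat("seq", SequenceFileOutputFormat.class, Text.class, IntWritable.class);

    // Per-alias settings go on the alias Job returned by getJob(), not on the
    // Job that will actually be submitted.
    TextOutputFormat.setOutputPath(configurer.getJob("text"), new Path("/tmp/out/text"));
    SequenceFileOutputFormat.setOutputPath(configurer.getJob("seq"), new Path("/tmp/out/seq"));

    // Must be called after all output formats have been added and before submission.
    configurer.configure();
    return job;  // input format, mapper, etc. still need to be set before submitting
  }
}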
- String outDir = aliasConf.get("mapred.output.dir"); - if (outDir != null) { - Path outputDir = new Path(outDir); - FileSystem fs = outputDir.getFileSystem(aliasConf); - if (!fs.exists(outputDir)) { - fs.mkdirs(outputDir); - } - } - OutputFormat outputFormat = getOutputFormatInstance(aliasContext); - baseRecordWriters.put(alias, - new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), - aliasContext)); - } - } + } - @Override - public void write(Writable key, Writable value) throws IOException, InterruptedException { - Text _key = (Text) key; - KeyValue _value = (KeyValue) value; - String alias = new String(_key.getBytes(), 0, _key.getLength()); - BaseRecordWriterContainer baseRWContainer = baseRecordWriters.get(alias); - if (baseRWContainer == null) { - throw new IllegalArgumentException("OutputFormat with alias " + alias - + " has not been added"); - } - baseRWContainer.getRecordWriter().write(_value.getKey(), _value.getValue()); - } + private static class BaseRecordWriterContainer { - @Override - public void close(TaskAttemptContext context) throws IOException, InterruptedException { - for (Entry entry : baseRecordWriters.entrySet()) { - BaseRecordWriterContainer baseRWContainer = entry.getValue(); - LOGGER.info("Closing record writer for alias: " + entry.getKey()); - baseRWContainer.getRecordWriter().close(baseRWContainer.getContext()); - } - } + private final RecordWriter recordWriter; + private final TaskAttemptContext context; + public BaseRecordWriterContainer(RecordWriter recordWriter, TaskAttemptContext context) { + this.recordWriter = recordWriter; + this.context = context; } - private static class BaseRecordWriterContainer { - - private final RecordWriter recordWriter; - private final TaskAttemptContext context; - - public BaseRecordWriterContainer(RecordWriter recordWriter, TaskAttemptContext context) { - this.recordWriter = recordWriter; - this.context = context; - } - - public RecordWriter getRecordWriter() { - return recordWriter; - } + public RecordWriter getRecordWriter() { + return recordWriter; + } - public TaskAttemptContext getContext() { - return context; - } + public TaskAttemptContext getContext() { + return context; } + } - public class MultiOutputCommitter extends OutputCommitter { + public class MultiOutputCommitter extends OutputCommitter { - private final Map outputCommitters; + private final Map outputCommitters; - public MultiOutputCommitter(TaskAttemptContext context) throws IOException, - InterruptedException { - outputCommitters = new LinkedHashMap(); - String[] aliases = getOutputFormatAliases(context); - for (String alias : aliases) { - LOGGER.info("Creating output committer for alias: " + alias); - TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); - OutputCommitter baseCommitter = getOutputFormatInstance(aliasContext) - .getOutputCommitter(aliasContext); - outputCommitters.put(alias, - new BaseOutputCommitterContainer(baseCommitter, aliasContext)); - } - } + public MultiOutputCommitter(TaskAttemptContext context) throws IOException, + InterruptedException { + outputCommitters = new LinkedHashMap(); + String[] aliases = getOutputFormatAliases(context); + for (String alias : aliases) { + LOGGER.info("Creating output committer for alias: " + alias); + TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); + OutputCommitter baseCommitter = getOutputFormatInstance(aliasContext) + .getOutputCommitter(aliasContext); + outputCommitters.put(alias, + new BaseOutputCommitterContainer(baseCommitter, 
aliasContext)); + } + } - @Override - public void setupJob(JobContext jobContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling setupJob for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().setupJob(outputContainer.getContext()); - } - } + @Override + public void setupJob(JobContext jobContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling setupJob for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().setupJob(outputContainer.getContext()); + } + } - @Override - public void setupTask(TaskAttemptContext taskContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling setupTask for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().setupTask(outputContainer.getContext()); - } - } + @Override + public void setupTask(TaskAttemptContext taskContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling setupTask for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().setupTask(outputContainer.getContext()); + } + } - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { - boolean needTaskCommit = false; - for (String alias : outputCommitters.keySet()) { - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - needTaskCommit = needTaskCommit - || outputContainer.getBaseCommitter().needsTaskCommit( - outputContainer.getContext()); - } - return needTaskCommit; - } + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { + boolean needTaskCommit = false; + for (String alias : outputCommitters.keySet()) { + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + needTaskCommit = needTaskCommit + || outputContainer.getBaseCommitter().needsTaskCommit( + outputContainer.getContext()); + } + return needTaskCommit; + } - @Override - public void commitTask(TaskAttemptContext taskContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - OutputCommitter baseCommitter = outputContainer.getBaseCommitter(); - TaskAttemptContext committerContext = outputContainer.getContext(); - if (baseCommitter.needsTaskCommit(committerContext)) { - LOGGER.info("Calling commitTask for alias: " + alias); - baseCommitter.commitTask(committerContext); - } - } - } + @Override + public void commitTask(TaskAttemptContext taskContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + OutputCommitter baseCommitter = outputContainer.getBaseCommitter(); + TaskAttemptContext committerContext = outputContainer.getContext(); + if (baseCommitter.needsTaskCommit(committerContext)) { + LOGGER.info("Calling commitTask for alias: " + alias); + baseCommitter.commitTask(committerContext); + } + } + } - @Override - public void abortTask(TaskAttemptContext taskContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling abortTask for alias: " + alias); - 
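// Illustrative sketch (editor's addition, not part of this patch): mapper-side
// writes routed by alias. It assumes the enclosing MultiOutputFormat class
// offers a static write(alias, key, value, context) convenience that wraps the
// alias in a Text key and the record in the KeyValue holder shown above; that
// helper is not visible in this hunk. Aliases and types follow the driver
// sketch earlier and are hypothetical.
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat;

public class MultiOutputMapperSketch
    extends Mapper<LongWritable, Text, Writable, Writable> {
  @Override
  protected void map(LongWritable offset, Text line, Context context)
      throws IOException, InterruptedException {
    // Each record may be sent to one or more of the configured aliases; the
    // MultiRecordWriter above looks the alias up and forwards the record to
    // the matching base RecordWriter.
    MultiOutputFormat.write("text", offset, line, context);
    MultiOutputFormat.write("seq", new Text("length"), new IntWritable(line.getLength()), context);
  }
}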
BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().abortTask(outputContainer.getContext()); - } - } + @Override + public void abortTask(TaskAttemptContext taskContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling abortTask for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().abortTask(outputContainer.getContext()); + } + } - @Override - public void commitJob(JobContext jobContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling commitJob for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().commitJob(outputContainer.getContext()); - } - } + @Override + public void commitJob(JobContext jobContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling commitJob for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().commitJob(outputContainer.getContext()); + } + } - @Override - public void abortJob(JobContext jobContext, State state) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling abortJob for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().abortJob(outputContainer.getContext(), state); - } - } + @Override + public void abortJob(JobContext jobContext, State state) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling abortJob for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().abortJob(outputContainer.getContext(), state); + } } + } - private static class BaseOutputCommitterContainer { + private static class BaseOutputCommitterContainer { - private final OutputCommitter outputCommitter; - private final TaskAttemptContext context; + private final OutputCommitter outputCommitter; + private final TaskAttemptContext context; - public BaseOutputCommitterContainer(OutputCommitter outputCommitter, - TaskAttemptContext context) { - this.outputCommitter = outputCommitter; - this.context = context; - } + public BaseOutputCommitterContainer(OutputCommitter outputCommitter, + TaskAttemptContext context) { + this.outputCommitter = outputCommitter; + this.context = context; + } - public OutputCommitter getBaseCommitter() { - return outputCommitter; - } + public OutputCommitter getBaseCommitter() { + return outputCommitter; + } - public TaskAttemptContext getContext() { - return context; - } + public TaskAttemptContext getContext() { + return context; } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputCommitterContainer.java index 3c68b0c..2125e14 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputCommitterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputCommitterContainer.java @@ -27,21 +27,21 @@ * See {@link OutputFormatContainer} for more information about containers. 
*/ abstract class OutputCommitterContainer extends OutputCommitter { - private final org.apache.hadoop.mapred.OutputCommitter committer; + private final org.apache.hadoop.mapred.OutputCommitter committer; - /** - * @param context current JobContext - * @param committer OutputCommitter that this instance will contain - */ - public OutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter committer) { - this.committer = committer; - } + /** + * @param context current JobContext + * @param committer OutputCommitter that this instance will contain + */ + public OutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter committer) { + this.committer = committer; + } - /** - * @return underlying OutputCommitter - */ - public OutputCommitter getBaseOutputCommitter() { - return committer; - } + /** + * @return underlying OutputCommitter + */ + public OutputCommitter getBaseOutputCommitter() { + return committer; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputFormatContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputFormatContainer.java index 07b63dd..d83b003 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputFormatContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputFormatContainer.java @@ -34,20 +34,20 @@ * such as partitioning isn't supported. */ abstract class OutputFormatContainer extends OutputFormat, HCatRecord> { - private org.apache.hadoop.mapred.OutputFormat, ? super Writable> of; + private org.apache.hadoop.mapred.OutputFormat, ? super Writable> of; - /** - * @param of OutputFormat this instance will contain - */ - public OutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { - this.of = of; - } + /** + * @param of OutputFormat this instance will contain + */ + public OutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { + this.of = of; + } - /** - * @return underlying OutputFormat - */ - public org.apache.hadoop.mapred.OutputFormat getBaseOutputFormat() { - return of; - } + /** + * @return underlying OutputFormat + */ + public org.apache.hadoop.mapred.OutputFormat getBaseOutputFormat() { + return of; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputJobInfo.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputJobInfo.java index 36c166a..b63bdc2 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputJobInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputJobInfo.java @@ -32,239 +32,239 @@ /** The class used to serialize and store the output related information */ public class OutputJobInfo implements Serializable { - /** The db and table names. */ - private final String databaseName; - private final String tableName; - - /** The serialization version. */ - private static final long serialVersionUID = 1L; - - /** The table info provided by user. */ - private HCatTableInfo tableInfo; - - /** The output schema. This is given to us by user. This wont contain any - * partition columns ,even if user has specified them. 
- * */ - private HCatSchema outputSchema; - - /** The location of the partition being written */ - private String location; - - /** The partition values to publish to, if used for output*/ - private Map partitionValues; - - private List posOfPartCols; - private List posOfDynPartCols; - - private Properties properties; - - private int maxDynamicPartitions; - - /** List of keys for which values were not specified at write setup time, to be infered at write time */ - private List dynamicPartitioningKeys; - - private boolean harRequested; - - /** - * Initializes a new OutputJobInfo instance - * for writing data from a table. - * @param databaseName the db name - * @param tableName the table name - * @param partitionValues The partition values to publish to, can be null or empty Map to - * work with hadoop security, the kerberos principal name of the server - else null - * The principal name should be of the form: - * /_HOST@ like "hcat/_HOST@myrealm.com" - * The special string _HOST will be replaced automatically with the correct host name - * indicate write to a unpartitioned table. For partitioned tables, this map should - * contain keys for all partition columns with corresponding values. - */ - public static OutputJobInfo create(String databaseName, - String tableName, - Map partitionValues) { - return new OutputJobInfo(databaseName, - tableName, - partitionValues); - } - - private OutputJobInfo(String databaseName, - String tableName, - Map partitionValues) { - this.databaseName = (databaseName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; - this.tableName = tableName; - this.partitionValues = partitionValues; - this.properties = new Properties(); - } - - /** - * @return the posOfPartCols - */ - protected List getPosOfPartCols() { - return posOfPartCols; - } - - /** - * @return the posOfDynPartCols - */ - protected List getPosOfDynPartCols() { - return posOfDynPartCols; - } - - /** - * @param posOfPartCols the posOfPartCols to set - */ - protected void setPosOfPartCols(List posOfPartCols) { - // sorting the list in the descending order so that deletes happen back-to-front - Collections.sort(posOfPartCols, new Comparator() { - @Override - public int compare(Integer earlier, Integer later) { - return (earlier > later) ? -1 : ((earlier == later) ? 0 : 1); - } - }); - this.posOfPartCols = posOfPartCols; - } - - /** - * @param posOfDynPartCols the posOfDynPartCols to set - */ - protected void setPosOfDynPartCols(List posOfDynPartCols) { - // Important - no sorting here! 
We retain order, it's used to match with values at runtime - this.posOfDynPartCols = posOfDynPartCols; - } - - /** - * @return the tableInfo - */ - public HCatTableInfo getTableInfo() { - return tableInfo; - } - - /** - * @return the outputSchema - */ - public HCatSchema getOutputSchema() { - return outputSchema; - } - - /** - * @param schema the outputSchema to set - */ - public void setOutputSchema(HCatSchema schema) { - this.outputSchema = schema; - } - - /** - * @return the location - */ - public String getLocation() { - return location; - } - - /** - * @param location location to write to - */ - public void setLocation(String location) { - this.location = location; - } - - /** - * Sets the value of partitionValues - * @param partitionValues the partition values to set - */ - void setPartitionValues(Map partitionValues) { - this.partitionValues = partitionValues; - } - - /** - * Gets the value of partitionValues - * @return the partitionValues - */ - public Map getPartitionValues() { - return partitionValues; - } - - /** - * set the tablInfo instance - * this should be the same instance - * determined by this object's DatabaseName and TableName - * @param tableInfo - */ - void setTableInfo(HCatTableInfo tableInfo) { - this.tableInfo = tableInfo; - } - - /** - * @return database name of table to write to - */ - public String getDatabaseName() { - return databaseName; - } - - /** - * @return name of table to write to - */ - public String getTableName() { - return tableName; - } - - /** - * Set/Get Property information to be passed down to *StorageHandler implementation - * put implementation specific storage handler configurations here - * @return the implementation specific job properties - */ - public Properties getProperties() { - return properties; - } - - /** - * Set maximum number of allowable dynamic partitions - * @param maxDynamicPartitions - */ - public void setMaximumDynamicPartitions(int maxDynamicPartitions) { - this.maxDynamicPartitions = maxDynamicPartitions; - } - - /** - * Returns maximum number of allowable dynamic partitions - * @return maximum number of allowable dynamic partitions - */ - public int getMaxDynamicPartitions() { - return this.maxDynamicPartitions; - } - - /** - * Sets whether or not hadoop archiving has been requested for this job - * @param harRequested - */ - public void setHarRequested(boolean harRequested) { - this.harRequested = harRequested; - } - - /** - * Returns whether or not hadoop archiving has been requested for this job - * @return whether or not hadoop archiving has been requested for this job - */ - public boolean getHarRequested() { - return this.harRequested; - } - - /** - * Returns whether or not Dynamic Partitioning is used - * @return whether or not dynamic partitioning is currently enabled and used - */ - public boolean isDynamicPartitioningUsed() { - return !((dynamicPartitioningKeys == null) || (dynamicPartitioningKeys.isEmpty())); - } - - /** - * Sets the list of dynamic partitioning keys used for outputting without specifying all the keys - * @param dynamicPartitioningKeys - */ - public void setDynamicPartitioningKeys(List dynamicPartitioningKeys) { - this.dynamicPartitioningKeys = dynamicPartitioningKeys; - } - - public List getDynamicPartitioningKeys() { - return this.dynamicPartitioningKeys; - } + /** The db and table names. */ + private final String databaseName; + private final String tableName; + + /** The serialization version. */ + private static final long serialVersionUID = 1L; + + /** The table info provided by user. 
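// Illustrative sketch (editor's addition, not part of this patch): the usual
// way an OutputJobInfo is created and handed to HCatOutputFormat when writing
// to a partitioned table. The database, table and partition values are
// hypothetical, and the exact HCatOutputFormat signatures (Job vs.
// Configuration arguments) differ slightly between HCatalog releases.
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteSetupSketch {
  public static Job configureOutput(Configuration conf) throws Exception {
    Job job = new Job(conf, "hcat-write-example");

    Map<String, String> partitionValues = new HashMap<String, String>();
    partitionValues.put("ds", "20130101");   // one entry per partition column

    // A null database name falls back to the default database, as the
    // constructor above shows.
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(null, "mytable", partitionValues));

    // Write with the table's schema; a projection could be set instead.
    HCatSchema schema = HCatOutputFormat.getTableSchema(job);
    HCatOutputFormat.setSchema(job, schema);

    job.setOutputFormatClass(HCatOutputFormat.class);
    return job;
  }
}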
*/ + private HCatTableInfo tableInfo; + + /** The output schema. This is given to us by user. This wont contain any + * partition columns ,even if user has specified them. + * */ + private HCatSchema outputSchema; + + /** The location of the partition being written */ + private String location; + + /** The partition values to publish to, if used for output*/ + private Map partitionValues; + + private List posOfPartCols; + private List posOfDynPartCols; + + private Properties properties; + + private int maxDynamicPartitions; + + /** List of keys for which values were not specified at write setup time, to be infered at write time */ + private List dynamicPartitioningKeys; + + private boolean harRequested; + + /** + * Initializes a new OutputJobInfo instance + * for writing data from a table. + * @param databaseName the db name + * @param tableName the table name + * @param partitionValues The partition values to publish to, can be null or empty Map to + * work with hadoop security, the kerberos principal name of the server - else null + * The principal name should be of the form: + * /_HOST@ like "hcat/_HOST@myrealm.com" + * The special string _HOST will be replaced automatically with the correct host name + * indicate write to a unpartitioned table. For partitioned tables, this map should + * contain keys for all partition columns with corresponding values. + */ + public static OutputJobInfo create(String databaseName, + String tableName, + Map partitionValues) { + return new OutputJobInfo(databaseName, + tableName, + partitionValues); + } + + private OutputJobInfo(String databaseName, + String tableName, + Map partitionValues) { + this.databaseName = (databaseName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; + this.tableName = tableName; + this.partitionValues = partitionValues; + this.properties = new Properties(); + } + + /** + * @return the posOfPartCols + */ + protected List getPosOfPartCols() { + return posOfPartCols; + } + + /** + * @return the posOfDynPartCols + */ + protected List getPosOfDynPartCols() { + return posOfDynPartCols; + } + + /** + * @param posOfPartCols the posOfPartCols to set + */ + protected void setPosOfPartCols(List posOfPartCols) { + // sorting the list in the descending order so that deletes happen back-to-front + Collections.sort(posOfPartCols, new Comparator() { + @Override + public int compare(Integer earlier, Integer later) { + return (earlier > later) ? -1 : ((earlier == later) ? 0 : 1); + } + }); + this.posOfPartCols = posOfPartCols; + } + + /** + * @param posOfDynPartCols the posOfDynPartCols to set + */ + protected void setPosOfDynPartCols(List posOfDynPartCols) { + // Important - no sorting here! 
We retain order, it's used to match with values at runtime + this.posOfDynPartCols = posOfDynPartCols; + } + + /** + * @return the tableInfo + */ + public HCatTableInfo getTableInfo() { + return tableInfo; + } + + /** + * @return the outputSchema + */ + public HCatSchema getOutputSchema() { + return outputSchema; + } + + /** + * @param schema the outputSchema to set + */ + public void setOutputSchema(HCatSchema schema) { + this.outputSchema = schema; + } + + /** + * @return the location + */ + public String getLocation() { + return location; + } + + /** + * @param location location to write to + */ + public void setLocation(String location) { + this.location = location; + } + + /** + * Sets the value of partitionValues + * @param partitionValues the partition values to set + */ + void setPartitionValues(Map partitionValues) { + this.partitionValues = partitionValues; + } + + /** + * Gets the value of partitionValues + * @return the partitionValues + */ + public Map getPartitionValues() { + return partitionValues; + } + + /** + * set the tablInfo instance + * this should be the same instance + * determined by this object's DatabaseName and TableName + * @param tableInfo + */ + void setTableInfo(HCatTableInfo tableInfo) { + this.tableInfo = tableInfo; + } + + /** + * @return database name of table to write to + */ + public String getDatabaseName() { + return databaseName; + } + + /** + * @return name of table to write to + */ + public String getTableName() { + return tableName; + } + + /** + * Set/Get Property information to be passed down to *StorageHandler implementation + * put implementation specific storage handler configurations here + * @return the implementation specific job properties + */ + public Properties getProperties() { + return properties; + } + + /** + * Set maximum number of allowable dynamic partitions + * @param maxDynamicPartitions + */ + public void setMaximumDynamicPartitions(int maxDynamicPartitions) { + this.maxDynamicPartitions = maxDynamicPartitions; + } + + /** + * Returns maximum number of allowable dynamic partitions + * @return maximum number of allowable dynamic partitions + */ + public int getMaxDynamicPartitions() { + return this.maxDynamicPartitions; + } + + /** + * Sets whether or not hadoop archiving has been requested for this job + * @param harRequested + */ + public void setHarRequested(boolean harRequested) { + this.harRequested = harRequested; + } + + /** + * Returns whether or not hadoop archiving has been requested for this job + * @return whether or not hadoop archiving has been requested for this job + */ + public boolean getHarRequested() { + return this.harRequested; + } + + /** + * Returns whether or not Dynamic Partitioning is used + * @return whether or not dynamic partitioning is currently enabled and used + */ + public boolean isDynamicPartitioningUsed() { + return !((dynamicPartitioningKeys == null) || (dynamicPartitioningKeys.isEmpty())); + } + + /** + * Sets the list of dynamic partitioning keys used for outputting without specifying all the keys + * @param dynamicPartitioningKeys + */ + public void setDynamicPartitioningKeys(List dynamicPartitioningKeys) { + this.dynamicPartitioningKeys = dynamicPartitioningKeys; + } + + public List getDynamicPartitioningKeys() { + return this.dynamicPartitioningKeys; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java index 57b84b2..a76d912 100644 --- 
a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java @@ -27,138 +27,138 @@ /** The Class used to serialize the partition information read from the metadata server that maps to a partition. */ public class PartInfo implements Serializable { - /** The serialization version */ - private static final long serialVersionUID = 1L; - - /** The partition schema. */ - private final HCatSchema partitionSchema; - - /** The information about which input storage handler to use */ - private final String storageHandlerClassName; - private final String inputFormatClassName; - private final String outputFormatClassName; - private final String serdeClassName; - - /** HCat-specific properties set at the partition */ - private final Properties hcatProperties; - - /** The data location. */ - private final String location; - - /** The map of partition key names and their values. */ - private Map partitionValues; - - /** Job properties associated with this parition */ - Map jobProperties; - - /** the table info associated with this partition */ - HCatTableInfo tableInfo; - - /** - * Instantiates a new hcat partition info. - * @param partitionSchema the partition schema - * @param storageHandler the storage handler - * @param location the location - * @param hcatProperties hcat-specific properties at the partition - * @param jobProperties the job properties - * @param tableInfo the table information - */ - public PartInfo(HCatSchema partitionSchema, HCatStorageHandler storageHandler, - String location, Properties hcatProperties, - Map jobProperties, HCatTableInfo tableInfo) { - this.partitionSchema = partitionSchema; - this.location = location; - this.hcatProperties = hcatProperties; - this.jobProperties = jobProperties; - this.tableInfo = tableInfo; - - this.storageHandlerClassName = storageHandler.getClass().getName(); - this.inputFormatClassName = storageHandler.getInputFormatClass().getName(); - this.serdeClassName = storageHandler.getSerDeClass().getName(); - this.outputFormatClassName = storageHandler.getOutputFormatClass().getName(); - } - - /** - * Gets the value of partitionSchema. - * @return the partitionSchema - */ - public HCatSchema getPartitionSchema() { - return partitionSchema; - } - - /** - * @return the storage handler class name - */ - public String getStorageHandlerClassName() { - return storageHandlerClassName; - } - - /** - * @return the inputFormatClassName - */ - public String getInputFormatClassName() { - return inputFormatClassName; - } - - /** - * @return the outputFormatClassName - */ - public String getOutputFormatClassName() { - return outputFormatClassName; - } - - /** - * @return the serdeClassName - */ - public String getSerdeClassName() { - return serdeClassName; - } - - /** - * Gets the input storage handler properties. - * @return HCat-specific properties set at the partition - */ - public Properties getInputStorageHandlerProperties() { - return hcatProperties; - } - - /** - * Gets the value of location. - * @return the location - */ - public String getLocation() { - return location; - } - - /** - * Sets the partition values. - * @param partitionValues the new partition values - */ - public void setPartitionValues(Map partitionValues) { - this.partitionValues = partitionValues; - } - - /** - * Gets the partition values. - * @return the partition values - */ - public Map getPartitionValues() { - return partitionValues; - } - - /** - * Gets the job properties. 
- * @return a map of the job properties - */ - public Map getJobProperties() { - return jobProperties; - } - - /** - * Gets the HCatalog table information. - * @return the table information - */ - public HCatTableInfo getTableInfo() { - return tableInfo; - } + /** The serialization version */ + private static final long serialVersionUID = 1L; + + /** The partition schema. */ + private final HCatSchema partitionSchema; + + /** The information about which input storage handler to use */ + private final String storageHandlerClassName; + private final String inputFormatClassName; + private final String outputFormatClassName; + private final String serdeClassName; + + /** HCat-specific properties set at the partition */ + private final Properties hcatProperties; + + /** The data location. */ + private final String location; + + /** The map of partition key names and their values. */ + private Map partitionValues; + + /** Job properties associated with this parition */ + Map jobProperties; + + /** the table info associated with this partition */ + HCatTableInfo tableInfo; + + /** + * Instantiates a new hcat partition info. + * @param partitionSchema the partition schema + * @param storageHandler the storage handler + * @param location the location + * @param hcatProperties hcat-specific properties at the partition + * @param jobProperties the job properties + * @param tableInfo the table information + */ + public PartInfo(HCatSchema partitionSchema, HCatStorageHandler storageHandler, + String location, Properties hcatProperties, + Map jobProperties, HCatTableInfo tableInfo) { + this.partitionSchema = partitionSchema; + this.location = location; + this.hcatProperties = hcatProperties; + this.jobProperties = jobProperties; + this.tableInfo = tableInfo; + + this.storageHandlerClassName = storageHandler.getClass().getName(); + this.inputFormatClassName = storageHandler.getInputFormatClass().getName(); + this.serdeClassName = storageHandler.getSerDeClass().getName(); + this.outputFormatClassName = storageHandler.getOutputFormatClass().getName(); + } + + /** + * Gets the value of partitionSchema. + * @return the partitionSchema + */ + public HCatSchema getPartitionSchema() { + return partitionSchema; + } + + /** + * @return the storage handler class name + */ + public String getStorageHandlerClassName() { + return storageHandlerClassName; + } + + /** + * @return the inputFormatClassName + */ + public String getInputFormatClassName() { + return inputFormatClassName; + } + + /** + * @return the outputFormatClassName + */ + public String getOutputFormatClassName() { + return outputFormatClassName; + } + + /** + * @return the serdeClassName + */ + public String getSerdeClassName() { + return serdeClassName; + } + + /** + * Gets the input storage handler properties. + * @return HCat-specific properties set at the partition + */ + public Properties getInputStorageHandlerProperties() { + return hcatProperties; + } + + /** + * Gets the value of location. + * @return the location + */ + public String getLocation() { + return location; + } + + /** + * Sets the partition values. + * @param partitionValues the new partition values + */ + public void setPartitionValues(Map partitionValues) { + this.partitionValues = partitionValues; + } + + /** + * Gets the partition values. + * @return the partition values + */ + public Map getPartitionValues() { + return partitionValues; + } + + /** + * Gets the job properties. 
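// Illustrative sketch (editor's addition, not part of this patch): how the
// class names recorded in a PartInfo are typically turned back into live
// objects on the read path -- reflection plus the partition-specific job
// properties. This is a simplified outline, not HCatalog's actual read code;
// names are hypothetical and error handling is omitted.
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hive.hcatalog.mapreduce.PartInfo;

public class PartInfoReadSketch {
  public static Object createInputFormat(PartInfo partInfo, Configuration baseConf)
      throws ClassNotFoundException {
    // Work on a copy so partition-specific overrides don't leak into the job.
    Configuration conf = new Configuration(baseConf);
    if (partInfo.getJobProperties() != null) {
      for (Map.Entry<String, String> entry : partInfo.getJobProperties().entrySet()) {
        conf.set(entry.getKey(), entry.getValue());
      }
    }
    Class<?> inputFormatClass = conf.getClassByName(partInfo.getInputFormatClassName());
    return ReflectionUtils.newInstance(inputFormatClass, conf);
  }
}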
+ * @return a map of the job properties + */ + public Map getJobProperties() { + return jobProperties; + } + + /** + * Gets the HCatalog table information. + * @return the table information + */ + public HCatTableInfo getTableInfo() { + return tableInfo; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/ProgressReporter.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/ProgressReporter.java index 99a7ef3..40b9bb1 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/ProgressReporter.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/ProgressReporter.java @@ -28,65 +28,65 @@ class ProgressReporter extends StatusReporter implements Reporter { - private TaskInputOutputContext context = null; - private TaskAttemptContext taskAttemptContext = null; + private TaskInputOutputContext context = null; + private TaskAttemptContext taskAttemptContext = null; - public ProgressReporter(TaskAttemptContext context) { - if (context instanceof TaskInputOutputContext) { - this.context = (TaskInputOutputContext) context; - } else { - taskAttemptContext = context; - } + public ProgressReporter(TaskAttemptContext context) { + if (context instanceof TaskInputOutputContext) { + this.context = (TaskInputOutputContext) context; + } else { + taskAttemptContext = context; } + } - @Override - public void setStatus(String status) { - if (context != null) { - context.setStatus(status); - } + @Override + public void setStatus(String status) { + if (context != null) { + context.setStatus(status); } + } - @Override - public Counters.Counter getCounter(Enum name) { - return (context != null) ? (Counters.Counter) context.getCounter(name) : null; - } + @Override + public Counters.Counter getCounter(Enum name) { + return (context != null) ? (Counters.Counter) context.getCounter(name) : null; + } - @Override - public Counters.Counter getCounter(String group, String name) { - return (context != null) ? (Counters.Counter) context.getCounter(group, name) : null; - } + @Override + public Counters.Counter getCounter(String group, String name) { + return (context != null) ? (Counters.Counter) context.getCounter(group, name) : null; + } - @Override - public void incrCounter(Enum key, long amount) { - if (context != null) { - context.getCounter(key).increment(amount); - } + @Override + public void incrCounter(Enum key, long amount) { + if (context != null) { + context.getCounter(key).increment(amount); } + } - @Override - public void incrCounter(String group, String counter, long amount) { - if (context != null) { - context.getCounter(group, counter).increment(amount); - } + @Override + public void incrCounter(String group, String counter, long amount) { + if (context != null) { + context.getCounter(group, counter).increment(amount); } + } - @Override - public InputSplit getInputSplit() throws UnsupportedOperationException { - return null; - } + @Override + public InputSplit getInputSplit() throws UnsupportedOperationException { + return null; + } - public float getProgress() { - /* Required to build against 0.23 Reporter and StatusReporter. */ - /* TODO: determine the progress. */ - return 0.0f; - } + public float getProgress() { + /* Required to build against 0.23 Reporter and StatusReporter. */ + /* TODO: determine the progress. 
*/ + return 0.0f; + } - @Override - public void progress() { - if (context != null) { - context.progress(); - } else { - taskAttemptContext.progress(); - } + @Override + public void progress() { + if (context != null) { + context.progress(); + } else { + taskAttemptContext.progress(); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/RecordWriterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/RecordWriterContainer.java index fcae25b..5905b46 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/RecordWriterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/RecordWriterContainer.java @@ -32,22 +32,22 @@ */ abstract class RecordWriterContainer extends RecordWriter, HCatRecord> { - private final org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter; - - /** - * @param context current JobContext - * @param baseRecordWriter RecordWriter that this instance will contain - */ - public RecordWriterContainer(TaskAttemptContext context, - org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter) { - this.baseRecordWriter = baseRecordWriter; - } - - /** - * @return underlying RecordWriter - */ - public org.apache.hadoop.mapred.RecordWriter getBaseRecordWriter() { - return baseRecordWriter; - } + private final org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter; + + /** + * @param context current JobContext + * @param baseRecordWriter RecordWriter that this instance will contain + */ + public RecordWriterContainer(TaskAttemptContext context, + org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter) { + this.baseRecordWriter = baseRecordWriter; + } + + /** + * @return underlying RecordWriter + */ + public org.apache.hadoop.mapred.RecordWriter getBaseRecordWriter() { + return baseRecordWriter; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java index d4dc276..39ef86e 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java @@ -46,146 +46,146 @@ final class Security { - private static final Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); - - // making sure this is not initialized unless needed - private static final class LazyHolder { - public static final Security INSTANCE = new Security(); + private static final Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); + + // making sure this is not initialized unless needed + private static final class LazyHolder { + public static final Security INSTANCE = new Security(); + } + + public static Security getInstance() { + return LazyHolder.INSTANCE; + } + + boolean isSecurityEnabled() { + try { + Method m = UserGroupInformation.class.getMethod("isSecurityEnabled"); + return (Boolean) m.invoke(null, (Object[]) null); + } catch (NoSuchMethodException e) { + LOG.info("Security is not supported by this version of hadoop.", e); + } catch (InvocationTargetException e) { + String msg = "Failed to call isSecurityEnabled()"; + LOG.info(msg, e); + throw new IllegalStateException(msg, e); + } catch (IllegalAccessException e) { + String msg = "Failed to call isSecurityEnabled()"; + LOG.info(msg, e); + throw new IllegalStateException(msg, e); } - - public static Security getInstance() { - return LazyHolder.INSTANCE; + 
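// Illustrative sketch (editor's addition, not part of this patch): why
// ProgressReporter exists -- it lets a new-API (mapreduce) context be handed
// to old-API (mapred) components that expect a Reporter, for example when
// closing the wrapped writer held by a RecordWriterContainer. Both classes are
// package-private, so a snippet like this only compiles inside
// org.apache.hive.hcatalog.mapreduce; variable names are hypothetical.
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

class ReporterBridgeSketch {
  static void closeOldApiWriter(RecordWriter<WritableComparable<?>, Writable> baseWriter,
                                TaskAttemptContext newApiContext) throws IOException {
    // ProgressReporter implements org.apache.hadoop.mapred.Reporter, so the
    // old-API writer can keep reporting progress through the new-API context.
    baseWriter.close(new ProgressReporter(newApiContext));
  }
}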
return false; + } + + // a signature string to associate with a HCatTableInfo - essentially + // a concatenation of dbname, tablename and partition keyvalues. + String getTokenSignature(OutputJobInfo outputJobInfo) { + StringBuilder result = new StringBuilder(""); + String dbName = outputJobInfo.getDatabaseName(); + if (dbName != null) { + result.append(dbName); } - - boolean isSecurityEnabled() { - try { - Method m = UserGroupInformation.class.getMethod("isSecurityEnabled"); - return (Boolean) m.invoke(null, (Object[]) null); - } catch (NoSuchMethodException e) { - LOG.info("Security is not supported by this version of hadoop.", e); - } catch (InvocationTargetException e) { - String msg = "Failed to call isSecurityEnabled()"; - LOG.info(msg, e); - throw new IllegalStateException(msg, e); - } catch (IllegalAccessException e) { - String msg = "Failed to call isSecurityEnabled()"; - LOG.info(msg, e); - throw new IllegalStateException(msg, e); - } - return false; + String tableName = outputJobInfo.getTableName(); + if (tableName != null) { + result.append("." + tableName); } + Map partValues = outputJobInfo.getPartitionValues(); + if (partValues != null) { + for (Entry entry : partValues.entrySet()) { + result.append("/"); + result.append(entry.getKey()); + result.append("="); + result.append(entry.getValue()); + } - // a signature string to associate with a HCatTableInfo - essentially - // a concatenation of dbname, tablename and partition keyvalues. - String getTokenSignature(OutputJobInfo outputJobInfo) { - StringBuilder result = new StringBuilder(""); - String dbName = outputJobInfo.getDatabaseName(); - if (dbName != null) { - result.append(dbName); - } - String tableName = outputJobInfo.getTableName(); - if (tableName != null) { - result.append("." + tableName); - } - Map partValues = outputJobInfo.getPartitionValues(); - if (partValues != null) { - for (Entry entry : partValues.entrySet()) { - result.append("/"); - result.append(entry.getKey()); - result.append("="); - result.append(entry.getValue()); - } - - } - return result.toString(); } - - void handleSecurity( - Credentials credentials, - OutputJobInfo outputJobInfo, - HiveMetaStoreClient client, - Configuration conf, - boolean harRequested) - throws IOException, MetaException, TException, Exception { - if (UserGroupInformation.isSecurityEnabled()) { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - // check if oozie has set up a hcat deleg. token - if so use it - TokenSelector hiveTokenSelector = new DelegationTokenSelector(); - //Oozie does not change the service field of the token - //hence by default token generation will have a value of "new Text("")" - //HiveClient will look for a use TokenSelector.selectToken() with service - //set to empty "Text" if hive.metastore.token.signature property is set to null - Token hiveToken = hiveTokenSelector.selectToken( - new Text(), ugi.getTokens()); - if (hiveToken == null) { - // we did not get token set up by oozie, let's get them ourselves here. 
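// Illustrative sketch (editor's addition, not part of this patch): what
// getTokenSignature() above produces -- "<db>.<table>" followed by one
// "/key=value" segment per partition key. The values are hypothetical, and
// since Security and getTokenSignature() are package-private this only
// compiles inside org.apache.hive.hcatalog.mapreduce.
import java.util.Collections;

class TokenSignatureSketch {
  static String example() {
    OutputJobInfo info = OutputJobInfo.create(
        "default", "mytable", Collections.singletonMap("ds", "20130101"));
    // Returns "default.mytable/ds=20130101".
    return Security.getInstance().getTokenSignature(info);
  }
}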
- // we essentially get a token per unique Output HCatTableInfo - this is - // done because through Pig, setOutput() method is called multiple times - // We want to only get the token once per unique output HCatTableInfo - - // we cannot just get one token since in multi-query case (> 1 store in 1 job) - // or the case when a single pig script results in > 1 jobs, the single - // token will get cancelled by the output committer and the subsequent - // stores will fail - by tying the token with the concatenation of - // dbname, tablename and partition keyvalues of the output - // TableInfo, we can have as many tokens as there are stores and the TokenSelector - // will correctly pick the right tokens which the committer will use and - // cancel. - String tokenSignature = getTokenSignature(outputJobInfo); - // get delegation tokens from hcat server and store them into the "job" - // These will be used in to publish partitions to - // hcat normally in OutputCommitter.commitJob() - // when the JobTracker in Hadoop MapReduce starts supporting renewal of - // arbitrary tokens, the renewer should be the principal of the JobTracker - hiveToken = HCatUtil.extractThriftToken(client.getDelegationToken(ugi.getUserName()), tokenSignature); - - if (harRequested) { - TokenSelector jtTokenSelector = - new org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenSelector(); - Token jtToken = jtTokenSelector.selectToken(org.apache.hadoop.security.SecurityUtil.buildTokenService( - ShimLoader.getHadoopShims().getHCatShim().getResourceManagerAddress(conf)), ugi.getTokens()); - if (jtToken == null) { - //we don't need to cancel this token as the TokenRenewer for JT tokens - //takes care of cancelling them - credentials.addToken( - new Text("hcat jt token"), - HCatUtil.getJobTrackerDelegationToken(conf, ugi.getUserName()) - ); - } - } - - credentials.addToken(new Text(ugi.getUserName() + "_" + tokenSignature), hiveToken); - // this will be used by the outputcommitter to pass on to the metastore client - // which in turn will pass on to the TokenSelector so that it can select - // the right token. - conf.set(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE, tokenSignature); - } + return result.toString(); + } + + void handleSecurity( + Credentials credentials, + OutputJobInfo outputJobInfo, + HiveMetaStoreClient client, + Configuration conf, + boolean harRequested) + throws IOException, MetaException, TException, Exception { + if (UserGroupInformation.isSecurityEnabled()) { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + // check if oozie has set up a hcat deleg. token - if so use it + TokenSelector hiveTokenSelector = new DelegationTokenSelector(); + //Oozie does not change the service field of the token + //hence by default token generation will have a value of "new Text("")" + //HiveClient will look for a use TokenSelector.selectToken() with service + //set to empty "Text" if hive.metastore.token.signature property is set to null + Token hiveToken = hiveTokenSelector.selectToken( + new Text(), ugi.getTokens()); + if (hiveToken == null) { + // we did not get token set up by oozie, let's get them ourselves here. 
+ // we essentially get a token per unique Output HCatTableInfo - this is + // done because through Pig, setOutput() method is called multiple times + // We want to only get the token once per unique output HCatTableInfo - + // we cannot just get one token since in multi-query case (> 1 store in 1 job) + // or the case when a single pig script results in > 1 jobs, the single + // token will get cancelled by the output committer and the subsequent + // stores will fail - by tying the token with the concatenation of + // dbname, tablename and partition keyvalues of the output + // TableInfo, we can have as many tokens as there are stores and the TokenSelector + // will correctly pick the right tokens which the committer will use and + // cancel. + String tokenSignature = getTokenSignature(outputJobInfo); + // get delegation tokens from hcat server and store them into the "job" + // These will be used in to publish partitions to + // hcat normally in OutputCommitter.commitJob() + // when the JobTracker in Hadoop MapReduce starts supporting renewal of + // arbitrary tokens, the renewer should be the principal of the JobTracker + hiveToken = HCatUtil.extractThriftToken(client.getDelegationToken(ugi.getUserName()), tokenSignature); + + if (harRequested) { + TokenSelector jtTokenSelector = + new org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenSelector(); + Token jtToken = jtTokenSelector.selectToken(org.apache.hadoop.security.SecurityUtil.buildTokenService( + ShimLoader.getHadoopShims().getHCatShim().getResourceManagerAddress(conf)), ugi.getTokens()); + if (jtToken == null) { + //we don't need to cancel this token as the TokenRenewer for JT tokens + //takes care of cancelling them + credentials.addToken( + new Text("hcat jt token"), + HCatUtil.getJobTrackerDelegationToken(conf, ugi.getUserName()) + ); + } } - } - void handleSecurity( - Job job, - OutputJobInfo outputJobInfo, - HiveMetaStoreClient client, - Configuration conf, - boolean harRequested) - throws IOException, MetaException, TException, Exception { - handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested); + credentials.addToken(new Text(ugi.getUserName() + "_" + tokenSignature), hiveToken); + // this will be used by the outputcommitter to pass on to the metastore client + // which in turn will pass on to the TokenSelector so that it can select + // the right token. + conf.set(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE, tokenSignature); + } } - - // we should cancel hcat token if it was acquired by hcat - // and not if it was supplied (ie Oozie). In the latter - // case the HCAT_KEY_TOKEN_SIGNATURE property in the conf will not be set - void cancelToken(HiveMetaStoreClient client, JobContext context) throws IOException, MetaException { - String tokenStrForm = client.getTokenStrForm(); - if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - try { - client.cancelDelegationToken(tokenStrForm); - } catch (TException e) { - String msg = "Failed to cancel delegation token"; - LOG.error(msg, e); - throw new IOException(msg, e); - } - } + } + + void handleSecurity( + Job job, + OutputJobInfo outputJobInfo, + HiveMetaStoreClient client, + Configuration conf, + boolean harRequested) + throws IOException, MetaException, TException, Exception { + handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested); + } + + // we should cancel hcat token if it was acquired by hcat + // and not if it was supplied (ie Oozie). 
In the latter + // case the HCAT_KEY_TOKEN_SIGNATURE property in the conf will not be set + void cancelToken(HiveMetaStoreClient client, JobContext context) throws IOException, MetaException { + String tokenStrForm = client.getTokenStrForm(); + if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + try { + client.cancelDelegationToken(tokenStrForm); + } catch (TException e) { + String msg = "Failed to cancel delegation token"; + LOG.error(msg, e); + throw new IOException(msg, e); + } } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/StorerInfo.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/StorerInfo.java index 47f8439..4a9aa36 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/StorerInfo.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/StorerInfo.java @@ -25,85 +25,85 @@ /** Information about the storer to use for writing the data. */ public class StorerInfo implements Serializable { - /** The serialization version */ - private static final long serialVersionUID = 1L; - - /** The properties for the storage handler */ - private Properties properties; - - private String ofClass; - - private String ifClass; - - private String serdeClass; - - private String storageHandlerClass; - - /** - * Initialize the storer information. - * @param ifClass the input format class - * @param ofClass the output format class - * @param serdeClass the SerDe class - * @param storageHandlerClass the storage handler class - * @param properties the properties for the storage handler - */ - public StorerInfo(String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) { - super(); - this.ifClass = ifClass; - this.ofClass = ofClass; - this.serdeClass = serdeClass; - this.storageHandlerClass = storageHandlerClass; - this.properties = properties; - } - - /** - * @return the input format class - */ - public String getIfClass() { - return ifClass; - } - - /** - * @param ifClass the input format class - */ - public void setIfClass(String ifClass) { - this.ifClass = ifClass; - } - - /** - * @return the output format class - */ - public String getOfClass() { - return ofClass; - } - - /** - * @return the serdeClass - */ - public String getSerdeClass() { - return serdeClass; - } - - /** - * @return the storageHandlerClass - */ - public String getStorageHandlerClass() { - return storageHandlerClass; - } - - /** - * @return the storer properties - */ - public Properties getProperties() { - return properties; - } - - /** - * @param properties the storer properties to set - */ - public void setProperties(Properties properties) { - this.properties = properties; - } + /** The serialization version */ + private static final long serialVersionUID = 1L; + + /** The properties for the storage handler */ + private Properties properties; + + private String ofClass; + + private String ifClass; + + private String serdeClass; + + private String storageHandlerClass; + + /** + * Initialize the storer information. 
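// Illustrative sketch (editor's addition, not part of this patch): building a
// StorerInfo by hand. The class names below are ordinary Hive RCFile /
// ColumnarSerDe choices used purely as example values; in practice HCatalog
// derives them from the table's storage descriptor.
import java.util.Properties;
import org.apache.hive.hcatalog.mapreduce.StorerInfo;

class StorerInfoSketch {
  static StorerInfo example() {
    return new StorerInfo(
        "org.apache.hadoop.hive.ql.io.RCFileInputFormat",        // input format class
        "org.apache.hadoop.hive.ql.io.RCFileOutputFormat",       // output format class
        "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe",  // SerDe class
        null,                                                    // no custom storage handler
        new Properties());                                       // no extra storage properties
  }
}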
+ * @param ifClass the input format class + * @param ofClass the output format class + * @param serdeClass the SerDe class + * @param storageHandlerClass the storage handler class + * @param properties the properties for the storage handler + */ + public StorerInfo(String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) { + super(); + this.ifClass = ifClass; + this.ofClass = ofClass; + this.serdeClass = serdeClass; + this.storageHandlerClass = storageHandlerClass; + this.properties = properties; + } + + /** + * @return the input format class + */ + public String getIfClass() { + return ifClass; + } + + /** + * @param ifClass the input format class + */ + public void setIfClass(String ifClass) { + this.ifClass = ifClass; + } + + /** + * @return the output format class + */ + public String getOfClass() { + return ofClass; + } + + /** + * @return the serdeClass + */ + public String getSerdeClass() { + return serdeClass; + } + + /** + * @return the storageHandlerClass + */ + public String getStorageHandlerClass() { + return storageHandlerClass; + } + + /** + * @return the storer properties + */ + public Properties getProperties() { + return properties; + } + + /** + * @param properties the storer properties to set + */ + public void setProperties(Properties properties) { + this.properties = properties; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/oozie/JavaAction.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/oozie/JavaAction.java index 67517a1..fba0241 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/oozie/JavaAction.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/oozie/JavaAction.java @@ -28,14 +28,14 @@ public class JavaAction { - public static void main(String[] args) throws Exception { + public static void main(String[] args) throws Exception { - HiveConf conf = new HiveConf(); - conf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml"))); - conf.setVar(ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); - conf.setBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL, true); - SessionState.start(new CliSessionState(conf)); - new CliDriver().processLine(args[0]); - } + HiveConf conf = new HiveConf(); + conf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml"))); + conf.setVar(ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); + conf.setBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL, true); + SessionState.start(new CliSessionState(conf)); + new CliDriver().processLine(args[0]); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java index 11db1a6..bf2b799 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java @@ -31,20 +31,20 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; public class RCFileMapReduceInputFormat - extends FileInputFormat { + extends FileInputFormat { - @Override - public RecordReader createRecordReader(InputSplit split, - TaskAttemptContext context) throws IOException, InterruptedException { + @Override + public RecordReader createRecordReader(InputSplit split, + TaskAttemptContext context) throws IOException, InterruptedException { - context.setStatus(split.toString()); - 
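// Illustrative sketch (editor's addition, not part of this patch): a map-only
// job that reads RCFile data with the new-API input format above. Keys are
// byte offsets and values are columnar rows; the input path and mapper are
// hypothetical.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hive.hcatalog.rcfile.RCFileMapReduceInputFormat;

public class RCFileReadSketch {
  public static Job configureRead(Configuration conf) throws Exception {
    Job job = new Job(conf, "rcfile-read-example");
    job.setInputFormatClass(RCFileMapReduceInputFormat.class);
    RCFileMapReduceInputFormat.addInputPath(job, new Path("/tmp/rcfile-in"));
    job.setMapperClass(RowCountMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);  // this example only counts rows
    return job;
  }

  /** Each value holds one row's columns as byte ranges; here we just count rows. */
  public static class RowCountMapper
      extends Mapper<LongWritable, BytesRefArrayWritable, LongWritable, LongWritable> {
    @Override
    protected void map(LongWritable offset, BytesRefArrayWritable row, Context context) {
      context.getCounter("rcfile", "rows").increment(1);
    }
  }
}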
return new RCFileMapReduceRecordReader(); - } + context.setStatus(split.toString()); + return new RCFileMapReduceRecordReader(); + } - @Override - public List getSplits(JobContext job) throws IOException { + @Override + public List getSplits(JobContext job) throws IOException { - job.getConfiguration().setLong("mapred.min.split.size", SequenceFile.SYNC_INTERVAL); - return super.getSplits(job); - } + job.getConfiguration().setLong("mapred.min.split.size", SequenceFile.SYNC_INTERVAL); + return super.getSplits(job); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceOutputFormat.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceOutputFormat.java index de5dbe6..b09ab4c 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceOutputFormat.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceOutputFormat.java @@ -38,68 +38,68 @@ * The RC file input format using new Hadoop mapreduce APIs. */ public class RCFileMapReduceOutputFormat extends - FileOutputFormat, BytesRefArrayWritable> { + FileOutputFormat, BytesRefArrayWritable> { - /** - * Set number of columns into the given configuration. - * @param conf - * configuration instance which need to set the column number - * @param columnNum - * column number for RCFile's Writer - * - */ - public static void setColumnNumber(Configuration conf, int columnNum) { - assert columnNum > 0; - conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNum); - } + /** + * Set number of columns into the given configuration. + * @param conf + * configuration instance which need to set the column number + * @param columnNum + * column number for RCFile's Writer + * + */ + public static void setColumnNumber(Configuration conf, int columnNum) { + assert columnNum > 0; + conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNum); + } - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public org.apache.hadoop.mapreduce.RecordWriter, BytesRefArrayWritable> getRecordWriter( - TaskAttemptContext task) throws IOException, InterruptedException { + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext) + */ + @Override + public org.apache.hadoop.mapreduce.RecordWriter, BytesRefArrayWritable> getRecordWriter( + TaskAttemptContext task) throws IOException, InterruptedException { - //FileOutputFormat.getWorkOutputPath takes TaskInputOutputContext instead of - //TaskAttemptContext, so can't use that here - FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(task); - Path outputPath = committer.getWorkPath(); + //FileOutputFormat.getWorkOutputPath takes TaskInputOutputContext instead of + //TaskAttemptContext, so can't use that here + FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(task); + Path outputPath = committer.getWorkPath(); - FileSystem fs = outputPath.getFileSystem(task.getConfiguration()); + FileSystem fs = outputPath.getFileSystem(task.getConfiguration()); - if (!fs.exists(outputPath)) { - fs.mkdirs(outputPath); - } + if (!fs.exists(outputPath)) { + fs.mkdirs(outputPath); + } - Path file = getDefaultWorkFile(task, ""); + Path file = getDefaultWorkFile(task, ""); - CompressionCodec codec = null; - if (getCompressOutput(task)) { - Class codecClass = getOutputCompressorClass(task, 
DefaultCodec.class); - codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration()); - } + CompressionCodec codec = null; + if (getCompressOutput(task)) { + Class codecClass = getOutputCompressorClass(task, DefaultCodec.class); + codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration()); + } - final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec); + final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec); - return new RecordWriter, BytesRefArrayWritable>() { + return new RecordWriter, BytesRefArrayWritable>() { - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object) - */ - @Override - public void write(WritableComparable key, BytesRefArrayWritable value) - throws IOException { - out.append(value); - } + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object) + */ + @Override + public void write(WritableComparable key, BytesRefArrayWritable value) + throws IOException { + out.append(value); + } - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public void close(TaskAttemptContext task) throws IOException, InterruptedException { - out.close(); - } - }; - } + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext) + */ + @Override + public void close(TaskAttemptContext task) throws IOException, InterruptedException { + out.close(); + } + }; + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceRecordReader.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceRecordReader.java index d2a5175..fd42b5a 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceRecordReader.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceRecordReader.java @@ -32,90 +32,90 @@ import org.apache.hadoop.mapreduce.lib.input.FileSplit; public class RCFileMapReduceRecordReader - extends RecordReader { - - private Reader in; - private long start; - private long end; - private boolean more = true; - - // key and value objects are created once in initialize() and then reused - // for every getCurrentKey() and getCurrentValue() call. This is important - // since RCFile makes an assumption of this fact. - - private LongWritable key; - private BytesRefArrayWritable value; - - @Override - public void close() throws IOException { - in.close(); - } - - @Override - public LongWritable getCurrentKey() throws IOException, InterruptedException { - return key; + extends RecordReader { + + private Reader in; + private long start; + private long end; + private boolean more = true; + + // key and value objects are created once in initialize() and then reused + // for every getCurrentKey() and getCurrentValue() call. This is important + // since RCFile makes an assumption of this fact. 
+ + private LongWritable key; + private BytesRefArrayWritable value; + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public LongWritable getCurrentKey() throws IOException, InterruptedException { + return key; + } + + @Override + public BytesRefArrayWritable getCurrentValue() throws IOException, InterruptedException { + return value; + } + + @Override + public float getProgress() throws IOException, InterruptedException { + if (end == start) { + return 0.0f; + } else { + return Math.min(1.0f, (in.getPosition() - start) / (float) (end - start)); } + } - @Override - public BytesRefArrayWritable getCurrentValue() throws IOException, InterruptedException { - return value; - } + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { - @Override - public float getProgress() throws IOException, InterruptedException { - if (end == start) { - return 0.0f; - } else { - return Math.min(1.0f, (in.getPosition() - start) / (float) (end - start)); - } + more = next(key); + if (more) { + in.getCurrentRow(value); } - @Override - public boolean nextKeyValue() throws IOException, InterruptedException { + return more; + } - more = next(key); - if (more) { - in.getCurrentRow(value); - } + private boolean next(LongWritable key) throws IOException { + if (!more) { + return false; + } - return more; + more = in.next(key); + if (!more) { + return false; } - private boolean next(LongWritable key) throws IOException { - if (!more) { - return false; - } - - more = in.next(key); - if (!more) { - return false; - } - - if (in.lastSeenSyncPos() >= end) { - more = false; - return more; - } - return more; + if (in.lastSeenSyncPos() >= end) { + more = false; + return more; } + return more; + } - @Override - public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, - InterruptedException { + @Override + public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, + InterruptedException { - FileSplit fSplit = (FileSplit) split; - Path path = fSplit.getPath(); - Configuration conf = context.getConfiguration(); - this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf); - this.end = fSplit.getStart() + fSplit.getLength(); + FileSplit fSplit = (FileSplit) split; + Path path = fSplit.getPath(); + Configuration conf = context.getConfiguration(); + this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf); + this.end = fSplit.getStart() + fSplit.getLength(); - if (fSplit.getStart() > in.getPosition()) { - in.sync(fSplit.getStart()); - } + if (fSplit.getStart() > in.getPosition()) { + in.sync(fSplit.getStart()); + } - this.start = in.getPosition(); - more = start < end; + this.start = in.getPosition(); + more = start < end; - key = new LongWritable(); - value = new BytesRefArrayWritable(); - } + key = new LongWritable(); + value = new BytesRefArrayWritable(); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/HdfsAuthorizationProvider.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/HdfsAuthorizationProvider.java index 1139a53..3233a45 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/HdfsAuthorizationProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/HdfsAuthorizationProvider.java @@ -59,279 +59,279 @@ */ public class HdfsAuthorizationProvider extends HiveAuthorizationProviderBase { - protected Warehouse wh; - - //Config variables : create an enum to store them if we have more - 
private static final String PROXY_USER_NAME = "proxy.user.name"; - - public HdfsAuthorizationProvider() { - super(); + protected Warehouse wh; + + //Config variables : create an enum to store them if we have more + private static final String PROXY_USER_NAME = "proxy.user.name"; + + public HdfsAuthorizationProvider() { + super(); + } + + public HdfsAuthorizationProvider(Configuration conf) { + super(); + setConf(conf); + } + + @Override + public void init(Configuration conf) throws HiveException { + hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); + } + + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + try { + this.wh = new Warehouse(conf); + } catch (MetaException ex) { + throw new RuntimeException(ex); } - - public HdfsAuthorizationProvider(Configuration conf) { - super(); - setConf(conf); + } + + protected FsAction getFsAction(Privilege priv, Path path) { + + switch (priv.getPriv()) { + case ALL: + throw new AuthorizationException("no matching Action for Privilege.All"); + case ALTER_DATA: + return FsAction.WRITE; + case ALTER_METADATA: + return FsAction.WRITE; + case CREATE: + return FsAction.WRITE; + case DROP: + return FsAction.WRITE; + case INDEX: + return FsAction.WRITE; + case LOCK: + return FsAction.WRITE; + case SELECT: + return FsAction.READ; + case SHOW_DATABASE: + return FsAction.READ; + case UNKNOWN: + default: + throw new AuthorizationException("Unknown privilege"); } + } - @Override - public void init(Configuration conf) throws HiveException { - hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); - } + protected EnumSet getFsActions(Privilege[] privs, Path path) { + EnumSet actions = EnumSet.noneOf(FsAction.class); - @Override - public void setConf(Configuration conf) { - super.setConf(conf); - try { - this.wh = new Warehouse(conf); - } catch (MetaException ex) { - throw new RuntimeException(ex); - } + if (privs == null) { + return actions; } - protected FsAction getFsAction(Privilege priv, Path path) { - - switch (priv.getPriv()) { - case ALL: - throw new AuthorizationException("no matching Action for Privilege.All"); - case ALTER_DATA: - return FsAction.WRITE; - case ALTER_METADATA: - return FsAction.WRITE; - case CREATE: - return FsAction.WRITE; - case DROP: - return FsAction.WRITE; - case INDEX: - return FsAction.WRITE; - case LOCK: - return FsAction.WRITE; - case SELECT: - return FsAction.READ; - case SHOW_DATABASE: - return FsAction.READ; - case UNKNOWN: - default: - throw new AuthorizationException("Unknown privilege"); - } + for (Privilege priv : privs) { + actions.add(getFsAction(priv, path)); } - protected EnumSet getFsActions(Privilege[] privs, Path path) { - EnumSet actions = EnumSet.noneOf(FsAction.class); - - if (privs == null) { - return actions; - } + return actions; + } - for (Privilege priv : privs) { - actions.add(getFsAction(priv, path)); - } + private static final String DATABASE_WAREHOUSE_SUFFIX = ".db"; - return actions; + private Path getDefaultDatabasePath(String dbName) throws MetaException { + if (dbName.equalsIgnoreCase(DEFAULT_DATABASE_NAME)) { + return wh.getWhRoot(); } - - private static final String DATABASE_WAREHOUSE_SUFFIX = ".db"; - - private Path getDefaultDatabasePath(String dbName) throws MetaException { - if (dbName.equalsIgnoreCase(DEFAULT_DATABASE_NAME)) { - return wh.getWhRoot(); - } - return new Path(wh.getWhRoot(), dbName.toLowerCase() + DATABASE_WAREHOUSE_SUFFIX); + return new Path(wh.getWhRoot(), dbName.toLowerCase() + 
DATABASE_WAREHOUSE_SUFFIX); + } + + protected Path getDbLocation(Database db) throws HiveException { + try { + String location = db.getLocationUri(); + if (location == null) { + return getDefaultDatabasePath(db.getName()); + } else { + return wh.getDnsPath(wh.getDatabasePath(db)); + } + } catch (MetaException ex) { + throw new HiveException(ex.getMessage()); } - - protected Path getDbLocation(Database db) throws HiveException { - try { - String location = db.getLocationUri(); - if (location == null) { - return getDefaultDatabasePath(db.getName()); - } else { - return wh.getDnsPath(wh.getDatabasePath(db)); - } - } catch (MetaException ex) { - throw new HiveException(ex.getMessage()); - } + } + + @Override + public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + //Authorize for global level permissions at the warehouse dir + Path root; + try { + root = wh.getWhRoot(); + authorize(root, readRequiredPriv, writeRequiredPriv); + } catch (MetaException ex) { + throw new HiveException(ex); } + } - @Override - public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - //Authorize for global level permissions at the warehouse dir - Path root; - try { - root = wh.getWhRoot(); - authorize(root, readRequiredPriv, writeRequiredPriv); - } catch (MetaException ex) { - throw new HiveException(ex); - } + @Override + public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + if (db == null) { + return; } - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - if (db == null) { - return; - } + Path path = getDbLocation(db); - Path path = getDbLocation(db); + authorize(path, readRequiredPriv, writeRequiredPriv); + } - authorize(path, readRequiredPriv, writeRequiredPriv); + @Override + public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + if (table == null) { + return; } - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - if (table == null) { - return; - } - - //unlike Hive's model, this can be called at CREATE TABLE as well, since we should authorize - //against the table's declared location - Path path = null; - try { - if (table.getTTable().getSd().getLocation() == null - || table.getTTable().getSd().getLocation().isEmpty()) { - path = wh.getTablePath(hive_db.getDatabase(table.getDbName()), table.getTableName()); - } else { - path = table.getPath(); - } - } catch (MetaException ex) { - throw new HiveException(ex); - } - - authorize(path, readRequiredPriv, writeRequiredPriv); + //unlike Hive's model, this can be called at CREATE TABLE as well, since we should authorize + //against the table's declared location + Path path = null; + try { + if (table.getTTable().getSd().getLocation() == null + || table.getTTable().getSd().getLocation().isEmpty()) { + path = wh.getTablePath(hive_db.getDatabase(table.getDbName()), table.getTableName()); + } else { + path = table.getPath(); + } + } catch (MetaException ex) { + throw new HiveException(ex); } - //TODO: HiveAuthorizationProvider should expose this interface instead of #authorize(Partition, Privilege[], Privilege[]) - public 
void authorize(Table table, Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { + authorize(path, readRequiredPriv, writeRequiredPriv); + } - if (part == null || part.getLocation() == null) { - authorize(table, readRequiredPriv, writeRequiredPriv); - } else { - authorize(part.getPartitionPath(), readRequiredPriv, writeRequiredPriv); - } - } + //TODO: HiveAuthorizationProvider should expose this interface instead of #authorize(Partition, Privilege[], Privilege[]) + public void authorize(Table table, Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - if (part == null) { - return; - } - authorize(part.getTable(), part, readRequiredPriv, writeRequiredPriv); + if (part == null || part.getLocation() == null) { + authorize(table, readRequiredPriv, writeRequiredPriv); + } else { + authorize(part.getPartitionPath(), readRequiredPriv, writeRequiredPriv); } + } - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - //columns cannot live in different files, just check for partition level permissions - authorize(table, part, readRequiredPriv, writeRequiredPriv); + @Override + public void authorize(Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + if (part == null) { + return; } - - /** - * Authorization privileges against a path. - * @param path a filesystem path - * @param readRequiredPriv a list of privileges needed for inputs. - * @param writeRequiredPriv a list of privileges needed for outputs. - */ - public void authorize(Path path, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - try { - EnumSet actions = getFsActions(readRequiredPriv, path); - actions.addAll(getFsActions(writeRequiredPriv, path)); - if (actions.isEmpty()) { - return; - } - - checkPermissions(getConf(), path, actions); - - } catch (AccessControlException ex) { - throw new AuthorizationException(ex); - } catch (LoginException ex) { - throw new AuthorizationException(ex); - } catch (IOException ex) { - throw new HiveException(ex); - } + authorize(part.getTable(), part, readRequiredPriv, writeRequiredPriv); + } + + @Override + public void authorize(Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + //columns cannot live in different files, just check for partition level permissions + authorize(table, part, readRequiredPriv, writeRequiredPriv); + } + + /** + * Authorization privileges against a path. + * @param path a filesystem path + * @param readRequiredPriv a list of privileges needed for inputs. + * @param writeRequiredPriv a list of privileges needed for outputs. 
+ */ + public void authorize(Path path, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + try { + EnumSet actions = getFsActions(readRequiredPriv, path); + actions.addAll(getFsActions(writeRequiredPriv, path)); + if (actions.isEmpty()) { + return; + } + + checkPermissions(getConf(), path, actions); + + } catch (AccessControlException ex) { + throw new AuthorizationException(ex); + } catch (LoginException ex) { + throw new AuthorizationException(ex); + } catch (IOException ex) { + throw new HiveException(ex); } + } - /** - * Checks the permissions for the given path and current user on Hadoop FS. If the given path - * does not exists, it checks for it's parent folder. - */ - protected static void checkPermissions(final Configuration conf, final Path path, - final EnumSet actions) throws IOException, LoginException { + /** + * Checks the permissions for the given path and current user on Hadoop FS. If the given path + * does not exists, it checks for it's parent folder. + */ + protected static void checkPermissions(final Configuration conf, final Path path, + final EnumSet actions) throws IOException, LoginException { - if (path == null) { - throw new IllegalArgumentException("path is null"); - } + if (path == null) { + throw new IllegalArgumentException("path is null"); + } - HadoopShims shims = ShimLoader.getHadoopShims(); - final UserGroupInformation ugi; - if (conf.get(PROXY_USER_NAME) != null) { - ugi = UserGroupInformation.createRemoteUser(conf.get(PROXY_USER_NAME)); - } else { - ugi = shims.getUGIForConf(conf); - } - final String user = shims.getShortUserName(ugi); - - final FileSystem fs = path.getFileSystem(conf); - - if (fs.exists(path)) { - checkPermissions(fs, path, actions, user, ugi.getGroupNames()); - } else if (path.getParent() != null) { - // find the ancestor which exists to check it's permissions - Path par = path.getParent(); - while (par != null) { - if (fs.exists(par)) { - break; - } - par = par.getParent(); - } - - checkPermissions(fs, par, actions, user, ugi.getGroupNames()); + HadoopShims shims = ShimLoader.getHadoopShims(); + final UserGroupInformation ugi; + if (conf.get(PROXY_USER_NAME) != null) { + ugi = UserGroupInformation.createRemoteUser(conf.get(PROXY_USER_NAME)); + } else { + ugi = shims.getUGIForConf(conf); + } + final String user = shims.getShortUserName(ugi); + + final FileSystem fs = path.getFileSystem(conf); + + if (fs.exists(path)) { + checkPermissions(fs, path, actions, user, ugi.getGroupNames()); + } else if (path.getParent() != null) { + // find the ancestor which exists to check it's permissions + Path par = path.getParent(); + while (par != null) { + if (fs.exists(par)) { + break; } + par = par.getParent(); + } + + checkPermissions(fs, par, actions, user, ugi.getGroupNames()); + } + } + + /** + * Checks the permissions for the given path and current user on Hadoop FS. If the given path + * does not exists, it returns. + */ + @SuppressWarnings("deprecation") + protected static void checkPermissions(final FileSystem fs, final Path path, + final EnumSet actions, String user, String[] groups) throws IOException, + AccessControlException { + + final FileStatus stat; + + try { + stat = fs.getFileStatus(path); + } catch (FileNotFoundException fnfe) { + // File named by path doesn't exist; nothing to validate. + return; + } catch (org.apache.hadoop.fs.permission.AccessControlException ace) { + // Older hadoop version will throw this @deprecated Exception. 
+ throw new AccessControlException(ace.getMessage()); } - /** - * Checks the permissions for the given path and current user on Hadoop FS. If the given path - * does not exists, it returns. - */ - @SuppressWarnings("deprecation") - protected static void checkPermissions(final FileSystem fs, final Path path, - final EnumSet actions, String user, String[] groups) throws IOException, - AccessControlException { - - final FileStatus stat; - - try { - stat = fs.getFileStatus(path); - } catch (FileNotFoundException fnfe) { - // File named by path doesn't exist; nothing to validate. - return; - } catch (org.apache.hadoop.fs.permission.AccessControlException ace) { - // Older hadoop version will throw this @deprecated Exception. - throw new AccessControlException(ace.getMessage()); - } + final FsPermission dirPerms = stat.getPermission(); + final String grp = stat.getGroup(); - final FsPermission dirPerms = stat.getPermission(); - final String grp = stat.getGroup(); - - for (FsAction action : actions) { - if (user.equals(stat.getOwner())) { - if (dirPerms.getUserAction().implies(action)) { - continue; - } - } - if (ArrayUtils.contains(groups, grp)) { - if (dirPerms.getGroupAction().implies(action)) { - continue; - } - } - if (dirPerms.getOtherAction().implies(action)) { - continue; - } - throw new AccessControlException("action " + action + " not permitted on path " - + path + " for user " + user); + for (FsAction action : actions) { + if (user.equals(stat.getOwner())) { + if (dirPerms.getUserAction().implies(action)) { + continue; + } + } + if (ArrayUtils.contains(groups, grp)) { + if (dirPerms.getGroupAction().implies(action)) { + continue; } + } + if (dirPerms.getOtherAction().implies(action)) { + continue; + } + throw new AccessControlException("action " + action + " not permitted on path " + + path + " for user " + user); } + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/StorageDelegationAuthorizationProvider.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/StorageDelegationAuthorizationProvider.java index 6c28803..da1962e 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/StorageDelegationAuthorizationProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/StorageDelegationAuthorizationProvider.java @@ -45,98 +45,98 @@ */ public class StorageDelegationAuthorizationProvider extends HiveAuthorizationProviderBase { - protected HiveAuthorizationProvider hdfsAuthorizer = new HdfsAuthorizationProvider(); - - protected static Map authProviders = new HashMap(); - - @Override - public void setConf(Configuration conf) { - super.setConf(conf); - hdfsAuthorizer.setConf(conf); - } - - @Override - public void init(Configuration conf) throws HiveException { - hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); - } - - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - super.setAuthenticator(authenticator); - hdfsAuthorizer.setAuthenticator(authenticator); - } - - static { - registerAuthProvider("org.apache.hadoop.hive.hbase.HBaseStorageHandler", - "org.apache.hive.hcatalog.hbase.HBaseAuthorizationProvider"); - registerAuthProvider("org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler", - "org.apache.hive.hcatalog.hbase.HBaseAuthorizationProvider"); - } - - //workaround until Hive adds StorageHandler.getAuthorizationProvider(). 
Remove these parts afterwards - public static void registerAuthProvider(String storageHandlerClass, - String authProviderClass) { - authProviders.put(storageHandlerClass, authProviderClass); - } - - /** Returns the StorageHandler of the Table obtained from the HCatStorageHandler */ - protected HiveAuthorizationProvider getDelegate(Table table) throws HiveException { - HiveStorageHandler handler = table.getStorageHandler(); - - if (handler != null) { - if (handler instanceof HCatStorageHandler) { - return ((HCatStorageHandler) handler).getAuthorizationProvider(); - } else { - String authProviderClass = authProviders.get(handler.getClass().getCanonicalName()); - - if (authProviderClass != null) { - try { - ReflectionUtils.newInstance(getConf().getClassByName(authProviderClass), getConf()); - } catch (ClassNotFoundException ex) { - throw new HiveException("Cannot instantiate delegation AuthotizationProvider"); - } - } - - //else we do not have anything to delegate to - throw new HiveException(String.format("Storage Handler for table:%s is not an instance " + - "of HCatStorageHandler", table.getTableName())); - } - } else { - //return an authorizer for HDFS - return hdfsAuthorizer; + protected HiveAuthorizationProvider hdfsAuthorizer = new HdfsAuthorizationProvider(); + + protected static Map authProviders = new HashMap(); + + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + hdfsAuthorizer.setConf(conf); + } + + @Override + public void init(Configuration conf) throws HiveException { + hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); + } + + @Override + public void setAuthenticator(HiveAuthenticationProvider authenticator) { + super.setAuthenticator(authenticator); + hdfsAuthorizer.setAuthenticator(authenticator); + } + + static { + registerAuthProvider("org.apache.hadoop.hive.hbase.HBaseStorageHandler", + "org.apache.hive.hcatalog.hbase.HBaseAuthorizationProvider"); + registerAuthProvider("org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler", + "org.apache.hive.hcatalog.hbase.HBaseAuthorizationProvider"); + } + + //workaround until Hive adds StorageHandler.getAuthorizationProvider(). 
Remove these parts afterwards + public static void registerAuthProvider(String storageHandlerClass, + String authProviderClass) { + authProviders.put(storageHandlerClass, authProviderClass); + } + + /** Returns the StorageHandler of the Table obtained from the HCatStorageHandler */ + protected HiveAuthorizationProvider getDelegate(Table table) throws HiveException { + HiveStorageHandler handler = table.getStorageHandler(); + + if (handler != null) { + if (handler instanceof HCatStorageHandler) { + return ((HCatStorageHandler) handler).getAuthorizationProvider(); + } else { + String authProviderClass = authProviders.get(handler.getClass().getCanonicalName()); + + if (authProviderClass != null) { + try { + ReflectionUtils.newInstance(getConf().getClassByName(authProviderClass), getConf()); + } catch (ClassNotFoundException ex) { + throw new HiveException("Cannot instantiate delegation AuthotizationProvider"); + } } - } - - @Override - public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - //global authorizations against warehouse hdfs directory - hdfsAuthorizer.authorize(readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - //db's are tied to a hdfs location - hdfsAuthorizer.authorize(db, readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - getDelegate(table).authorize(table, readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { - getDelegate(part.getTable()).authorize(part, readRequiredPriv, writeRequiredPriv); - } - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - getDelegate(table).authorize(table, part, columns, readRequiredPriv, writeRequiredPriv); + //else we do not have anything to delegate to + throw new HiveException(String.format("Storage Handler for table:%s is not an instance " + + "of HCatStorageHandler", table.getTableName())); + } + } else { + //return an authorizer for HDFS + return hdfsAuthorizer; } + } + + @Override + public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + //global authorizations against warehouse hdfs directory + hdfsAuthorizer.authorize(readRequiredPriv, writeRequiredPriv); + } + + @Override + public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + //db's are tied to a hdfs location + hdfsAuthorizer.authorize(db, readRequiredPriv, writeRequiredPriv); + } + + @Override + public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + getDelegate(table).authorize(table, readRequiredPriv, writeRequiredPriv); + } + + @Override + public void authorize(Partition part, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { + getDelegate(part.getTable()).authorize(part, readRequiredPriv, 
writeRequiredPriv); + } + + @Override + public void authorize(Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + getDelegate(table).authorize(table, part, columns, readRequiredPriv, writeRequiredPriv); + } } diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/storagehandler/DummyHCatAuthProvider.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/storagehandler/DummyHCatAuthProvider.java index 5ced56c..9030cc6 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/storagehandler/DummyHCatAuthProvider.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/storagehandler/DummyHCatAuthProvider.java @@ -37,108 +37,108 @@ */ class DummyHCatAuthProvider implements HiveAuthorizationProvider { - @Override - public Configuration getConf() { - return null; - } + @Override + public Configuration getConf() { + return null; + } - @Override - public void setConf(Configuration conf) { - } + @Override + public void setConf(Configuration conf) { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #init(org.apache.hadoop.conf.Configuration) - */ - @Override - public void init(Configuration conf) throws HiveException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #init(org.apache.hadoop.conf.Configuration) + */ + @Override + public void init(Configuration conf) throws HiveException { + } - @Override - public HiveAuthenticationProvider getAuthenticator() { - return null; - } + @Override + public HiveAuthenticationProvider getAuthenticator() { + return null; + } - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - } + @Override + public void setAuthenticator(HiveAuthenticationProvider authenticator) { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.metastore.api.Database, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.metastore.api.Database, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * 
org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Database db, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Table, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Table table, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Partition, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Partition, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Partition part, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Table, + * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/ExitException.java b/hcatalog/core/src/test/java/org/apache/hcatalog/ExitException.java index 0c631d6..025f86e 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/ExitException.java +++ 
b/hcatalog/core/src/test/java/org/apache/hcatalog/ExitException.java @@ -23,19 +23,19 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.ExitException} instead */ public class ExitException extends SecurityException { - private static final long serialVersionUID = -1982617086752946683L; - private final int status; + private static final long serialVersionUID = -1982617086752946683L; + private final int status; - /** - * @return the status - */ - public int getStatus() { - return status; - } + /** + * @return the status + */ + public int getStatus() { + return status; + } - public ExitException(int status) { + public ExitException(int status) { - super("Raising exception, instead of System.exit(). Return code was: " + status); - this.status = status; - } + super("Raising exception, instead of System.exit(). Return code was: " + status); + this.status = status; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/HcatTestUtils.java b/hcatalog/core/src/test/java/org/apache/hcatalog/HcatTestUtils.java index 67e1050..86e42f5 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/HcatTestUtils.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/HcatTestUtils.java @@ -39,63 +39,63 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.HcatTestUtils} instead */ public class HcatTestUtils { - private static final Logger LOG = LoggerFactory.getLogger(HcatTestUtils.class); + private static final Logger LOG = LoggerFactory.getLogger(HcatTestUtils.class); - public static FsPermission perm007 = FsPermission.createImmutable((short) 0007); // -------rwx - public static FsPermission perm070 = FsPermission.createImmutable((short) 0070); // ----rwx--- - public static FsPermission perm700 = FsPermission.createImmutable((short) 0700); // -rwx------ - public static FsPermission perm755 = FsPermission.createImmutable((short) 0755); // -rwxr-xr-x - public static FsPermission perm777 = FsPermission.createImmutable((short) 0777); // -rwxrwxrwx - public static FsPermission perm300 = FsPermission.createImmutable((short) 0300); // --wx------ - public static FsPermission perm500 = FsPermission.createImmutable((short) 0500); // -r-x------ - public static FsPermission perm555 = FsPermission.createImmutable((short) 0555); // -r-xr-xr-x + public static FsPermission perm007 = FsPermission.createImmutable((short) 0007); // -------rwx + public static FsPermission perm070 = FsPermission.createImmutable((short) 0070); // ----rwx--- + public static FsPermission perm700 = FsPermission.createImmutable((short) 0700); // -rwx------ + public static FsPermission perm755 = FsPermission.createImmutable((short) 0755); // -rwxr-xr-x + public static FsPermission perm777 = FsPermission.createImmutable((short) 0777); // -rwxrwxrwx + public static FsPermission perm300 = FsPermission.createImmutable((short) 0300); // --wx------ + public static FsPermission perm500 = FsPermission.createImmutable((short) 0500); // -r-x------ + public static FsPermission perm555 = FsPermission.createImmutable((short) 0555); // -r-xr-xr-x - /** - * Returns the database path. 
- */ - public static Path getDbPath(Hive hive, Warehouse wh, String dbName) throws MetaException, HiveException { - return wh.getDatabasePath(hive.getDatabase(dbName)); - } - - /** - * Removes all databases and tables from the metastore - */ - public static void cleanupHMS(Hive hive, Warehouse wh, FsPermission defaultPerm) - throws HiveException, MetaException, NoSuchObjectException { - for (String dbName : hive.getAllDatabases()) { - if (dbName.equals("default")) { - continue; - } - try { - Path path = getDbPath(hive, wh, dbName); - FileSystem whFs = path.getFileSystem(hive.getConf()); - whFs.setPermission(path, defaultPerm); - } catch (IOException ex) { - //ignore - } - hive.dropDatabase(dbName, true, true, true); - } + /** + * Returns the database path. + */ + public static Path getDbPath(Hive hive, Warehouse wh, String dbName) throws MetaException, HiveException { + return wh.getDatabasePath(hive.getDatabase(dbName)); + } - //clean tables in default db - for (String tablename : hive.getAllTables("default")) { - hive.dropTable("default", tablename, true, true); - } + /** + * Removes all databases and tables from the metastore + */ + public static void cleanupHMS(Hive hive, Warehouse wh, FsPermission defaultPerm) + throws HiveException, MetaException, NoSuchObjectException { + for (String dbName : hive.getAllDatabases()) { + if (dbName.equals("default")) { + continue; + } + try { + Path path = getDbPath(hive, wh, dbName); + FileSystem whFs = path.getFileSystem(hive.getConf()); + whFs.setPermission(path, defaultPerm); + } catch (IOException ex) { + //ignore + } + hive.dropDatabase(dbName, true, true, true); } - public static void createTestDataFile(String filename, String[] lines) throws IOException { - FileWriter writer = null; - try { - File file = new File(filename); - file.deleteOnExit(); - writer = new FileWriter(file); - for (String line : lines) { - writer.write(line + "\n"); - } - } finally { - if (writer != null) { - writer.close(); - } - } + //clean tables in default db + for (String tablename : hive.getAllTables("default")) { + hive.dropTable("default", tablename, true, true); + } + } + public static void createTestDataFile(String filename, String[] lines) throws IOException { + FileWriter writer = null; + try { + File file = new File(filename); + file.deleteOnExit(); + writer = new FileWriter(file); + for (String line : lines) { + writer.write(line + "\n"); + } + } finally { + if (writer != null) { + writer.close(); + } } + + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/MiniCluster.java b/hcatalog/core/src/test/java/org/apache/hcatalog/MiniCluster.java index 518d3cc..f1642cf 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/MiniCluster.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/MiniCluster.java @@ -44,159 +44,159 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.MiniCluster} instead */ public class MiniCluster { - private MiniDFSCluster m_dfs = null; - private MiniMRCluster m_mr = null; - private FileSystem m_fileSys = null; - private JobConf m_conf = null; - - private final static MiniCluster INSTANCE = new MiniCluster(); - private static boolean isSetup = true; - - private MiniCluster() { - setupMiniDfsAndMrClusters(); + private MiniDFSCluster m_dfs = null; + private MiniMRCluster m_mr = null; + private FileSystem m_fileSys = null; + private JobConf m_conf = null; + + private final static MiniCluster INSTANCE = new MiniCluster(); + private static boolean isSetup = true; + + private MiniCluster() { + 
setupMiniDfsAndMrClusters(); + } + + private void setupMiniDfsAndMrClusters() { + try { + final int dataNodes = 1; // There will be 4 data nodes + final int taskTrackers = 1; // There will be 4 task tracker nodes + Configuration config = new Configuration(); + + // Builds and starts the mini dfs and mapreduce clusters + System.setProperty("hadoop.log.dir", "."); + m_dfs = new MiniDFSCluster(config, dataNodes, true, null); + + m_fileSys = m_dfs.getFileSystem(); + m_mr = new MiniMRCluster(taskTrackers, m_fileSys.getUri().toString(), 1); + + // Create the configuration hadoop-site.xml file + File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/"); + conf_dir.mkdirs(); + File conf_file = new File(conf_dir, "hadoop-site.xml"); + + // Write the necessary config info to hadoop-site.xml + m_conf = m_mr.createJobConf(); + m_conf.setInt("mapred.submit.replication", 1); + m_conf.set("dfs.datanode.address", "0.0.0.0:0"); + m_conf.set("dfs.datanode.http.address", "0.0.0.0:0"); + m_conf.writeXml(new FileOutputStream(conf_file)); + + // Set the system properties needed by Pig + System.setProperty("cluster", m_conf.get("mapred.job.tracker")); + System.setProperty("namenode", m_conf.get("fs.default.name")); + System.setProperty("junit.hadoop.conf", conf_dir.getPath()); + } catch (IOException e) { + throw new RuntimeException(e); } - - private void setupMiniDfsAndMrClusters() { - try { - final int dataNodes = 1; // There will be 4 data nodes - final int taskTrackers = 1; // There will be 4 task tracker nodes - Configuration config = new Configuration(); - - // Builds and starts the mini dfs and mapreduce clusters - System.setProperty("hadoop.log.dir", "."); - m_dfs = new MiniDFSCluster(config, dataNodes, true, null); - - m_fileSys = m_dfs.getFileSystem(); - m_mr = new MiniMRCluster(taskTrackers, m_fileSys.getUri().toString(), 1); - - // Create the configuration hadoop-site.xml file - File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/"); - conf_dir.mkdirs(); - File conf_file = new File(conf_dir, "hadoop-site.xml"); - - // Write the necessary config info to hadoop-site.xml - m_conf = m_mr.createJobConf(); - m_conf.setInt("mapred.submit.replication", 1); - m_conf.set("dfs.datanode.address", "0.0.0.0:0"); - m_conf.set("dfs.datanode.http.address", "0.0.0.0:0"); - m_conf.writeXml(new FileOutputStream(conf_file)); - - // Set the system properties needed by Pig - System.setProperty("cluster", m_conf.get("mapred.job.tracker")); - System.setProperty("namenode", m_conf.get("fs.default.name")); - System.setProperty("junit.hadoop.conf", conf_dir.getPath()); - } catch (IOException e) { - throw new RuntimeException(e); - } + } + + /** + * Returns the single instance of class MiniClusterBuilder that + * represents the resouces for a mini dfs cluster and a mini + * mapreduce cluster. + */ + public static MiniCluster buildCluster() { + if (!isSetup) { + INSTANCE.setupMiniDfsAndMrClusters(); + isSetup = true; } - - /** - * Returns the single instance of class MiniClusterBuilder that - * represents the resouces for a mini dfs cluster and a mini - * mapreduce cluster. 
- */ - public static MiniCluster buildCluster() { - if (!isSetup) { - INSTANCE.setupMiniDfsAndMrClusters(); - isSetup = true; - } - return INSTANCE; + return INSTANCE; + } + + public void shutDown() { + INSTANCE.shutdownMiniDfsAndMrClusters(); + } + + @Override + protected void finalize() { + shutdownMiniDfsAndMrClusters(); + } + + private void shutdownMiniDfsAndMrClusters() { + isSetup = false; + try { + if (m_fileSys != null) { + m_fileSys.close(); + } + } catch (IOException e) { + e.printStackTrace(); } - - public void shutDown() { - INSTANCE.shutdownMiniDfsAndMrClusters(); + if (m_dfs != null) { + m_dfs.shutdown(); } - - @Override - protected void finalize() { - shutdownMiniDfsAndMrClusters(); + if (m_mr != null) { + m_mr.shutdown(); } - - private void shutdownMiniDfsAndMrClusters() { - isSetup = false; - try { - if (m_fileSys != null) { - m_fileSys.close(); - } - } catch (IOException e) { - e.printStackTrace(); - } - if (m_dfs != null) { - m_dfs.shutdown(); - } - if (m_mr != null) { - m_mr.shutdown(); - } - m_fileSys = null; - m_dfs = null; - m_mr = null; + m_fileSys = null; + m_dfs = null; + m_mr = null; + } + + public Properties getProperties() { + errorIfNotSetup(); + Properties properties = new Properties(); + assert m_conf != null; + Iterator> iter = m_conf.iterator(); + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + properties.put(entry.getKey(), entry.getValue()); } - - public Properties getProperties() { - errorIfNotSetup(); - Properties properties = new Properties(); - assert m_conf != null; - Iterator> iter = m_conf.iterator(); - while (iter.hasNext()) { - Map.Entry entry = iter.next(); - properties.put(entry.getKey(), entry.getValue()); - } - return properties; + return properties; + } + + public void setProperty(String name, String value) { + errorIfNotSetup(); + m_conf.set(name, value); + } + + public FileSystem getFileSystem() { + errorIfNotSetup(); + return m_fileSys; + } + + /** + * Throw RunTimeException if isSetup is false + */ + private void errorIfNotSetup() { + if (isSetup) { + return; } - - public void setProperty(String name, String value) { - errorIfNotSetup(); - m_conf.set(name, value); + String msg = "function called on MiniCluster that has been shutdown"; + throw new RuntimeException(msg); + } + + static public void createInputFile(MiniCluster miniCluster, String fileName, + String[] inputData) + throws IOException { + FileSystem fs = miniCluster.getFileSystem(); + createInputFile(fs, fileName, inputData); + } + + static public void createInputFile(FileSystem fs, String fileName, + String[] inputData) throws IOException { + Path path = new Path(fileName); + if (fs.exists(path)) { + throw new IOException("File " + fileName + " already exists on the minicluster"); } - - public FileSystem getFileSystem() { - errorIfNotSetup(); - return m_fileSys; - } - - /** - * Throw RunTimeException if isSetup is false - */ - private void errorIfNotSetup() { - if (isSetup) { - return; - } - String msg = "function called on MiniCluster that has been shutdown"; - throw new RuntimeException(msg); - } - - static public void createInputFile(MiniCluster miniCluster, String fileName, - String[] inputData) - throws IOException { - FileSystem fs = miniCluster.getFileSystem(); - createInputFile(fs, fileName, inputData); - } - - static public void createInputFile(FileSystem fs, String fileName, - String[] inputData) throws IOException { - Path path = new Path(fileName); - if (fs.exists(path)) { - throw new IOException("File " + fileName + " already exists on the 
minicluster"); - } - FSDataOutputStream stream = fs.create(path); - PrintWriter pw = new PrintWriter(new OutputStreamWriter(stream, "UTF-8")); - for (int i = 0; i < inputData.length; i++) { - pw.println(inputData[i]); - } - pw.close(); - - } - - /** - * Helper to remove a dfs file from the minicluster DFS - * - * @param miniCluster reference to the Minicluster where the file should be deleted - * @param fileName pathname of the file to be deleted - * @throws IOException - */ - static public void deleteFile(MiniCluster miniCluster, String fileName) - throws IOException { - FileSystem fs = miniCluster.getFileSystem(); - fs.delete(new Path(fileName), true); + FSDataOutputStream stream = fs.create(path); + PrintWriter pw = new PrintWriter(new OutputStreamWriter(stream, "UTF-8")); + for (int i = 0; i < inputData.length; i++) { + pw.println(inputData[i]); } + pw.close(); + + } + + /** + * Helper to remove a dfs file from the minicluster DFS + * + * @param miniCluster reference to the Minicluster where the file should be deleted + * @param fileName pathname of the file to be deleted + * @throws IOException + */ + static public void deleteFile(MiniCluster miniCluster, String fileName) + throws IOException { + FileSystem fs = miniCluster.getFileSystem(); + fs.delete(new Path(fileName), true); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/NoExitSecurityManager.java b/hcatalog/core/src/test/java/org/apache/hcatalog/NoExitSecurityManager.java index 31d32be..684dea1 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/NoExitSecurityManager.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/NoExitSecurityManager.java @@ -26,20 +26,20 @@ */ public class NoExitSecurityManager extends SecurityManager { - @Override - public void checkPermission(Permission perm) { - // allow anything. - } + @Override + public void checkPermission(Permission perm) { + // allow anything. + } - @Override - public void checkPermission(Permission perm, Object context) { - // allow anything. - } + @Override + public void checkPermission(Permission perm, Object context) { + // allow anything. 
+ } - @Override - public void checkExit(int status) { + @Override + public void checkExit(int status) { - super.checkExit(status); - throw new ExitException(status); - } + super.checkExit(status); + throw new ExitException(status); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/cli/DummyStorageHandler.java b/hcatalog/core/src/test/java/org/apache/hcatalog/cli/DummyStorageHandler.java index 8d6bffc..1447a56 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/cli/DummyStorageHandler.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/cli/DummyStorageHandler.java @@ -56,245 +56,245 @@ */ class DummyStorageHandler extends HCatStorageHandler { + @Override + public Configuration getConf() { + return null; + } + + @Override + public void setConf(Configuration conf) { + } + + @Override + public Class getInputFormatClass() { + return DummyInputFormat.class; + } + + @Override + public Class getOutputFormatClass() { + return DummyOutputFormat.class; + } + + @Override + public Class getSerDeClass() { + return ColumnarSerDe.class; + } + + @Override + public HiveMetaHook getMetaHook() { + return null; + } + + @Override + public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { + } + + @Override + public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { + } + + @Override + public HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException { + return new DummyAuthProvider(); + } + @Override + public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { + //do nothing by default + //EK: added the same (no-op) implementation as in + // org.apache.hive.hcatalog.DefaultStorageHandler (hive 0.12) + // this is needed to get 0.11 API compat layer to work + // see HIVE-4896 + } + + private class DummyAuthProvider implements HiveAuthorizationProvider { + @Override public Configuration getConf() { - return null; + return null; } + /* @param conf + * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.Configuration) + */ @Override public void setConf(Configuration conf) { } + /* @param conf + /* @throws HiveException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#init(org.apache.hadoop.conf.Configuration) + */ @Override - public Class getInputFormatClass() { - return DummyInputFormat.class; + public void init(Configuration conf) throws HiveException { } + /* @return HiveAuthenticationProvider + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#getAuthenticator() + */ @Override - public Class getOutputFormatClass() { - return DummyOutputFormat.class; + public HiveAuthenticationProvider getAuthenticator() { + return null; } + /* @param authenticator + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#setAuthenticator(org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider) + */ @Override - public Class getSerDeClass() { - return ColumnarSerDe.class; + public void setAuthenticator(HiveAuthenticationProvider authenticator) { } + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ @Override - public HiveMetaHook getMetaHook() { - return null; + public void authorize(Privilege[] readRequiredPriv, + 
Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { } + /* @param db + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.metastore.api.Database, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ @Override - public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { + public void authorize(Database db, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { } + /* @param table + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ @Override - public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { + public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { } + /* @param part + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Partition, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ @Override - public HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException { - return new DummyAuthProvider(); + public void authorize(Partition part, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { } + + /* @param table + /* @param part + /* @param columns + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ @Override - public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { - //do nothing by default - //EK: added the same (no-op) implementation as in - // org.apache.hive.hcatalog.DefaultStorageHandler (hive 0.12) - // this is needed to get 0.11 API compat layer to work - // see HIVE-4896 + public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { } - private class DummyAuthProvider implements HiveAuthorizationProvider { - - @Override - public Configuration getConf() { - return null; - } - - /* @param conf - * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.Configuration) - */ - @Override - public void setConf(Configuration conf) { - } - - /* @param conf - /* @throws HiveException - * @see 
org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#init(org.apache.hadoop.conf.Configuration) - */ - @Override - public void init(Configuration conf) throws HiveException { - } - - /* @return HiveAuthenticationProvider - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#getAuthenticator() - */ - @Override - public HiveAuthenticationProvider getAuthenticator() { - return null; - } - - /* @param authenticator - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#setAuthenticator(org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider) - */ - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - } - - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param db - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.metastore.api.Database, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param table - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param part - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Partition, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param table - /* @param part - /* @param columns - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void 
authorize(org.apache.hadoop.hive.ql.metadata.Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - } + } + + /** + * The Class DummyInputFormat is a dummy implementation of the old hadoop + * mapred.InputFormat required by HiveStorageHandler. + */ + class DummyInputFormat implements + InputFormat { + /* + * @see + * org.apache.hadoop.mapred.InputFormat#getRecordReader(org.apache.hadoop + * .mapred.InputSplit, org.apache.hadoop.mapred.JobConf, + * org.apache.hadoop.mapred.Reporter) + */ + @Override + public RecordReader getRecordReader( + InputSplit split, JobConf jobconf, Reporter reporter) + throws IOException { + throw new IOException("This operation is not supported."); } - /** - * The Class DummyInputFormat is a dummy implementation of the old hadoop - * mapred.InputFormat required by HiveStorageHandler. + /* + * @see + * org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop. + * mapred .JobConf, int) + */ + @Override + public InputSplit[] getSplits(JobConf jobconf, int number) + throws IOException { + throw new IOException("This operation is not supported."); + } + } + + /** + * The Class DummyOutputFormat is a dummy implementation of the old hadoop + * mapred.OutputFormat and HiveOutputFormat required by HiveStorageHandler. + */ + class DummyOutputFormat implements + OutputFormat, HCatRecord>, + HiveOutputFormat, HCatRecord> { + + /* + * @see + * org.apache.hadoop.mapred.OutputFormat#checkOutputSpecs(org.apache + * .hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf) */ - class DummyInputFormat implements - InputFormat { - - /* - * @see - * org.apache.hadoop.mapred.InputFormat#getRecordReader(org.apache.hadoop - * .mapred.InputSplit, org.apache.hadoop.mapred.JobConf, - * org.apache.hadoop.mapred.Reporter) - */ - @Override - public RecordReader getRecordReader( - InputSplit split, JobConf jobconf, Reporter reporter) - throws IOException { - throw new IOException("This operation is not supported."); - } - - /* - * @see - * org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop. - * mapred .JobConf, int) - */ - @Override - public InputSplit[] getSplits(JobConf jobconf, int number) - throws IOException { - throw new IOException("This operation is not supported."); - } + @Override + public void checkOutputSpecs(FileSystem fs, JobConf jobconf) + throws IOException { + throw new IOException("This operation is not supported."); + } - /** - * The Class DummyOutputFormat is a dummy implementation of the old hadoop - * mapred.OutputFormat and HiveOutputFormat required by HiveStorageHandler. + /* + * @see + * org.apache.hadoop.mapred.OutputFormat#getRecordWriter(org.apache. + * hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf, + * java.lang.String, org.apache.hadoop.util.Progressable) */ - class DummyOutputFormat implements - OutputFormat, HCatRecord>, - HiveOutputFormat, HCatRecord> { - - /* - * @see - * org.apache.hadoop.mapred.OutputFormat#checkOutputSpecs(org.apache - * .hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf) - */ - @Override - public void checkOutputSpecs(FileSystem fs, JobConf jobconf) - throws IOException { - throw new IOException("This operation is not supported."); - - } - - /* - * @see - * org.apache.hadoop.mapred.OutputFormat#getRecordWriter(org.apache. 
- * hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf, - * java.lang.String, org.apache.hadoop.util.Progressable) - */ - @Override - public RecordWriter, HCatRecord> getRecordWriter( - FileSystem fs, JobConf jobconf, String str, - Progressable progress) throws IOException { - throw new IOException("This operation is not supported."); - } - - /* - * @see - * org.apache.hadoop.hive.ql.io.HiveOutputFormat#getHiveRecordWriter(org - * .apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path, - * java.lang.Class, boolean, java.util.Properties, - * org.apache.hadoop.util.Progressable) - */ - @Override - public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( - JobConf jc, Path finalOutPath, - Class valueClass, boolean isCompressed, - Properties tableProperties, Progressable progress) - throws IOException { - throw new IOException("This operation is not supported."); - } + @Override + public RecordWriter, HCatRecord> getRecordWriter( + FileSystem fs, JobConf jobconf, String str, + Progressable progress) throws IOException { + throw new IOException("This operation is not supported."); + } + /* + * @see + * org.apache.hadoop.hive.ql.io.HiveOutputFormat#getHiveRecordWriter(org + * .apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path, + * java.lang.Class, boolean, java.util.Properties, + * org.apache.hadoop.util.Progressable) + */ + @Override + public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( + JobConf jc, Path finalOutPath, + Class valueClass, boolean isCompressed, + Properties tableProperties, Progressable progress) + throws IOException { + throw new IOException("This operation is not supported."); } + } + } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestPermsGrp.java b/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestPermsGrp.java index 1ec1e3a..e566ee0 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestPermsGrp.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestPermsGrp.java @@ -56,178 +56,178 @@ */ public class TestPermsGrp extends TestCase { - private boolean isServerRunning = false; - private static final int msPort = 20101; - private HiveConf hcatConf; - private Warehouse clientWH; - private HiveMetaStoreClient msc; - private static final Logger LOG = LoggerFactory.getLogger(TestPermsGrp.class); - - @Override - protected void tearDown() throws Exception { - System.setSecurityManager(securityManager); + private boolean isServerRunning = false; + private static final int msPort = 20101; + private HiveConf hcatConf; + private Warehouse clientWH; + private HiveMetaStoreClient msc; + private static final Logger LOG = LoggerFactory.getLogger(TestPermsGrp.class); + + @Override + protected void tearDown() throws Exception { + System.setSecurityManager(securityManager); + } + + @Override + protected void setUp() throws Exception { + + if (isServerRunning) { + return; } - @Override - protected void setUp() throws Exception { - - if (isServerRunning) { - return; - } - - MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge()); - - isServerRunning = true; - - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - - hcatConf = new HiveConf(this.getClass()); - hcatConf.set("hive.metastore.local", "false"); - hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://127.0.0.1:" + msPort); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - 
hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - clientWH = new Warehouse(hcatConf); - msc = new HiveMetaStoreClient(hcatConf, null); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge()); + + isServerRunning = true; + + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + + hcatConf = new HiveConf(this.getClass()); + hcatConf.set("hive.metastore.local", "false"); + hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://127.0.0.1:" + msPort); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + clientWH = new Warehouse(hcatConf); + msc = new HiveMetaStoreClient(hcatConf, null); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + + public void testCustomPerms() throws Exception { + + String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + String tblName = "simptbl"; + String typeName = "Person"; + + try { + + // Lets first test for default permissions, this is the case when user specified nothing. + Table tbl = getTable(dbName, tblName, typeName); + msc.createTable(tbl); + Database db = Hive.get(hcatConf).getDatabase(dbName); + Path dfsPath = clientWH.getTablePath(db, tblName); + cleanupTbl(dbName, tblName, typeName); + + // Next user did specify perms. + try { + HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx-wx---"}); + } catch (Exception e) { + assertTrue(e instanceof ExitException); + assertEquals(((ExitException) e).getStatus(), 0); + } + dfsPath = clientWH.getTablePath(db, tblName); + assertTrue(dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath).getPermission().equals(FsPermission.valueOf("drwx-wx---"))); + + cleanupTbl(dbName, tblName, typeName); + + // User specified perms in invalid format. + hcatConf.set(HCatConstants.HCAT_PERMS, "rwx"); + // make sure create table fails. + try { + HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx"}); + assert false; + } catch (Exception me) { + assertTrue(me instanceof ExitException); + } + // No physical dir gets created. + dfsPath = clientWH.getTablePath(db, tblName); + try { + dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); + assert false; + } catch (Exception fnfe) { + assertTrue(fnfe instanceof FileNotFoundException); + } + + // And no metadata gets created. 
+ try { + msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName); + assert false; + } catch (Exception e) { + assertTrue(e instanceof NoSuchObjectException); + assertEquals("default.simptbl table not found", e.getMessage()); + } + + // test for invalid group name + hcatConf.set(HCatConstants.HCAT_PERMS, "drw-rw-rw-"); + hcatConf.set(HCatConstants.HCAT_GROUP, "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"); + + try { + // create table must fail. + HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rw-rw-rw-", "-g", "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"}); + assert false; + } catch (Exception me) { + assertTrue(me instanceof SecurityException); + } + + try { + // no metadata should get created. + msc.getTable(dbName, tblName); + assert false; + } catch (Exception e) { + assertTrue(e instanceof NoSuchObjectException); + assertEquals("default.simptbl table not found", e.getMessage()); + } + try { + // neither dir should get created. + dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); + assert false; + } catch (Exception e) { + assertTrue(e instanceof FileNotFoundException); + } + + } catch (Exception e) { + LOG.error("testCustomPerms failed.", e); + throw e; } + } + private void silentDropDatabase(String dbName) throws MetaException, TException { + try { + for (String tableName : msc.getTables(dbName, "*")) { + msc.dropTable(dbName, tableName); + } - public void testCustomPerms() throws Exception { - - String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - String tblName = "simptbl"; - String typeName = "Person"; - - try { - - // Lets first test for default permissions, this is the case when user specified nothing. - Table tbl = getTable(dbName, tblName, typeName); - msc.createTable(tbl); - Database db = Hive.get(hcatConf).getDatabase(dbName); - Path dfsPath = clientWH.getTablePath(db, tblName); - cleanupTbl(dbName, tblName, typeName); - - // Next user did specify perms. - try { - HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx-wx---"}); - } catch (Exception e) { - assertTrue(e instanceof ExitException); - assertEquals(((ExitException) e).getStatus(), 0); - } - dfsPath = clientWH.getTablePath(db, tblName); - assertTrue(dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath).getPermission().equals(FsPermission.valueOf("drwx-wx---"))); - - cleanupTbl(dbName, tblName, typeName); - - // User specified perms in invalid format. - hcatConf.set(HCatConstants.HCAT_PERMS, "rwx"); - // make sure create table fails. - try { - HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx"}); - assert false; - } catch (Exception me) { - assertTrue(me instanceof ExitException); - } - // No physical dir gets created. - dfsPath = clientWH.getTablePath(db, tblName); - try { - dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); - assert false; - } catch (Exception fnfe) { - assertTrue(fnfe instanceof FileNotFoundException); - } - - // And no metadata gets created. - try { - msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName); - assert false; - } catch (Exception e) { - assertTrue(e instanceof NoSuchObjectException); - assertEquals("default.simptbl table not found", e.getMessage()); - } - - // test for invalid group name - hcatConf.set(HCatConstants.HCAT_PERMS, "drw-rw-rw-"); - hcatConf.set(HCatConstants.HCAT_GROUP, "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"); - - try { - // create table must fail. 
- HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rw-rw-rw-", "-g", "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"}); - assert false; - } catch (Exception me) { - assertTrue(me instanceof SecurityException); - } - - try { - // no metadata should get created. - msc.getTable(dbName, tblName); - assert false; - } catch (Exception e) { - assertTrue(e instanceof NoSuchObjectException); - assertEquals("default.simptbl table not found", e.getMessage()); - } - try { - // neither dir should get created. - dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); - assert false; - } catch (Exception e) { - assertTrue(e instanceof FileNotFoundException); - } - - } catch (Exception e) { - LOG.error("testCustomPerms failed.", e); - throw e; - } + } catch (NoSuchObjectException e) { } + } - private void silentDropDatabase(String dbName) throws MetaException, TException { - try { - for (String tableName : msc.getTables(dbName, "*")) { - msc.dropTable(dbName, tableName); - } + private void cleanupTbl(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, InvalidOperationException { - } catch (NoSuchObjectException e) { - } - } - - private void cleanupTbl(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, InvalidOperationException { + msc.dropTable(dbName, tblName); + msc.dropType(typeName); + } - msc.dropTable(dbName, tblName); - msc.dropType(typeName); - } + private Table getTable(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, AlreadyExistsException, InvalidObjectException { - private Table getTable(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, AlreadyExistsException, InvalidObjectException { + msc.dropTable(dbName, tblName); + silentDropDatabase(dbName); - msc.dropTable(dbName, tblName); - silentDropDatabase(dbName); + msc.dropType(typeName); + Type typ1 = new Type(); + typ1.setName(typeName); + typ1.setFields(new ArrayList(1)); + typ1.getFields().add(new FieldSchema("name", serdeConstants.STRING_TYPE_NAME, "")); + msc.createType(typ1); - msc.dropType(typeName); - Type typ1 = new Type(); - typ1.setName(typeName); - typ1.setFields(new ArrayList(1)); - typ1.getFields().add(new FieldSchema("name", serdeConstants.STRING_TYPE_NAME, "")); - msc.createType(typ1); + Table tbl = new Table(); + tbl.setDbName(dbName); + tbl.setTableName(tblName); + StorageDescriptor sd = new StorageDescriptor(); + tbl.setSd(sd); + sd.setCols(typ1.getFields()); - Table tbl = new Table(); - tbl.setDbName(dbName); - tbl.setTableName(tblName); - StorageDescriptor sd = new StorageDescriptor(); - tbl.setSd(sd); - sd.setCols(typ1.getFields()); - - sd.setSerdeInfo(new SerDeInfo()); - return tbl; - } + sd.setSerdeInfo(new SerDeInfo()); + return tbl; + } - private SecurityManager securityManager; + private SecurityManager securityManager; } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestSemanticAnalysis.java b/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestSemanticAnalysis.java index c7dcd40..80f2ec5 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestSemanticAnalysis.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestSemanticAnalysis.java @@ -55,339 +55,339 @@ */ public class TestSemanticAnalysis extends HCatBaseTest { - private static final Logger LOG = LoggerFactory.getLogger(TestSemanticAnalysis.class); - private static 
final String TBL_NAME = "junit_sem_analysis"; - - private Driver hcatDriver = null; - private String query; - - @Before - public void setUpHCatDriver() throws IOException { - if (hcatDriver == null) { - HiveConf hcatConf = new HiveConf(hiveConf); - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - } + private static final Logger LOG = LoggerFactory.getLogger(TestSemanticAnalysis.class); + private static final String TBL_NAME = "junit_sem_analysis"; + + private Driver hcatDriver = null; + private String query; + + @Before + public void setUpHCatDriver() throws IOException { + if (hcatDriver == null) { + HiveConf hcatConf = new HiveConf(hiveConf); + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hcatDriver = new Driver(hcatConf); + SessionState.start(new CliSessionState(hcatConf)); } - - @Test - public void testDescDB() throws CommandNeedRetryException, IOException { - hcatDriver.run("drop database mydb cascade"); - assertEquals(0, hcatDriver.run("create database mydb").getResponseCode()); - CommandProcessorResponse resp = hcatDriver.run("describe database mydb"); - assertEquals(0, resp.getResponseCode()); - ArrayList result = new ArrayList(); - hcatDriver.getResults(result); - assertTrue(result.get(0).contains("mydb.db")); - hcatDriver.run("drop database mydb cascade"); - } - - @Test - public void testCreateTblWithLowerCasePartNames() throws CommandNeedRetryException, MetaException, TException, NoSuchObjectException { - driver.run("drop table junit_sem_analysis"); - CommandProcessorResponse resp = driver.run("create table junit_sem_analysis (a int) partitioned by (B string) stored as TEXTFILE"); - assertEquals(resp.getResponseCode(), 0); - assertEquals(null, resp.getErrorMessage()); - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals("Partition key name case problem", "b", tbl.getPartitionKeys().get(0).getName()); - driver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTblFFpart() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - driver.run("drop table junit_sem_analysis"); - driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"); - driver.run("alter table junit_sem_analysis add partition (b='2010-10-10')"); - hcatDriver.run("alter table junit_sem_analysis partition (b='2010-10-10') set fileformat RCFILE"); - - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(TextInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - List partVals = new ArrayList(1); - partVals.add("2010-10-10"); - Partition part = client.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME, partVals); - - assertEquals(RCFileInputFormat.class.getName(), part.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), part.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testUsNonExistentDB() throws CommandNeedRetryException { - CommandProcessorResponse resp = hcatDriver.run("use no_such_db"); - assertEquals(1, resp.getResponseCode()); - } - - @Test - public void testDatabaseOperations() throws MetaException, CommandNeedRetryException { - - List 
dbs = client.getAllDatabases(); - String testDb1 = "testdatabaseoperatons1"; - String testDb2 = "testdatabaseoperatons2"; - - if (dbs.contains(testDb1.toLowerCase())) { - assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); - } - - if (dbs.contains(testDb2.toLowerCase())) { - assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); - } - - assertEquals(0, hcatDriver.run("create database " + testDb1).getResponseCode()); - assertTrue(client.getAllDatabases().contains(testDb1)); - assertEquals(0, hcatDriver.run("create database if not exists " + testDb1).getResponseCode()); - assertTrue(client.getAllDatabases().contains(testDb1)); - assertEquals(0, hcatDriver.run("create database if not exists " + testDb2).getResponseCode()); - assertTrue(client.getAllDatabases().contains(testDb2)); - - assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); - assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); - assertFalse(client.getAllDatabases().contains(testDb1)); - assertFalse(client.getAllDatabases().contains(testDb2)); - } - - @Test - public void testCreateTableIfNotExists() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table " + TBL_NAME); - hcatDriver.run("create table junit_sem_analysis (a int) stored as RCFILE"); - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - List cols = tbl.getSd().getCols(); - assertEquals(1, cols.size()); - assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - CommandProcessorResponse resp = hcatDriver.run("create table if not exists junit_sem_analysis (a int) stored as RCFILE"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - cols = tbl.getSd().getCols(); - assertEquals(1, cols.size()); - assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTblTouch() throws CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis touch"); - assertEquals(0, response.getResponseCode()); - - hcatDriver.run("alter table junit_sem_analysis touch partition (b='12')"); - assertEquals(0, response.getResponseCode()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testChangeColumns() throws CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis change a a1 int"); - assertEquals(0, response.getResponseCode()); - - response = hcatDriver.run("alter table junit_sem_analysis change a1 a string"); - assertEquals(0, response.getResponseCode()); - - response = hcatDriver.run("alter table junit_sem_analysis 
change a a int after c"); - assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAddReplaceCols() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string)"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis replace columns (a1 tinyint)"); - assertEquals(0, response.getResponseCode()); - - response = hcatDriver.run("alter table junit_sem_analysis add columns (d tinyint)"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); - - response = hcatDriver.run("describe extended junit_sem_analysis"); - assertEquals(0, response.getResponseCode()); - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - List cols = tbl.getSd().getCols(); - assertEquals(2, cols.size()); - assertTrue(cols.get(0).equals(new FieldSchema("a1", "tinyint", null))); - assertTrue(cols.get(1).equals(new FieldSchema("d", "tinyint", null))); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTblClusteredBy() throws CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis clustered by (a) into 7 buckets"); - assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testDescDB() throws CommandNeedRetryException, IOException { + hcatDriver.run("drop database mydb cascade"); + assertEquals(0, hcatDriver.run("create database mydb").getResponseCode()); + CommandProcessorResponse resp = hcatDriver.run("describe database mydb"); + assertEquals(0, resp.getResponseCode()); + ArrayList result = new ArrayList(); + hcatDriver.getResults(result); + assertTrue(result.get(0).contains("mydb.db")); + hcatDriver.run("drop database mydb cascade"); + } + + @Test + public void testCreateTblWithLowerCasePartNames() throws CommandNeedRetryException, MetaException, TException, NoSuchObjectException { + driver.run("drop table junit_sem_analysis"); + CommandProcessorResponse resp = driver.run("create table junit_sem_analysis (a int) partitioned by (B string) stored as TEXTFILE"); + assertEquals(resp.getResponseCode(), 0); + assertEquals(null, resp.getErrorMessage()); + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals("Partition key name case problem", "b", tbl.getPartitionKeys().get(0).getName()); + driver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAlterTblFFpart() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + driver.run("drop table junit_sem_analysis"); + driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"); + driver.run("alter table junit_sem_analysis add partition (b='2010-10-10')"); + hcatDriver.run("alter table junit_sem_analysis partition (b='2010-10-10') set fileformat RCFILE"); + + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals(TextInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); 
+ + List partVals = new ArrayList(1); + partVals.add("2010-10-10"); + Partition part = client.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME, partVals); + + assertEquals(RCFileInputFormat.class.getName(), part.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), part.getSd().getOutputFormat()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testUsNonExistentDB() throws CommandNeedRetryException { + CommandProcessorResponse resp = hcatDriver.run("use no_such_db"); + assertEquals(1, resp.getResponseCode()); + } + + @Test + public void testDatabaseOperations() throws MetaException, CommandNeedRetryException { + + List dbs = client.getAllDatabases(); + String testDb1 = "testdatabaseoperatons1"; + String testDb2 = "testdatabaseoperatons2"; + + if (dbs.contains(testDb1.toLowerCase())) { + assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); } - @Test - public void testAlterTableSetFF() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("alter table junit_sem_analysis set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + - "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); - hcatDriver.run("desc extended junit_sem_analysis"); - - tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); + if (dbs.contains(testDb2.toLowerCase())) { + assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); } - @Test - public void testAddPartFail() throws CommandNeedRetryException { + assertEquals(0, hcatDriver.run("create database " + testDb1).getResponseCode()); + assertTrue(client.getAllDatabases().contains(testDb1)); + assertEquals(0, hcatDriver.run("create database if not exists " + testDb1).getResponseCode()); + assertTrue(client.getAllDatabases().contains(testDb1)); + assertEquals(0, hcatDriver.run("create database if not exists " + testDb2).getResponseCode()); + assertTrue(client.getAllDatabases().contains(testDb2)); + + assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); + assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); + assertFalse(client.getAllDatabases().contains(testDb1)); + assertFalse(client.getAllDatabases().contains(testDb2)); + } + + @Test + public void testCreateTableIfNotExists() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + hcatDriver.run("drop table " + TBL_NAME); + hcatDriver.run("create table junit_sem_analysis (a int) stored as RCFILE"); + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + List cols = tbl.getSd().getCols(); + assertEquals(1, cols.size()); + assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); + assertEquals(RCFileInputFormat.class.getName(), 
tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + CommandProcessorResponse resp = hcatDriver.run("create table if not exists junit_sem_analysis (a int) stored as RCFILE"); + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + cols = tbl.getSd().getCols(); + assertEquals(1, cols.size()); + assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); + assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAlterTblTouch() throws CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis touch"); + assertEquals(0, response.getResponseCode()); + + hcatDriver.run("alter table junit_sem_analysis touch partition (b='12')"); + assertEquals(0, response.getResponseCode()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testChangeColumns() throws CommandNeedRetryException { + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis change a a1 int"); + assertEquals(0, response.getResponseCode()); + + response = hcatDriver.run("alter table junit_sem_analysis change a1 a string"); + assertEquals(0, response.getResponseCode()); + + response = hcatDriver.run("alter table junit_sem_analysis change a a int after c"); + assertEquals(0, response.getResponseCode()); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAddReplaceCols() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string)"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis replace columns (a1 tinyint)"); + assertEquals(0, response.getResponseCode()); + + response = hcatDriver.run("alter table junit_sem_analysis add columns (d tinyint)"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); + + response = hcatDriver.run("describe extended junit_sem_analysis"); + assertEquals(0, response.getResponseCode()); + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + List cols = tbl.getSd().getCols(); + assertEquals(2, cols.size()); + assertTrue(cols.get(0).equals(new FieldSchema("a1", "tinyint", null))); + assertTrue(cols.get(1).equals(new FieldSchema("d", "tinyint", null))); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAlterTblClusteredBy() throws CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis clustered by (a) into 7 buckets"); + assertEquals(0, response.getResponseCode()); + 
hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAlterTableSetFF() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + hcatDriver.run("alter table junit_sem_analysis set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); + hcatDriver.run("desc extended junit_sem_analysis"); + + tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAddPartFail() throws CommandNeedRetryException { + + driver.run("drop table junit_sem_analysis"); + driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location 'README.txt'"); + assertEquals(0, response.getResponseCode()); + driver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAddPartPass() throws IOException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location '" + TEST_DATA_DIR + "'"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testCTAS() throws CommandNeedRetryException { + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) as select * from tbl2"; + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(40000, response.getResponseCode()); + assertTrue(response.getErrorMessage().contains("FAILED: SemanticException Operation not supported. 
Create table as Select is not a valid operation.")); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testStoredAs() throws CommandNeedRetryException { + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int)"; + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAddDriverInfo() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as " + + "INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver' "; + assertEquals(0, hcatDriver.run(query).getResponseCode()); + + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testInvalidateNonStringPartition() throws IOException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b int) stored as RCFILE"; + + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(40000, response.getResponseCode()); + assertEquals("FAILED: SemanticException Operation not supported. HCatalog only supports partition columns of type string. For column: b Found type: int", + response.getErrorMessage()); - driver.run("drop table junit_sem_analysis"); - driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location 'README.txt'"); - assertEquals(0, response.getResponseCode()); - driver.run("drop table junit_sem_analysis"); - } + } - @Test - public void testAddPartPass() throws IOException, CommandNeedRetryException { + @Test + public void testInvalidateSeqFileStoredAs() throws IOException, CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location '" + TEST_DATA_DIR + "'"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); - hcatDriver.run("drop table junit_sem_analysis"); - } + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as SEQUENCEFILE"; - @Test - public void testCTAS() throws CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) as select * from tbl2"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(40000, response.getResponseCode()); - assertTrue(response.getErrorMessage().contains("FAILED: SemanticException Operation not supported. 
Create table as Select is not a valid operation.")); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testStoredAs() throws CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int)"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); - } + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); - @Test - public void testAddDriverInfo() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + } - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as " + - "INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + - "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver' "; - assertEquals(0, hcatDriver.run(query).getResponseCode()); + @Test + public void testInvalidateTextFileStoredAs() throws IOException, CommandNeedRetryException { - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"; + + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); - } + } - @Test - public void testInvalidateNonStringPartition() throws IOException, CommandNeedRetryException { + @Test + public void testInvalidateClusteredBy() throws IOException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) clustered by (a) into 10 buckets stored as TEXTFILE"; - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b int) stored as RCFILE"; + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); + } + + @Test + public void testCTLFail() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_sem_analysis"); + driver.run("drop table like_table"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(40000, response.getResponseCode()); - assertEquals("FAILED: SemanticException Operation not supported. HCatalog only supports partition columns of type string. 
For column: b Found type: int", - response.getErrorMessage()); + driver.run(query); + query = "create table like_table like junit_sem_analysis"; + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); + } + + @Test + public void testCTLPass() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + try { + hcatDriver.run("drop table junit_sem_analysis"); + } catch (Exception e) { + LOG.error("Error in drop table.", e); } - - @Test - public void testInvalidateSeqFileStoredAs() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as SEQUENCEFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - - } - - @Test - public void testInvalidateTextFileStoredAs() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - - } - - @Test - public void testInvalidateClusteredBy() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) clustered by (a) into 10 buckets stored as TEXTFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - } - - @Test - public void testCTLFail() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_sem_analysis"); - driver.run("drop table like_table"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; - - driver.run(query); - query = "create table like_table like junit_sem_analysis"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - } - - @Test - public void testCTLPass() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - try { - hcatDriver.run("drop table junit_sem_analysis"); - } catch (Exception e) { - LOG.error("Error in drop table.", e); - } - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; - - hcatDriver.run(query); - String likeTbl = "like_table"; - hcatDriver.run("drop table " + likeTbl); - query = "create table like_table like junit_sem_analysis"; - CommandProcessorResponse resp = hcatDriver.run(query); - assertEquals(0, resp.getResponseCode()); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; + + hcatDriver.run(query); + String likeTbl = "like_table"; + hcatDriver.run("drop table " + likeTbl); + query = "create table like_table like junit_sem_analysis"; + CommandProcessorResponse resp = hcatDriver.run(query); + assertEquals(0, resp.getResponseCode()); // Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, likeTbl); // assertEquals(likeTbl,tbl.getTableName()); // List cols = tbl.getSd().getCols(); @@ -401,7 +401,7 @@ public void testCTLPass() throws IOException, MetaException, TException, NoSuchO // // hcatDriver.run("drop table junit_sem_analysis"); // hcatDriver.run("drop table "+likeTbl); - } + } // This test case currently fails, since add partitions don't 
inherit anything from tables. diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestUseDatabase.java b/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestUseDatabase.java index c2ef9c6..d164da3 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestUseDatabase.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestUseDatabase.java @@ -36,44 +36,44 @@ */ public class TestUseDatabase extends TestCase { - private Driver hcatDriver; + private Driver hcatDriver; - @Override - protected void setUp() throws Exception { + @Override + protected void setUp() throws Exception { - HiveConf hcatConf = new HiveConf(this.getClass()); - hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + HiveConf hcatConf = new HiveConf(this.getClass()); + hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - } + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + hcatDriver = new Driver(hcatConf); + SessionState.start(new CliSessionState(hcatConf)); + } - String query; - private final String dbName = "testUseDatabase_db"; - private final String tblName = "testUseDatabase_tbl"; + String query; + private final String dbName = "testUseDatabase_db"; + private final String tblName = "testUseDatabase_tbl"; - public void testAlterTablePass() throws IOException, CommandNeedRetryException { + public void testAlterTablePass() throws IOException, CommandNeedRetryException { - hcatDriver.run("create database " + dbName); - hcatDriver.run("use " + dbName); - hcatDriver.run("create table " + tblName + " (a int) partitioned by (b string) stored as RCFILE"); + hcatDriver.run("create database " + dbName); + hcatDriver.run("use " + dbName); + hcatDriver.run("create table " + tblName + " (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response; + CommandProcessorResponse response; - response = hcatDriver.run("alter table " + tblName + " add partition (b='2') location '/tmp'"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); + response = hcatDriver.run("alter table " + tblName + " add partition (b='2') location '/tmp'"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); - response = hcatDriver.run("alter table " + tblName + " set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + - "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); + response = hcatDriver.run("alter table " + tblName + " set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); - hcatDriver.run("drop table " + tblName); - hcatDriver.run("drop database " + dbName); - } + hcatDriver.run("drop table " + tblName); + hcatDriver.run("drop database " + dbName); + } } 
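For context on what these reindented CLI tests exercise: they all share one setup pattern, namely pointing HiveConf's SEMANTIC_ANALYZER_HOOK at HCatSemanticAnalyzer, starting a CliSessionState, and issuing DDL through a Driver whose CommandProcessorResponse code is then asserted. The standalone sketch below restates that pattern for readability; it is illustrative only and not part of the patch. The class name HCatDriverSketch and the table name sketch_tbl are invented here, and the import path for HCatSemanticAnalyzer is assumed to follow the org.apache.hcatalog package layout of the files in this diff; every call used is one that appears verbatim in the hunks above.

import org.apache.hadoop.hive.cli.CliSessionState;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;

public class HCatDriverSketch {
  public static void main(String[] args) throws Exception {
    // Same wiring the tests use: route DDL through HCatalog's semantic analyzer hook.
    HiveConf hcatConf = new HiveConf(HCatDriverSketch.class);
    hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
        HCatSemanticAnalyzer.class.getName());
    SessionState.start(new CliSessionState(hcatConf));
    Driver hcatDriver = new Driver(hcatConf);

    // Run a DDL statement and inspect the response code (0 means success),
    // mirroring the assertEquals(0, response.getResponseCode()) checks above.
    CommandProcessorResponse response =
        hcatDriver.run("create table sketch_tbl (a int) partitioned by (b string) stored as RCFILE");
    System.out.println(response.getResponseCode() + " " + response.getErrorMessage());

    // Clean up, as the tests do after each assertion block.
    hcatDriver.run("drop table sketch_tbl");
  }
}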
diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHCatUtil.java b/hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHCatUtil.java index 2205b49..2783001 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHCatUtil.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHCatUtil.java @@ -44,142 +44,142 @@ */ public class TestHCatUtil { - @Test - public void testFsPermissionOperation() { - - HashMap permsCode = new HashMap(); - - for (int i = 0; i < 8; i++) { - for (int j = 0; j < 8; j++) { - for (int k = 0; k < 8; k++) { - StringBuilder sb = new StringBuilder(); - sb.append("0"); - sb.append(i); - sb.append(j); - sb.append(k); - Integer code = (((i * 8) + j) * 8) + k; - String perms = (new FsPermission(Short.decode(sb.toString()))).toString(); - if (permsCode.containsKey(perms)) { - Assert.assertEquals("permissions(" + perms + ") mapped to multiple codes", code, permsCode.get(perms)); - } - permsCode.put(perms, code); - assertFsPermissionTransformationIsGood(perms); - } - } + @Test + public void testFsPermissionOperation() { + + HashMap permsCode = new HashMap(); + + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + for (int k = 0; k < 8; k++) { + StringBuilder sb = new StringBuilder(); + sb.append("0"); + sb.append(i); + sb.append(j); + sb.append(k); + Integer code = (((i * 8) + j) * 8) + k; + String perms = (new FsPermission(Short.decode(sb.toString()))).toString(); + if (permsCode.containsKey(perms)) { + Assert.assertEquals("permissions(" + perms + ") mapped to multiple codes", code, permsCode.get(perms)); + } + permsCode.put(perms, code); + assertFsPermissionTransformationIsGood(perms); } + } } - - private void assertFsPermissionTransformationIsGood(String perms) { - Assert.assertEquals(perms, FsPermission.valueOf("-" + perms).toString()); - } - - @Test - public void testValidateMorePermissive() { - assertConsistentFsPermissionBehaviour(FsAction.ALL, true, true, true, true, true, true, true, true); - assertConsistentFsPermissionBehaviour(FsAction.READ, false, true, false, true, false, false, false, false); - assertConsistentFsPermissionBehaviour(FsAction.WRITE, false, true, false, false, true, false, false, false); - assertConsistentFsPermissionBehaviour(FsAction.EXECUTE, false, true, true, false, false, false, false, false); - assertConsistentFsPermissionBehaviour(FsAction.READ_EXECUTE, false, true, true, true, false, true, false, false); - assertConsistentFsPermissionBehaviour(FsAction.READ_WRITE, false, true, false, true, true, false, true, false); - assertConsistentFsPermissionBehaviour(FsAction.WRITE_EXECUTE, false, true, true, false, true, false, false, true); - assertConsistentFsPermissionBehaviour(FsAction.NONE, false, true, false, false, false, false, false, false); - } - - - private void assertConsistentFsPermissionBehaviour( - FsAction base, boolean versusAll, boolean versusNone, - boolean versusX, boolean versusR, boolean versusW, - boolean versusRX, boolean versusRW, boolean versusWX) { - - Assert.assertTrue(versusAll == HCatUtil.validateMorePermissive(base, FsAction.ALL)); - Assert.assertTrue(versusX == HCatUtil.validateMorePermissive(base, FsAction.EXECUTE)); - Assert.assertTrue(versusNone == HCatUtil.validateMorePermissive(base, FsAction.NONE)); - Assert.assertTrue(versusR == HCatUtil.validateMorePermissive(base, FsAction.READ)); - Assert.assertTrue(versusRX == HCatUtil.validateMorePermissive(base, FsAction.READ_EXECUTE)); - Assert.assertTrue(versusRW == HCatUtil.validateMorePermissive(base, 
FsAction.READ_WRITE)); - Assert.assertTrue(versusW == HCatUtil.validateMorePermissive(base, FsAction.WRITE)); - Assert.assertTrue(versusWX == HCatUtil.validateMorePermissive(base, FsAction.WRITE_EXECUTE)); - } - - @Test - public void testExecutePermissionsCheck() { - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.ALL)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.NONE)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.EXECUTE)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_EXECUTE)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE_EXECUTE)); - - Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ)); - Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE)); - Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_WRITE)); - - } - - @Test - public void testGetTableSchemaWithPtnColsApi() throws IOException { - // Check the schema of a table with one field & no partition keys. - StorageDescriptor sd = new StorageDescriptor( - Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)), - "location", "org.apache.hadoop.mapred.TextInputFormat", - "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(), - new ArrayList(), new ArrayList(), new HashMap()); - org.apache.hadoop.hive.metastore.api.Table apiTable = - new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", - 0, 0, 0, sd, new ArrayList(), new HashMap(), - "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); - Table table = new Table(apiTable); - - List expectedHCatSchema = - Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null)); - - Assert.assertEquals(new HCatSchema(expectedHCatSchema), - HCatUtil.getTableSchemaWithPtnCols(table)); - - // Add a partition key & ensure its reflected in the schema. - List partitionKeys = - Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null)); - table.getTTable().setPartitionKeys(partitionKeys); - expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null)); - Assert.assertEquals(new HCatSchema(expectedHCatSchema), - HCatUtil.getTableSchemaWithPtnCols(table)); - } - - /** - * Hive represents tables in two ways: - *

- * <ul>
- *   <li>org.apache.hadoop.hive.metastore.api.Table - exactly whats stored in the metastore</li>
- *   <li>org.apache.hadoop.hive.ql.metadata.Table - adds business logic over api.Table</li>
- * </ul>
- * Here we check SerDe-reported fields are included in the table schema. - */ - @Test - public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOException { - Map parameters = Maps.newHashMap(); - parameters.put(serdeConstants.SERIALIZATION_CLASS, - "org.apache.hadoop.hive.serde2.thrift.test.IntString"); - parameters.put(serdeConstants.SERIALIZATION_FORMAT, "org.apache.thrift.protocol.TBinaryProtocol"); - - SerDeInfo serDeInfo = new SerDeInfo(null, - "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", parameters); - - // StorageDescriptor has an empty list of fields - SerDe will report them. - StorageDescriptor sd = new StorageDescriptor(new ArrayList(), "location", - "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", - false, -1, serDeInfo, new ArrayList(), new ArrayList(), - new HashMap()); - - org.apache.hadoop.hive.metastore.api.Table apiTable = - new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", - 0, 0, 0, sd, new ArrayList(), new HashMap(), - "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); - Table table = new Table(apiTable); - - List expectedHCatSchema = Lists.newArrayList( - new HCatFieldSchema("myint", HCatFieldSchema.Type.INT, null), - new HCatFieldSchema("mystring", HCatFieldSchema.Type.STRING, null), - new HCatFieldSchema("underscore_int", HCatFieldSchema.Type.INT, null)); - - Assert.assertEquals(new HCatSchema(expectedHCatSchema), - HCatUtil.getTableSchemaWithPtnCols(table)); - } + } + + private void assertFsPermissionTransformationIsGood(String perms) { + Assert.assertEquals(perms, FsPermission.valueOf("-" + perms).toString()); + } + + @Test + public void testValidateMorePermissive() { + assertConsistentFsPermissionBehaviour(FsAction.ALL, true, true, true, true, true, true, true, true); + assertConsistentFsPermissionBehaviour(FsAction.READ, false, true, false, true, false, false, false, false); + assertConsistentFsPermissionBehaviour(FsAction.WRITE, false, true, false, false, true, false, false, false); + assertConsistentFsPermissionBehaviour(FsAction.EXECUTE, false, true, true, false, false, false, false, false); + assertConsistentFsPermissionBehaviour(FsAction.READ_EXECUTE, false, true, true, true, false, true, false, false); + assertConsistentFsPermissionBehaviour(FsAction.READ_WRITE, false, true, false, true, true, false, true, false); + assertConsistentFsPermissionBehaviour(FsAction.WRITE_EXECUTE, false, true, true, false, true, false, false, true); + assertConsistentFsPermissionBehaviour(FsAction.NONE, false, true, false, false, false, false, false, false); + } + + + private void assertConsistentFsPermissionBehaviour( + FsAction base, boolean versusAll, boolean versusNone, + boolean versusX, boolean versusR, boolean versusW, + boolean versusRX, boolean versusRW, boolean versusWX) { + + Assert.assertTrue(versusAll == HCatUtil.validateMorePermissive(base, FsAction.ALL)); + Assert.assertTrue(versusX == HCatUtil.validateMorePermissive(base, FsAction.EXECUTE)); + Assert.assertTrue(versusNone == HCatUtil.validateMorePermissive(base, FsAction.NONE)); + Assert.assertTrue(versusR == HCatUtil.validateMorePermissive(base, FsAction.READ)); + Assert.assertTrue(versusRX == HCatUtil.validateMorePermissive(base, FsAction.READ_EXECUTE)); + Assert.assertTrue(versusRW == HCatUtil.validateMorePermissive(base, FsAction.READ_WRITE)); + Assert.assertTrue(versusW == HCatUtil.validateMorePermissive(base, FsAction.WRITE)); + Assert.assertTrue(versusWX == 
HCatUtil.validateMorePermissive(base, FsAction.WRITE_EXECUTE)); + } + + @Test + public void testExecutePermissionsCheck() { + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.ALL)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.NONE)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.EXECUTE)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_EXECUTE)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE_EXECUTE)); + + Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ)); + Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE)); + Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_WRITE)); + + } + + @Test + public void testGetTableSchemaWithPtnColsApi() throws IOException { + // Check the schema of a table with one field & no partition keys. + StorageDescriptor sd = new StorageDescriptor( + Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)), + "location", "org.apache.hadoop.mapred.TextInputFormat", + "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(), + new ArrayList(), new ArrayList(), new HashMap()); + org.apache.hadoop.hive.metastore.api.Table apiTable = + new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", + 0, 0, 0, sd, new ArrayList(), new HashMap(), + "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); + Table table = new Table(apiTable); + + List expectedHCatSchema = + Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null)); + + Assert.assertEquals(new HCatSchema(expectedHCatSchema), + HCatUtil.getTableSchemaWithPtnCols(table)); + + // Add a partition key & ensure its reflected in the schema. + List partitionKeys = + Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null)); + table.getTTable().setPartitionKeys(partitionKeys); + expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null)); + Assert.assertEquals(new HCatSchema(expectedHCatSchema), + HCatUtil.getTableSchemaWithPtnCols(table)); + } + + /** + * Hive represents tables in two ways: + *
+ * <ul>
+ *   <li>org.apache.hadoop.hive.metastore.api.Table - exactly whats stored in the metastore</li>
+ *   <li>org.apache.hadoop.hive.ql.metadata.Table - adds business logic over api.Table</li>
+ * </ul>
+ * Here we check SerDe-reported fields are included in the table schema. + */ + @Test + public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOException { + Map parameters = Maps.newHashMap(); + parameters.put(serdeConstants.SERIALIZATION_CLASS, + "org.apache.hadoop.hive.serde2.thrift.test.IntString"); + parameters.put(serdeConstants.SERIALIZATION_FORMAT, "org.apache.thrift.protocol.TBinaryProtocol"); + + SerDeInfo serDeInfo = new SerDeInfo(null, + "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", parameters); + + // StorageDescriptor has an empty list of fields - SerDe will report them. + StorageDescriptor sd = new StorageDescriptor(new ArrayList(), "location", + "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", + false, -1, serDeInfo, new ArrayList(), new ArrayList(), + new HashMap()); + + org.apache.hadoop.hive.metastore.api.Table apiTable = + new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", + 0, 0, 0, sd, new ArrayList(), new HashMap(), + "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); + Table table = new Table(apiTable); + + List expectedHCatSchema = Lists.newArrayList( + new HCatFieldSchema("myint", HCatFieldSchema.Type.INT, null), + new HCatFieldSchema("mystring", HCatFieldSchema.Type.STRING, null), + new HCatFieldSchema("underscore_int", HCatFieldSchema.Type.INT, null)); + + Assert.assertEquals(new HCatSchema(expectedHCatSchema), + HCatUtil.getTableSchemaWithPtnCols(table)); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHiveClientCache.java b/hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHiveClientCache.java index eb52080..d69dc8e 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHiveClientCache.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHiveClientCache.java @@ -65,205 +65,205 @@ */ public class TestHiveClientCache { - private static final Logger LOG = LoggerFactory.getLogger(TestHiveClientCache.class); - final HiveConf hiveConf = new HiveConf(); - - @BeforeClass - public static void setUp() throws Exception { + private static final Logger LOG = LoggerFactory.getLogger(TestHiveClientCache.class); + final HiveConf hiveConf = new HiveConf(); + + @BeforeClass + public static void setUp() throws Exception { + } + + @AfterClass + public static void tearDown() throws Exception { + } + + @Test + public void testCacheHit() throws IOException, MetaException, LoginException { + + HiveClientCache cache = new HiveClientCache(1000); + HiveMetaStoreClient client = cache.get(hiveConf); + assertNotNull(client); + client.close(); // close shouldn't matter + + // Setting a non important configuration should return the same client only + hiveConf.setIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS, 10); + HiveMetaStoreClient client2 = cache.get(hiveConf); + assertNotNull(client2); + assertEquals(client, client2); + client2.close(); + } + + @Test + public void testCacheMiss() throws IOException, MetaException, LoginException { + HiveClientCache cache = new HiveClientCache(1000); + HiveMetaStoreClient client = cache.get(hiveConf); + assertNotNull(client); + + // Set different uri as it is one of the criteria deciding whether to return the same client or not + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different + HiveMetaStoreClient client2 = cache.get(hiveConf); + assertNotNull(client2); + 
assertNotSame(client, client2); + } + + /** + * Check that a new client is returned for the same configuration after the expiry time. + * Also verify that the expiry time configuration is honoured + */ + @Test + public void testCacheExpiry() throws IOException, MetaException, LoginException, InterruptedException { + HiveClientCache cache = new HiveClientCache(1); + HiveClientCache.CacheableHiveMetaStoreClient client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); + assertNotNull(client); + + Thread.sleep(2500); + HiveMetaStoreClient client2 = cache.get(hiveConf); + client.close(); + assertTrue(client.isClosed()); // close() after *expiry time* and *a cache access* should have tore down the client + + assertNotNull(client2); + assertNotSame(client, client2); + } + + /** + * Check that a *new* client is created if asked from different threads even with + * the same hive configuration + * @throws ExecutionException + * @throws InterruptedException + */ + @Test + public void testMultipleThreadAccess() throws ExecutionException, InterruptedException { + final HiveClientCache cache = new HiveClientCache(1000); + + class GetHiveClient implements Callable { + @Override + public HiveMetaStoreClient call() throws IOException, MetaException, LoginException { + return cache.get(hiveConf); + } } - @AfterClass - public static void tearDown() throws Exception { + ExecutorService executor = Executors.newFixedThreadPool(2); + + Callable worker1 = new GetHiveClient(); + Callable worker2 = new GetHiveClient(); + Future clientFuture1 = executor.submit(worker1); + Future clientFuture2 = executor.submit(worker2); + HiveMetaStoreClient client1 = clientFuture1.get(); + HiveMetaStoreClient client2 = clientFuture2.get(); + assertNotNull(client1); + assertNotNull(client2); + assertNotSame(client1, client2); + } + + @Test + public void testCloseAllClients() throws IOException, MetaException, LoginException { + final HiveClientCache cache = new HiveClientCache(1000); + HiveClientCache.CacheableHiveMetaStoreClient client1 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different + HiveClientCache.CacheableHiveMetaStoreClient client2 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); + cache.closeAllClientsQuietly(); + assertTrue(client1.isClosed()); + assertTrue(client2.isClosed()); + } + + /** + * Test that a long table name actually breaks the HMSC. 
Subsequently check that isOpen() reflects + * and tells if the client is broken + */ + @Ignore("hangs indefinitely") + @Test + public void testHMSCBreakability() throws IOException, MetaException, LoginException, TException, AlreadyExistsException, + InvalidObjectException, NoSuchObjectException, InterruptedException { + // Setup + LocalMetaServer metaServer = new LocalMetaServer(); + metaServer.start(); + + final HiveClientCache cache = new HiveClientCache(1000); + HiveClientCache.CacheableHiveMetaStoreClient client = + (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(metaServer.getHiveConf()); + + assertTrue(client.isOpen()); + + final String DB_NAME = "test_db"; + final String LONG_TABLE_NAME = "long_table_name_" + new BigInteger(200, new Random()).toString(2); + + try { + client.dropTable(DB_NAME, LONG_TABLE_NAME); + } catch (Exception e) { } - - @Test - public void testCacheHit() throws IOException, MetaException, LoginException { - - HiveClientCache cache = new HiveClientCache(1000); - HiveMetaStoreClient client = cache.get(hiveConf); - assertNotNull(client); - client.close(); // close shouldn't matter - - // Setting a non important configuration should return the same client only - hiveConf.setIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS, 10); - HiveMetaStoreClient client2 = cache.get(hiveConf); - assertNotNull(client2); - assertEquals(client, client2); - client2.close(); + try { + client.dropDatabase(DB_NAME); + } catch (Exception e) { } - @Test - public void testCacheMiss() throws IOException, MetaException, LoginException { - HiveClientCache cache = new HiveClientCache(1000); - HiveMetaStoreClient client = cache.get(hiveConf); - assertNotNull(client); - - // Set different uri as it is one of the criteria deciding whether to return the same client or not - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different - HiveMetaStoreClient client2 = cache.get(hiveConf); - assertNotNull(client2); - assertNotSame(client, client2); + client.createDatabase(new Database(DB_NAME, "", null, null)); + + List fields = new ArrayList(); + fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); + Table tbl = new Table(); + tbl.setDbName(DB_NAME); + tbl.setTableName(LONG_TABLE_NAME); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(fields); + tbl.setSd(sd); + sd.setSerdeInfo(new SerDeInfo()); + + // Break the client + try { + client.createTable(tbl); + fail("Exception was expected while creating table with long name"); + } catch (Exception e) { } - /** - * Check that a new client is returned for the same configuration after the expiry time. 
- * Also verify that the expiry time configuration is honoured - */ - @Test - public void testCacheExpiry() throws IOException, MetaException, LoginException, InterruptedException { - HiveClientCache cache = new HiveClientCache(1); - HiveClientCache.CacheableHiveMetaStoreClient client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); - assertNotNull(client); - - Thread.sleep(2500); - HiveMetaStoreClient client2 = cache.get(hiveConf); - client.close(); - assertTrue(client.isClosed()); // close() after *expiry time* and *a cache access* should have tore down the client - - assertNotNull(client2); - assertNotSame(client, client2); + assertFalse(client.isOpen()); + metaServer.shutDown(); + } + + private static class LocalMetaServer implements Runnable { + public final int MS_PORT = 20101; + private final HiveConf hiveConf; + private final SecurityManager securityManager; + public final static int WAIT_TIME_FOR_BOOTUP = 30000; + + public LocalMetaServer() { + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + hiveConf = new HiveConf(TestHiveClientCache.class); + hiveConf.set("hive.metastore.local", "false"); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + + MS_PORT); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, + "false"); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); } - /** - * Check that a *new* client is created if asked from different threads even with - * the same hive configuration - * @throws ExecutionException - * @throws InterruptedException - */ - @Test - public void testMultipleThreadAccess() throws ExecutionException, InterruptedException { - final HiveClientCache cache = new HiveClientCache(1000); - - class GetHiveClient implements Callable { - @Override - public HiveMetaStoreClient call() throws IOException, MetaException, LoginException { - return cache.get(hiveConf); - } - } - - ExecutorService executor = Executors.newFixedThreadPool(2); - - Callable worker1 = new GetHiveClient(); - Callable worker2 = new GetHiveClient(); - Future clientFuture1 = executor.submit(worker1); - Future clientFuture2 = executor.submit(worker2); - HiveMetaStoreClient client1 = clientFuture1.get(); - HiveMetaStoreClient client2 = clientFuture2.get(); - assertNotNull(client1); - assertNotNull(client2); - assertNotSame(client1, client2); + public void start() throws InterruptedException { + Thread thread = new Thread(this); + thread.start(); + Thread.sleep(WAIT_TIME_FOR_BOOTUP); // Wait for the server to bootup } - @Test - public void testCloseAllClients() throws IOException, MetaException, LoginException { - final HiveClientCache cache = new HiveClientCache(1000); - HiveClientCache.CacheableHiveMetaStoreClient client1 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different - HiveClientCache.CacheableHiveMetaStoreClient client2 = (HiveClientCache.CacheableHiveMetaStoreClient) 
cache.get(hiveConf); - cache.closeAllClientsQuietly(); - assertTrue(client1.isClosed()); - assertTrue(client2.isClosed()); + @Override + public void run() { + try { + HiveMetaStore.main(new String[]{"-v", "-p", String.valueOf(MS_PORT)}); + } catch (Throwable t) { + LOG.error("Exiting. Got exception from metastore: ", t); + } } - /** - * Test that a long table name actually breaks the HMSC. Subsequently check that isOpen() reflects - * and tells if the client is broken - */ - @Ignore("hangs indefinitely") - @Test - public void testHMSCBreakability() throws IOException, MetaException, LoginException, TException, AlreadyExistsException, - InvalidObjectException, NoSuchObjectException, InterruptedException { - // Setup - LocalMetaServer metaServer = new LocalMetaServer(); - metaServer.start(); - - final HiveClientCache cache = new HiveClientCache(1000); - HiveClientCache.CacheableHiveMetaStoreClient client = - (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(metaServer.getHiveConf()); - - assertTrue(client.isOpen()); - - final String DB_NAME = "test_db"; - final String LONG_TABLE_NAME = "long_table_name_" + new BigInteger(200, new Random()).toString(2); - - try { - client.dropTable(DB_NAME, LONG_TABLE_NAME); - } catch (Exception e) { - } - try { - client.dropDatabase(DB_NAME); - } catch (Exception e) { - } - - client.createDatabase(new Database(DB_NAME, "", null, null)); - - List fields = new ArrayList(); - fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); - Table tbl = new Table(); - tbl.setDbName(DB_NAME); - tbl.setTableName(LONG_TABLE_NAME); - StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(fields); - tbl.setSd(sd); - sd.setSerdeInfo(new SerDeInfo()); - - // Break the client - try { - client.createTable(tbl); - fail("Exception was expected while creating table with long name"); - } catch (Exception e) { - } - - assertFalse(client.isOpen()); - metaServer.shutDown(); + public HiveConf getHiveConf() { + return hiveConf; } - private static class LocalMetaServer implements Runnable { - public final int MS_PORT = 20101; - private final HiveConf hiveConf; - private final SecurityManager securityManager; - public final static int WAIT_TIME_FOR_BOOTUP = 30000; - - public LocalMetaServer() { - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - hiveConf = new HiveConf(TestHiveClientCache.class); - hiveConf.set("hive.metastore.local", "false"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" - + MS_PORT); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, - "false"); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - } - - public void start() throws InterruptedException { - Thread thread = new Thread(this); - thread.start(); - Thread.sleep(WAIT_TIME_FOR_BOOTUP); // Wait for the server to bootup - } - - @Override - public void run() { - try { - HiveMetaStore.main(new String[]{"-v", "-p", String.valueOf(MS_PORT)}); - } catch (Throwable t) { - LOG.error("Exiting. 
Got exception from metastore: ", t); - } - } - - public HiveConf getHiveConf() { - return hiveConf; - } - - public void shutDown() { - System.setSecurityManager(securityManager); - } + public void shutDown() { + System.setSecurityManager(securityManager); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/data/HCatDataCheckUtil.java b/hcatalog/core/src/test/java/org/apache/hcatalog/data/HCatDataCheckUtil.java index 673a038..b691185 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/data/HCatDataCheckUtil.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/data/HCatDataCheckUtil.java @@ -38,78 +38,78 @@ */ public class HCatDataCheckUtil { - private static final Logger LOG = LoggerFactory.getLogger(HCatDataCheckUtil.class); + private static final Logger LOG = LoggerFactory.getLogger(HCatDataCheckUtil.class); - public static Driver instantiateDriver(MiniCluster cluster) { - HiveConf hiveConf = new HiveConf(HCatDataCheckUtil.class); - for (Entry e : cluster.getProperties().entrySet()) { - hiveConf.set(e.getKey().toString(), e.getValue().toString()); - } - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - LOG.debug("Hive conf : {}", hiveConf.getAllProperties()); - Driver driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - return driver; - } - - public static void generateDataFile(MiniCluster cluster, String fileName) throws IOException { - MiniCluster.deleteFile(cluster, fileName); - String[] input = new String[50]; - for (int i = 0; i < 50; i++) { - input[i] = (i % 5) + "\t" + i + "\t" + "_S" + i + "S_"; - } - MiniCluster.createInputFile(cluster, fileName, input); + public static Driver instantiateDriver(MiniCluster cluster) { + HiveConf hiveConf = new HiveConf(HCatDataCheckUtil.class); + for (Entry e : cluster.getProperties().entrySet()) { + hiveConf.set(e.getKey().toString(), e.getValue().toString()); } - - public static void createTable(Driver driver, String tableName, String createTableArgs) - throws CommandNeedRetryException, IOException { - String createTable = "create table " + tableName + createTableArgs; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + "]"); - } + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + + LOG.debug("Hive conf : {}", hiveConf.getAllProperties()); + Driver driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + return driver; + } + + public static void generateDataFile(MiniCluster cluster, String fileName) throws IOException { + MiniCluster.deleteFile(cluster, fileName); + String[] input = new String[50]; + for (int i = 0; i < 50; i++) { + input[i] = (i % 5) + "\t" + i + "\t" + "_S" + i + "S_"; } - - public static void dropTable(Driver driver, String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table if exists " + tablename); + MiniCluster.createInputFile(cluster, fileName, input); + } + + public static void createTable(Driver driver, String tableName, String createTableArgs) + throws CommandNeedRetryException, IOException { + String createTable = "create table " + tableName + createTableArgs; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); } - - public static ArrayList formattedRun(Driver driver, String name, String selectCmd) - throws CommandNeedRetryException, IOException { - driver.run(selectCmd); - ArrayList src_values = new ArrayList(); - driver.getResults(src_values); - LOG.info("{} : {}", name, src_values); - return src_values; - } - - - public static boolean recordsEqual(HCatRecord first, HCatRecord second) { - return (compareRecords(first, second) == 0); - } - - public static int compareRecords(HCatRecord first, HCatRecord second) { - return compareRecordContents(first.getAll(), second.getAll()); - } - - public static int compareRecordContents(List first, List second) { - int mySz = first.size(); - int urSz = second.size(); - if (mySz != urSz) { - return mySz - urSz; - } else { - for (int i = 0; i < first.size(); i++) { - int c = DataType.compare(first.get(i), second.get(i)); - if (c != 0) { - return c; - } - } - return 0; + } + + public static void dropTable(Driver driver, String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table if exists " + tablename); + } + + public static ArrayList formattedRun(Driver driver, String name, String selectCmd) + throws CommandNeedRetryException, IOException { + driver.run(selectCmd); + ArrayList src_values = new ArrayList(); + driver.getResults(src_values); + LOG.info("{} : {}", name, src_values); + return src_values; + } + + + public static boolean recordsEqual(HCatRecord first, HCatRecord second) { + return (compareRecords(first, second) == 0); + } + + public static int compareRecords(HCatRecord first, HCatRecord second) { + return compareRecordContents(first.getAll(), second.getAll()); + } + + public static int compareRecordContents(List first, List second) { + int mySz = first.size(); + int urSz = second.size(); + if (mySz != urSz) { + return mySz - urSz; + } else { + for (int i = 0; i < first.size(); i++) { + int c = DataType.compare(first.get(i), second.get(i)); + if (c != 0) { + return c; } + } + return 0; } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestDefaultHCatRecord.java b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestDefaultHCatRecord.java index 
799b814..3d777d5 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestDefaultHCatRecord.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestDefaultHCatRecord.java @@ -44,219 +44,219 @@ */ public class TestDefaultHCatRecord extends TestCase { - public void testRYW() throws IOException { + public void testRYW() throws IOException { - File f = new File("binary.dat"); - f.delete(); - f.createNewFile(); - f.deleteOnExit(); + File f = new File("binary.dat"); + f.delete(); + f.createNewFile(); + f.deleteOnExit(); - OutputStream fileOutStream = new FileOutputStream(f); - DataOutput outStream = new DataOutputStream(fileOutStream); - - HCatRecord[] recs = getHCatRecords(); - for (int i = 0; i < recs.length; i++) { - recs[i].write(outStream); - } - fileOutStream.flush(); - fileOutStream.close(); - - InputStream fInStream = new FileInputStream(f); - DataInput inpStream = new DataInputStream(fInStream); - - for (int i = 0; i < recs.length; i++) { - HCatRecord rec = new DefaultHCatRecord(); - rec.readFields(inpStream); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[i], rec)); - } - - Assert.assertEquals(fInStream.available(), 0); - fInStream.close(); + OutputStream fileOutStream = new FileOutputStream(f); + DataOutput outStream = new DataOutputStream(fileOutStream); + HCatRecord[] recs = getHCatRecords(); + for (int i = 0; i < recs.length; i++) { + recs[i].write(outStream); } + fileOutStream.flush(); + fileOutStream.close(); - public void testCompareTo() { - HCatRecord[] recs = getHCatRecords(); - Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[0], recs[1]) == 0); - Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[4], recs[5]) == 0); - } + InputStream fInStream = new FileInputStream(f); + DataInput inpStream = new DataInputStream(fInStream); - public void testEqualsObject() { - - HCatRecord[] recs = getHCatRecords(); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[0], recs[1])); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[4], recs[5])); + for (int i = 0; i < recs.length; i++) { + HCatRecord rec = new DefaultHCatRecord(); + rec.readFields(inpStream); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[i], rec)); } - /** - * Test get and set calls with type - * @throws HCatException - */ - public void testGetSetByType1() throws HCatException { - HCatRecord inpRec = getHCatRecords()[0]; - HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); - HCatSchema hsch = - HCatSchemaUtils.getHCatSchema( - "a:tinyint,b:smallint,c:int,d:bigint,e:float,f:double,g:boolean,h:string,i:binary,j:string"); - - - newRec.setByte("a", hsch, inpRec.getByte("a", hsch)); - newRec.setShort("b", hsch, inpRec.getShort("b", hsch)); - newRec.setInteger("c", hsch, inpRec.getInteger("c", hsch)); - newRec.setLong("d", hsch, inpRec.getLong("d", hsch)); - newRec.setFloat("e", hsch, inpRec.getFloat("e", hsch)); - newRec.setDouble("f", hsch, inpRec.getDouble("f", hsch)); - newRec.setBoolean("g", hsch, inpRec.getBoolean("g", hsch)); - newRec.setString("h", hsch, inpRec.getString("h", hsch)); - newRec.setByteArray("i", hsch, inpRec.getByteArray("i", hsch)); - newRec.setString("j", hsch, inpRec.getString("j", hsch)); - - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); - - - } - - /** - * Test get and set calls with type - * @throws HCatException - */ - public void testGetSetByType2() throws HCatException { - HCatRecord inpRec = getGetSet2InpRec(); - - HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); - HCatSchema hsch = - 
HCatSchemaUtils.getHCatSchema("a:binary,b:map,c:array,d:struct"); - - - newRec.setByteArray("a", hsch, inpRec.getByteArray("a", hsch)); - newRec.setMap("b", hsch, inpRec.getMap("b", hsch)); - newRec.setList("c", hsch, inpRec.getList("c", hsch)); - newRec.setStruct("d", hsch, inpRec.getStruct("d", hsch)); - - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); - } - - - private HCatRecord getGetSet2InpRec() { - List rlist = new ArrayList(); - - rlist.add(new byte[]{1, 2, 3}); - - Map mapcol = new HashMap(3); - mapcol.put(new Short("2"), "hcat is cool"); - mapcol.put(new Short("3"), "is it?"); - mapcol.put(new Short("4"), "or is it not?"); - rlist.add(mapcol); - - List listcol = new ArrayList(); - listcol.add(314); - listcol.add(007); - rlist.add(listcol);//list - rlist.add(listcol);//struct - return new DefaultHCatRecord(rlist); - } - - private HCatRecord[] getHCatRecords() { - - List rec_1 = new ArrayList(8); - rec_1.add(new Byte("123")); - rec_1.add(new Short("456")); - rec_1.add(new Integer(789)); - rec_1.add(new Long(1000L)); - rec_1.add(new Float(5.3F)); - rec_1.add(new Double(5.3D)); - rec_1.add(new Boolean(true)); - rec_1.add(new String("hcat and hadoop")); - rec_1.add(null); - rec_1.add("null"); - - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(8); - rec_2.add(new Byte("123")); - rec_2.add(new Short("456")); - rec_2.add(new Integer(789)); - rec_2.add(new Long(1000L)); - rec_2.add(new Float(5.3F)); - rec_2.add(new Double(5.3D)); - rec_2.add(new Boolean(true)); - rec_2.add(new String("hcat and hadoop")); - rec_2.add(null); - rec_2.add("null"); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - List rec_3 = new ArrayList(10); - rec_3.add(new Byte("123")); - rec_3.add(new Short("456")); - rec_3.add(new Integer(789)); - rec_3.add(new Long(1000L)); - rec_3.add(new Double(5.3D)); - rec_3.add(new String("hcat and hadoop")); - rec_3.add(null); - List innerList = new ArrayList(); - innerList.add(314); - innerList.add(007); - rec_3.add(innerList); - Map map = new HashMap(3); - map.put(new Short("2"), "hcat is cool"); - map.put(new Short("3"), "is it?"); - map.put(new Short("4"), "or is it not?"); - rec_3.add(map); - - HCatRecord tup_3 = new DefaultHCatRecord(rec_3); - - List rec_4 = new ArrayList(8); - rec_4.add(new Byte("123")); - rec_4.add(new Short("456")); - rec_4.add(new Integer(789)); - rec_4.add(new Long(1000L)); - rec_4.add(new Double(5.3D)); - rec_4.add(new String("hcat and hadoop")); - rec_4.add(null); - rec_4.add("null"); - - Map map2 = new HashMap(3); - map2.put(new Short("2"), "hcat is cool"); - map2.put(new Short("3"), "is it?"); - map2.put(new Short("4"), "or is it not?"); - rec_4.add(map2); - List innerList2 = new ArrayList(); - innerList2.add(314); - innerList2.add(007); - rec_4.add(innerList2); - HCatRecord tup_4 = new DefaultHCatRecord(rec_4); - - - List rec_5 = new ArrayList(3); - rec_5.add(getByteArray()); - rec_5.add(getStruct()); - rec_5.add(getList()); - HCatRecord tup_5 = new DefaultHCatRecord(rec_5); - - - List rec_6 = new ArrayList(3); - rec_6.add(getByteArray()); - rec_6.add(getStruct()); - rec_6.add(getList()); - HCatRecord tup_6 = new DefaultHCatRecord(rec_6); - - - return new HCatRecord[]{tup_1, tup_2, tup_3, tup_4, tup_5, tup_6}; - - } - - private Object getList() { - return getStruct(); - } - - private Object getByteArray() { - return new byte[]{1, 2, 3, 4}; - } - - private List getStruct() { - List struct = new ArrayList(); - struct.add(new Integer(1)); - struct.add(new String("x")); - return struct; - } + 
Assert.assertEquals(fInStream.available(), 0); + fInStream.close(); + + } + + public void testCompareTo() { + HCatRecord[] recs = getHCatRecords(); + Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[0], recs[1]) == 0); + Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[4], recs[5]) == 0); + } + + public void testEqualsObject() { + + HCatRecord[] recs = getHCatRecords(); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[0], recs[1])); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[4], recs[5])); + } + + /** + * Test get and set calls with type + * @throws HCatException + */ + public void testGetSetByType1() throws HCatException { + HCatRecord inpRec = getHCatRecords()[0]; + HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); + HCatSchema hsch = + HCatSchemaUtils.getHCatSchema( + "a:tinyint,b:smallint,c:int,d:bigint,e:float,f:double,g:boolean,h:string,i:binary,j:string"); + + + newRec.setByte("a", hsch, inpRec.getByte("a", hsch)); + newRec.setShort("b", hsch, inpRec.getShort("b", hsch)); + newRec.setInteger("c", hsch, inpRec.getInteger("c", hsch)); + newRec.setLong("d", hsch, inpRec.getLong("d", hsch)); + newRec.setFloat("e", hsch, inpRec.getFloat("e", hsch)); + newRec.setDouble("f", hsch, inpRec.getDouble("f", hsch)); + newRec.setBoolean("g", hsch, inpRec.getBoolean("g", hsch)); + newRec.setString("h", hsch, inpRec.getString("h", hsch)); + newRec.setByteArray("i", hsch, inpRec.getByteArray("i", hsch)); + newRec.setString("j", hsch, inpRec.getString("j", hsch)); + + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); + + + } + + /** + * Test get and set calls with type + * @throws HCatException + */ + public void testGetSetByType2() throws HCatException { + HCatRecord inpRec = getGetSet2InpRec(); + + HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); + HCatSchema hsch = + HCatSchemaUtils.getHCatSchema("a:binary,b:map,c:array,d:struct"); + + + newRec.setByteArray("a", hsch, inpRec.getByteArray("a", hsch)); + newRec.setMap("b", hsch, inpRec.getMap("b", hsch)); + newRec.setList("c", hsch, inpRec.getList("c", hsch)); + newRec.setStruct("d", hsch, inpRec.getStruct("d", hsch)); + + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); + } + + + private HCatRecord getGetSet2InpRec() { + List rlist = new ArrayList(); + + rlist.add(new byte[]{1, 2, 3}); + + Map mapcol = new HashMap(3); + mapcol.put(new Short("2"), "hcat is cool"); + mapcol.put(new Short("3"), "is it?"); + mapcol.put(new Short("4"), "or is it not?"); + rlist.add(mapcol); + + List listcol = new ArrayList(); + listcol.add(314); + listcol.add(007); + rlist.add(listcol);//list + rlist.add(listcol);//struct + return new DefaultHCatRecord(rlist); + } + + private HCatRecord[] getHCatRecords() { + + List rec_1 = new ArrayList(8); + rec_1.add(new Byte("123")); + rec_1.add(new Short("456")); + rec_1.add(new Integer(789)); + rec_1.add(new Long(1000L)); + rec_1.add(new Float(5.3F)); + rec_1.add(new Double(5.3D)); + rec_1.add(new Boolean(true)); + rec_1.add(new String("hcat and hadoop")); + rec_1.add(null); + rec_1.add("null"); + + HCatRecord tup_1 = new DefaultHCatRecord(rec_1); + + List rec_2 = new ArrayList(8); + rec_2.add(new Byte("123")); + rec_2.add(new Short("456")); + rec_2.add(new Integer(789)); + rec_2.add(new Long(1000L)); + rec_2.add(new Float(5.3F)); + rec_2.add(new Double(5.3D)); + rec_2.add(new Boolean(true)); + rec_2.add(new String("hcat and hadoop")); + rec_2.add(null); + rec_2.add("null"); + HCatRecord tup_2 = new DefaultHCatRecord(rec_2); + + List rec_3 = new 
ArrayList(10); + rec_3.add(new Byte("123")); + rec_3.add(new Short("456")); + rec_3.add(new Integer(789)); + rec_3.add(new Long(1000L)); + rec_3.add(new Double(5.3D)); + rec_3.add(new String("hcat and hadoop")); + rec_3.add(null); + List innerList = new ArrayList(); + innerList.add(314); + innerList.add(007); + rec_3.add(innerList); + Map map = new HashMap(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or is it not?"); + rec_3.add(map); + + HCatRecord tup_3 = new DefaultHCatRecord(rec_3); + + List rec_4 = new ArrayList(8); + rec_4.add(new Byte("123")); + rec_4.add(new Short("456")); + rec_4.add(new Integer(789)); + rec_4.add(new Long(1000L)); + rec_4.add(new Double(5.3D)); + rec_4.add(new String("hcat and hadoop")); + rec_4.add(null); + rec_4.add("null"); + + Map map2 = new HashMap(3); + map2.put(new Short("2"), "hcat is cool"); + map2.put(new Short("3"), "is it?"); + map2.put(new Short("4"), "or is it not?"); + rec_4.add(map2); + List innerList2 = new ArrayList(); + innerList2.add(314); + innerList2.add(007); + rec_4.add(innerList2); + HCatRecord tup_4 = new DefaultHCatRecord(rec_4); + + + List rec_5 = new ArrayList(3); + rec_5.add(getByteArray()); + rec_5.add(getStruct()); + rec_5.add(getList()); + HCatRecord tup_5 = new DefaultHCatRecord(rec_5); + + + List rec_6 = new ArrayList(3); + rec_6.add(getByteArray()); + rec_6.add(getStruct()); + rec_6.add(getList()); + HCatRecord tup_6 = new DefaultHCatRecord(rec_6); + + + return new HCatRecord[]{tup_1, tup_2, tup_3, tup_4, tup_5, tup_6}; + + } + + private Object getList() { + return getStruct(); + } + + private Object getByteArray() { + return new byte[]{1, 2, 3, 4}; + } + + private List getStruct() { + List struct = new ArrayList(); + struct.add(new Integer(1)); + struct.add(new String("x")); + return struct; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestHCatRecordSerDe.java b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestHCatRecordSerDe.java index 8f3e801..cb02a3f 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestHCatRecordSerDe.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestHCatRecordSerDe.java @@ -40,133 +40,133 @@ */ public class TestHCatRecordSerDe extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatRecordSerDe.class); - - public Map getData() { - Map data = new HashMap(); - - List rlist = new ArrayList(11); - rlist.add(new Byte("123")); - rlist.add(new Short("456")); - rlist.add(new Integer(789)); - rlist.add(new Long(1000L)); - rlist.add(new Double(5.3D)); - rlist.add(new Float(2.39F)); - rlist.add(new String("hcat and hadoop")); - rlist.add(null); - - List innerStruct = new ArrayList(2); - innerStruct.add(new String("abc")); - innerStruct.add(new String("def")); - rlist.add(innerStruct); - - List innerList = new ArrayList(); - innerList.add(314); - innerList.add(007); - rlist.add(innerList); - - Map map = new HashMap(3); - map.put(new Short("2"), "hcat is cool"); - map.put(new Short("3"), "is it?"); - map.put(new Short("4"), "or is it not?"); - rlist.add(map); - - rlist.add(new Boolean(true)); - - List c1 = new ArrayList(); - List c1_1 = new ArrayList(); - c1_1.add(new Integer(12)); - List i2 = new ArrayList(); - List ii1 = new ArrayList(); - ii1.add(new Integer(13)); - ii1.add(new Integer(14)); - i2.add(ii1); - Map> ii2 = new HashMap>(); - List iii1 = new ArrayList(); - iii1.add(new Integer(15)); - ii2.put("phew", iii1); - i2.add(ii2); - c1_1.add(i2); - 
c1.add(c1_1); - rlist.add(c1); - List am = new ArrayList(); - Map am_1 = new HashMap(); - am_1.put("noo", "haha"); - am.add(am_1); - rlist.add(am); - List aa = new ArrayList(); - List aa_1 = new ArrayList(); - aa_1.add("bloo"); - aa_1.add("bwahaha"); - aa.add(aa_1); - rlist.add(aa); - - String typeString = - "tinyint,smallint,int,bigint,double,float,string,string," - + "struct,array,map,boolean," - + "array,ii2:map>>>>," - + "array>,array>"; - Properties props = new Properties(); - - props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1,am,aa"); - props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); + private static final Logger LOG = LoggerFactory.getLogger(TestHCatRecordSerDe.class); + + public Map getData() { + Map data = new HashMap(); + + List rlist = new ArrayList(11); + rlist.add(new Byte("123")); + rlist.add(new Short("456")); + rlist.add(new Integer(789)); + rlist.add(new Long(1000L)); + rlist.add(new Double(5.3D)); + rlist.add(new Float(2.39F)); + rlist.add(new String("hcat and hadoop")); + rlist.add(null); + + List innerStruct = new ArrayList(2); + innerStruct.add(new String("abc")); + innerStruct.add(new String("def")); + rlist.add(innerStruct); + + List innerList = new ArrayList(); + innerList.add(314); + innerList.add(007); + rlist.add(innerList); + + Map map = new HashMap(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or is it not?"); + rlist.add(map); + + rlist.add(new Boolean(true)); + + List c1 = new ArrayList(); + List c1_1 = new ArrayList(); + c1_1.add(new Integer(12)); + List i2 = new ArrayList(); + List ii1 = new ArrayList(); + ii1.add(new Integer(13)); + ii1.add(new Integer(14)); + i2.add(ii1); + Map> ii2 = new HashMap>(); + List iii1 = new ArrayList(); + iii1.add(new Integer(15)); + ii2.put("phew", iii1); + i2.add(ii2); + c1_1.add(i2); + c1.add(c1_1); + rlist.add(c1); + List am = new ArrayList(); + Map am_1 = new HashMap(); + am_1.put("noo", "haha"); + am.add(am_1); + rlist.add(am); + List aa = new ArrayList(); + List aa_1 = new ArrayList(); + aa_1.add("bloo"); + aa_1.add("bwahaha"); + aa.add(aa_1); + rlist.add(aa); + + String typeString = + "tinyint,smallint,int,bigint,double,float,string,string," + + "struct,array,map,boolean," + + "array,ii2:map>>>>," + + "array>,array>"; + Properties props = new Properties(); + + props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1,am,aa"); + props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); // props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); // props.put(Constants.SERIALIZATION_FORMAT, "1"); - data.put(props, new DefaultHCatRecord(rlist)); - return data; - } - - public void testRW() throws Exception { + data.put(props, new DefaultHCatRecord(rlist)); + return data; + } - Configuration conf = new Configuration(); + public void testRW() throws Exception { - for (Entry e : getData().entrySet()) { - Properties tblProps = e.getKey(); - HCatRecord r = e.getValue(); + Configuration conf = new Configuration(); - HCatRecordSerDe hrsd = new HCatRecordSerDe(); - hrsd.initialize(conf, tblProps); + for (Entry e : getData().entrySet()) { + Properties tblProps = e.getKey(); + HCatRecord r = e.getValue(); - LOG.info("ORIG: {}", r); + HCatRecordSerDe hrsd = new HCatRecordSerDe(); + hrsd.initialize(conf, tblProps); - Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); - LOG.info("ONE: {}", s); + LOG.info("ORIG: {}", r); - HCatRecord r2 = (HCatRecord) hrsd.deserialize(s); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, 
r2)); + Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); + LOG.info("ONE: {}", s); - // If it went through correctly, then s is also a HCatRecord, - // and also equal to the above, and a deepcopy, and this holds - // through for multiple levels more of serialization as well. + HCatRecord r2 = (HCatRecord) hrsd.deserialize(s); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, r2)); - Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector()); - LOG.info("TWO: {}", s2); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s)); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2)); + // If it went through correctly, then s is also a HCatRecord, + // and also equal to the above, and a deepcopy, and this holds + // through for multiple levels more of serialization as well. - // serialize using another serde, and read out that object repr. - LazySimpleSerDe testSD = new LazySimpleSerDe(); - testSD.initialize(conf, tblProps); + Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector()); + LOG.info("TWO: {}", s2); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s)); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2)); - Writable s3 = testSD.serialize(s, hrsd.getObjectInspector()); - LOG.info("THREE: {}", s3); - Object o3 = testSD.deserialize(s3); - Assert.assertFalse(r.getClass().equals(o3.getClass())); + // serialize using another serde, and read out that object repr. + LazySimpleSerDe testSD = new LazySimpleSerDe(); + testSD.initialize(conf, tblProps); - // then serialize again using hrsd, and compare results - HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector()); - LOG.info("FOUR: {}", s4); + Writable s3 = testSD.serialize(s, hrsd.getObjectInspector()); + LOG.info("THREE: {}", s3); + Object o3 = testSD.deserialize(s3); + Assert.assertFalse(r.getClass().equals(o3.getClass())); - // Test LazyHCatRecord init and read - LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector()); - LOG.info("FIVE: {}", s5); + // then serialize again using hrsd, and compare results + HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector()); + LOG.info("FOUR: {}", s4); - LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector()); - LOG.info("SIX: {}", s6); + // Test LazyHCatRecord init and read + LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector()); + LOG.info("FIVE: {}", s5); - } + LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector()); + LOG.info("SIX: {}", s6); } + } + } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestJsonSerDe.java b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestJsonSerDe.java index f41ca36..d5c6707 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestJsonSerDe.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestJsonSerDe.java @@ -38,180 +38,180 @@ */ public class TestJsonSerDe extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestJsonSerDe.class); - - public List> getData() { - List> data = new ArrayList>(); - - List rlist = new ArrayList(13); - rlist.add(new Byte("123")); - rlist.add(new Short("456")); - rlist.add(new Integer(789)); - rlist.add(new Long(1000L)); - rlist.add(new Double(5.3D)); - rlist.add(new Float(2.39F)); - rlist.add(new String("hcat and hadoop")); - rlist.add(null); - - List innerStruct = new ArrayList(2); - innerStruct.add(new String("abc")); - innerStruct.add(new String("def")); - 
rlist.add(innerStruct); - - List innerList = new ArrayList(); - innerList.add(314); - innerList.add(007); - rlist.add(innerList); - - Map map = new HashMap(3); - map.put(new Short("2"), "hcat is cool"); - map.put(new Short("3"), "is it?"); - map.put(new Short("4"), "or is it not?"); - rlist.add(map); - - rlist.add(new Boolean(true)); - - List c1 = new ArrayList(); - List c1_1 = new ArrayList(); - c1_1.add(new Integer(12)); - List i2 = new ArrayList(); - List ii1 = new ArrayList(); - ii1.add(new Integer(13)); - ii1.add(new Integer(14)); - i2.add(ii1); - Map> ii2 = new HashMap>(); - List iii1 = new ArrayList(); - iii1.add(new Integer(15)); - ii2.put("phew", iii1); - i2.add(ii2); - c1_1.add(i2); - c1.add(c1_1); - rlist.add(c1); - - List nlist = new ArrayList(13); - nlist.add(null); // tinyint - nlist.add(null); // smallint - nlist.add(null); // int - nlist.add(null); // bigint - nlist.add(null); // double - nlist.add(null); // float - nlist.add(null); // string - nlist.add(null); // string - nlist.add(null); // struct - nlist.add(null); // array - nlist.add(null); // map - nlist.add(null); // bool - nlist.add(null); // complex - - String typeString = - "tinyint,smallint,int,bigint,double,float,string,string," - + "struct,array,map,boolean," - + "array,ii2:map>>>>"; - Properties props = new Properties(); - - props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1"); - props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); + private static final Logger LOG = LoggerFactory.getLogger(TestJsonSerDe.class); + + public List> getData() { + List> data = new ArrayList>(); + + List rlist = new ArrayList(13); + rlist.add(new Byte("123")); + rlist.add(new Short("456")); + rlist.add(new Integer(789)); + rlist.add(new Long(1000L)); + rlist.add(new Double(5.3D)); + rlist.add(new Float(2.39F)); + rlist.add(new String("hcat and hadoop")); + rlist.add(null); + + List innerStruct = new ArrayList(2); + innerStruct.add(new String("abc")); + innerStruct.add(new String("def")); + rlist.add(innerStruct); + + List innerList = new ArrayList(); + innerList.add(314); + innerList.add(007); + rlist.add(innerList); + + Map map = new HashMap(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or is it not?"); + rlist.add(map); + + rlist.add(new Boolean(true)); + + List c1 = new ArrayList(); + List c1_1 = new ArrayList(); + c1_1.add(new Integer(12)); + List i2 = new ArrayList(); + List ii1 = new ArrayList(); + ii1.add(new Integer(13)); + ii1.add(new Integer(14)); + i2.add(ii1); + Map> ii2 = new HashMap>(); + List iii1 = new ArrayList(); + iii1.add(new Integer(15)); + ii2.put("phew", iii1); + i2.add(ii2); + c1_1.add(i2); + c1.add(c1_1); + rlist.add(c1); + + List nlist = new ArrayList(13); + nlist.add(null); // tinyint + nlist.add(null); // smallint + nlist.add(null); // int + nlist.add(null); // bigint + nlist.add(null); // double + nlist.add(null); // float + nlist.add(null); // string + nlist.add(null); // string + nlist.add(null); // struct + nlist.add(null); // array + nlist.add(null); // map + nlist.add(null); // bool + nlist.add(null); // complex + + String typeString = + "tinyint,smallint,int,bigint,double,float,string,string," + + "struct,array,map,boolean," + + "array,ii2:map>>>>"; + Properties props = new Properties(); + + props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1"); + props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); // props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); // 
props.put(Constants.SERIALIZATION_FORMAT, "1"); - data.add(new Pair(props, new DefaultHCatRecord(rlist))); - data.add(new Pair(props, new DefaultHCatRecord(nlist))); - return data; - } - - public void testRW() throws Exception { + data.add(new Pair(props, new DefaultHCatRecord(rlist))); + data.add(new Pair(props, new DefaultHCatRecord(nlist))); + return data; + } - Configuration conf = new Configuration(); + public void testRW() throws Exception { - for (Pair e : getData()) { - Properties tblProps = e.first; - HCatRecord r = e.second; + Configuration conf = new Configuration(); - HCatRecordSerDe hrsd = new HCatRecordSerDe(); - hrsd.initialize(conf, tblProps); + for (Pair e : getData()) { + Properties tblProps = e.first; + HCatRecord r = e.second; - JsonSerDe jsde = new JsonSerDe(); - jsde.initialize(conf, tblProps); + HCatRecordSerDe hrsd = new HCatRecordSerDe(); + hrsd.initialize(conf, tblProps); - LOG.info("ORIG:{}", r); + JsonSerDe jsde = new JsonSerDe(); + jsde.initialize(conf, tblProps); - Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); - LOG.info("ONE:{}", s); + LOG.info("ORIG:{}", r); - Object o1 = hrsd.deserialize(s); - assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1)); + Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); + LOG.info("ONE:{}", s); - Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector()); - LOG.info("TWO:{}", s2); - Object o2 = jsde.deserialize(s2); - LOG.info("deserialized TWO : {} ", o2); + Object o1 = hrsd.deserialize(s); + assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1)); - assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); - } + Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector()); + LOG.info("TWO:{}", s2); + Object o2 = jsde.deserialize(s2); + LOG.info("deserialized TWO : {} ", o2); + assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); } - public void testRobustRead() throws Exception { - /** - * This test has been added to account for HCATALOG-436 - * We write out columns with "internal column names" such - * as "_col0", but try to read with retular column names. - */ + } - Configuration conf = new Configuration(); + public void testRobustRead() throws Exception { + /** + * This test has been added to account for HCATALOG-436 + * We write out columns with "internal column names" such + * as "_col0", but try to read with retular column names. 
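
[Editorial sketch] The HCATALOG-436 scenario this test exercises hinges on Hive's positional "internal" column names: data is written out under names like "_col0", "_col1", ... while the reader still uses the declared column names. The test's getInternalNames() helper, which appears further down in this file's diff, performs that rewrite. The standalone class below is a minimal sketch of the same mapping, not part of HCatalog; the class name InternalNameSketch and the sample column list are made up, and it assumes only hive-common (for HiveConf.getColumnInternalName, which the test itself calls) is on the classpath.

import org.apache.hadoop.hive.conf.HiveConf;

public class InternalNameSketch {

  // Rewrite a comma-separated column-name list to Hive's positional internal names,
  // keeping "_col0" for the first column and HiveConf.getColumnInternalName(i) after that.
  static String toInternalNames(String columnNames) {
    String[] names = columnNames.split(",");
    StringBuilder sb = new StringBuilder("_col0");
    for (int i = 1; i < names.length; i++) {
      sb.append(',').append(HiveConf.getColumnInternalName(i)); // "_col1", "_col2", ...
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    // prints: _col0,_col1,_col2,_col3
    System.out.println(toInternalNames("ti,si,i,bi"));
  }
}
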
+ */ - for (Pair e : getData()) { - Properties tblProps = e.first; - HCatRecord r = e.second; + Configuration conf = new Configuration(); - Properties internalTblProps = new Properties(); - for (Map.Entry pe : tblProps.entrySet()) { - if (!pe.getKey().equals(serdeConstants.LIST_COLUMNS)) { - internalTblProps.put(pe.getKey(), pe.getValue()); - } else { - internalTblProps.put(pe.getKey(), getInternalNames((String) pe.getValue())); - } - } + for (Pair e : getData()) { + Properties tblProps = e.first; + HCatRecord r = e.second; - LOG.info("orig tbl props:{}", tblProps); - LOG.info("modif tbl props:{}", internalTblProps); + Properties internalTblProps = new Properties(); + for (Map.Entry pe : tblProps.entrySet()) { + if (!pe.getKey().equals(serdeConstants.LIST_COLUMNS)) { + internalTblProps.put(pe.getKey(), pe.getValue()); + } else { + internalTblProps.put(pe.getKey(), getInternalNames((String) pe.getValue())); + } + } - JsonSerDe wjsd = new JsonSerDe(); - wjsd.initialize(conf, internalTblProps); + LOG.info("orig tbl props:{}", tblProps); + LOG.info("modif tbl props:{}", internalTblProps); - JsonSerDe rjsd = new JsonSerDe(); - rjsd.initialize(conf, tblProps); + JsonSerDe wjsd = new JsonSerDe(); + wjsd.initialize(conf, internalTblProps); - LOG.info("ORIG:{}", r); + JsonSerDe rjsd = new JsonSerDe(); + rjsd.initialize(conf, tblProps); - Writable s = wjsd.serialize(r, wjsd.getObjectInspector()); - LOG.info("ONE:{}", s); + LOG.info("ORIG:{}", r); - Object o1 = wjsd.deserialize(s); - LOG.info("deserialized ONE : {} ", o1); + Writable s = wjsd.serialize(r, wjsd.getObjectInspector()); + LOG.info("ONE:{}", s); - Object o2 = rjsd.deserialize(s); - LOG.info("deserialized TWO : {} ", o2); - assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); - } + Object o1 = wjsd.deserialize(s); + LOG.info("deserialized ONE : {} ", o1); + Object o2 = rjsd.deserialize(s); + LOG.info("deserialized TWO : {} ", o2); + assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); } - String getInternalNames(String columnNames) { - if (columnNames == null) { - return null; - } - if (columnNames.isEmpty()) { - return ""; - } + } - StringBuffer sb = new StringBuffer(); - int numStrings = columnNames.split(",").length; - sb.append("_col0"); - for (int i = 1; i < numStrings; i++) { - sb.append(","); - sb.append(HiveConf.getColumnInternalName(i)); - } - return sb.toString(); + String getInternalNames(String columnNames) { + if (columnNames == null) { + return null; + } + if (columnNames.isEmpty()) { + return ""; + } + + StringBuffer sb = new StringBuffer(); + int numStrings = columnNames.split(",").length; + sb.append("_col0"); + for (int i = 1; i < numStrings; i++) { + sb.append(","); + sb.append(HiveConf.getColumnInternalName(i)); } + return sb.toString(); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestLazyHCatRecord.java b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestLazyHCatRecord.java index ed0147c..af561cb 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestLazyHCatRecord.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestLazyHCatRecord.java @@ -35,162 +35,162 @@ */ public class TestLazyHCatRecord { - private final int INT_CONST = 789; - private final long LONG_CONST = 5000000000L; - private final double DOUBLE_CONST = 3.141592654; - private final String STRING_CONST = "hello world"; - - @Test - public void testGet() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - 
Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) r.get(3)); + private final int INT_CONST = 789; + private final long LONG_CONST = 5000000000L; + private final double DOUBLE_CONST = 3.141592654; + private final String STRING_CONST = "hello world"; + + @Test + public void testGet() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) r.get(3)); + } + + @Test + public void testGetWithName() throws Exception { + TypeInfo ti = getTypeInfo(); + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti)); + HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti) + .get(0).getStructSubSchema(); + Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema)); + } + + @Test + public void testGetAll() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + List list = r.getAll(); + Assert.assertEquals(INT_CONST, ((Integer) list.get(0)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) list.get(1)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) list.get(2)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) list.get(3)); + } + + @Test + public void testSet() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.set(3, "Mary had a little lamb"); + } catch (UnsupportedOperationException uoe) { + sawException = true; } - - @Test - public void testGetWithName() throws Exception { - TypeInfo ti = getTypeInfo(); - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti)); - HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti) - .get(0).getStructSubSchema(); - Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema)); - } - - @Test - public void testGetAll() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - List list = r.getAll(); - Assert.assertEquals(INT_CONST, ((Integer) list.get(0)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) list.get(1)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) list.get(2)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) list.get(3)); - } - - @Test - public void testSet() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.set(3, "Mary had a little lamb"); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testSize() throws 
Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - Assert.assertEquals(4, r.size()); - } - - @Test - public void testReadFields() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.readFields(null); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testWrite() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.write(null); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testSetWithName() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.set("fred", null, "bob"); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); + Assert.assertTrue(sawException); + } + + @Test + public void testSize() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + Assert.assertEquals(4, r.size()); + } + + @Test + public void testReadFields() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.readFields(null); + } catch (UnsupportedOperationException uoe) { + sawException = true; } - - @Test - public void testRemove() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.remove(0); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); + Assert.assertTrue(sawException); + } + + @Test + public void testWrite() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.write(null); + } catch (UnsupportedOperationException uoe) { + sawException = true; } - - @Test - public void testCopy() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.copy(null); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); + Assert.assertTrue(sawException); + } + + @Test + public void testSetWithName() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.set("fred", null, "bob"); + } catch (UnsupportedOperationException uoe) { + sawException = true; } - - @Test - public void testGetWritable() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()).getWritable(); - Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) r.get(3)); - Assert.assertEquals("org.apache.hcatalog.data.DefaultHCatRecord", r.getClass().getName()); + Assert.assertTrue(sawException); + } + + @Test + public void testRemove() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.remove(0); + } catch (UnsupportedOperationException uoe) { + sawException = 
true; } - - private HCatRecord getHCatRecord() throws Exception { - List rec_1 = new ArrayList(4); - rec_1.add( new Integer(INT_CONST)); - rec_1.add( new Long(LONG_CONST)); - rec_1.add( new Double(DOUBLE_CONST)); - rec_1.add( new String(STRING_CONST)); - - return new DefaultHCatRecord(rec_1); - } - - private TypeInfo getTypeInfo() throws Exception { - List names = new ArrayList(4); - names.add("an_int"); - names.add("a_long"); - names.add("a_double"); - names.add("a_string"); - - List tis = new ArrayList(4); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("int")); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("double")); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("string")); - - return TypeInfoFactory.getStructTypeInfo(names, tis); - } - - private ObjectInspector getObjectInspector(TypeInfo ti) throws Exception { - return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector((StructTypeInfo)ti); - } - - private ObjectInspector getObjectInspector() throws Exception { - return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector( - (StructTypeInfo)getTypeInfo()); + Assert.assertTrue(sawException); + } + + @Test + public void testCopy() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.copy(null); + } catch (UnsupportedOperationException uoe) { + sawException = true; } + Assert.assertTrue(sawException); + } + + @Test + public void testGetWritable() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()).getWritable(); + Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) r.get(3)); + Assert.assertEquals("org.apache.hcatalog.data.DefaultHCatRecord", r.getClass().getName()); + } + + private HCatRecord getHCatRecord() throws Exception { + List rec_1 = new ArrayList(4); + rec_1.add( new Integer(INT_CONST)); + rec_1.add( new Long(LONG_CONST)); + rec_1.add( new Double(DOUBLE_CONST)); + rec_1.add( new String(STRING_CONST)); + + return new DefaultHCatRecord(rec_1); + } + + private TypeInfo getTypeInfo() throws Exception { + List names = new ArrayList(4); + names.add("an_int"); + names.add("a_long"); + names.add("a_double"); + names.add("a_string"); + + List tis = new ArrayList(4); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("int")); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("double")); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("string")); + + return TypeInfoFactory.getStructTypeInfo(names, tis); + } + + private ObjectInspector getObjectInspector(TypeInfo ti) throws Exception { + return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector((StructTypeInfo)ti); + } + + private ObjectInspector getObjectInspector() throws Exception { + return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector( + (StructTypeInfo)getTypeInfo()); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestReaderWriter.java b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestReaderWriter.java index 2c28bdd..791de88 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestReaderWriter.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/data/TestReaderWriter.java @@ -53,134 +53,134 @@ */ public 
class TestReaderWriter extends HCatBaseTest { - @Test - public void test() throws MetaException, CommandNeedRetryException, - IOException, ClassNotFoundException { - - driver.run("drop table mytbl"); - driver.run("create table mytbl (a string, b int)"); - Iterator> itr = hiveConf.iterator(); - Map map = new HashMap(); - while (itr.hasNext()) { - Entry kv = itr.next(); - map.put(kv.getKey(), kv.getValue()); - } - - WriterContext cntxt = runsInMaster(map); - - File writeCntxtFile = File.createTempFile("hcat-write", "temp"); - writeCntxtFile.deleteOnExit(); - - // Serialize context. - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(writeCntxtFile)); - oos.writeObject(cntxt); - oos.flush(); - oos.close(); - - // Now, deserialize it. - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(writeCntxtFile)); - cntxt = (WriterContext) ois.readObject(); - ois.close(); - - runsInSlave(cntxt); - commit(map, true, cntxt); - - ReaderContext readCntxt = runsInMaster(map, false); - - File readCntxtFile = File.createTempFile("hcat-read", "temp"); - readCntxtFile.deleteOnExit(); - oos = new ObjectOutputStream(new FileOutputStream(readCntxtFile)); - oos.writeObject(readCntxt); - oos.flush(); - oos.close(); - - ois = new ObjectInputStream(new FileInputStream(readCntxtFile)); - readCntxt = (ReaderContext) ois.readObject(); - ois.close(); - - for (InputSplit split : readCntxt.getSplits()) { - runsInSlave(split, readCntxt.getConf()); - } + @Test + public void test() throws MetaException, CommandNeedRetryException, + IOException, ClassNotFoundException { + + driver.run("drop table mytbl"); + driver.run("create table mytbl (a string, b int)"); + Iterator> itr = hiveConf.iterator(); + Map map = new HashMap(); + while (itr.hasNext()) { + Entry kv = itr.next(); + map.put(kv.getKey(), kv.getValue()); } - private WriterContext runsInMaster(Map config) throws HCatException { + WriterContext cntxt = runsInMaster(map); - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable("mytbl").build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - WriterContext info = writer.prepareWrite(); - return info; - } + File writeCntxtFile = File.createTempFile("hcat-write", "temp"); + writeCntxtFile.deleteOnExit(); - private ReaderContext runsInMaster(Map config, boolean bogus) - throws HCatException { - ReadEntity entity = new ReadEntity.Builder().withTable("mytbl").build(); - HCatReader reader = DataTransferFactory.getHCatReader(entity, config); - ReaderContext cntxt = reader.prepareRead(); - return cntxt; - } + // Serialize context. + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(writeCntxtFile)); + oos.writeObject(cntxt); + oos.flush(); + oos.close(); - private void runsInSlave(InputSplit split, Configuration config) throws HCatException { - - HCatReader reader = DataTransferFactory.getHCatReader(split, config); - Iterator itr = reader.read(); - int i = 1; - while (itr.hasNext()) { - HCatRecord read = itr.next(); - HCatRecord written = getRecord(i++); - // Argh, HCatRecord doesnt implement equals() - Assert.assertTrue("Read: " + read.get(0) + "Written: " + written.get(0), - written.get(0).equals(read.get(0))); - Assert.assertTrue("Read: " + read.get(1) + "Written: " + written.get(1), - written.get(1).equals(read.get(1))); - Assert.assertEquals(2, read.size()); - } - //Assert.assertFalse(itr.hasNext()); - } + // Now, deserialize it. 
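
[Editorial sketch] TestReaderWriter pushes its WriterContext and ReaderContext through Java serialization to a temp file and back, imitating how a context produced on the "master" would be shipped to "slave" tasks. The helper below is a generic round-trip distilled from that pattern; SerdeRoundTrip and roundTrip are hypothetical names, the code uses only java.io, and it assumes the object in question is java.io.Serializable (as the contexts serialized in this test evidently are).

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

public final class SerdeRoundTrip {

  @SuppressWarnings("unchecked")
  static <T extends Serializable> T roundTrip(T obj) throws IOException, ClassNotFoundException {
    File tmp = File.createTempFile("hcat-ctx", "ser");
    tmp.deleteOnExit();
    // serialize to disk, as the test does with its WriterContext/ReaderContext
    try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(tmp))) {
      oos.writeObject(obj);
    }
    // read back a fresh, independently deserialized copy
    try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(tmp))) {
      return (T) ois.readObject();
    }
  }
}
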
+ ObjectInputStream ois = new ObjectInputStream(new FileInputStream(writeCntxtFile)); + cntxt = (WriterContext) ois.readObject(); + ois.close(); - private void runsInSlave(WriterContext context) throws HCatException { + runsInSlave(cntxt); + commit(map, true, cntxt); - HCatWriter writer = DataTransferFactory.getHCatWriter(context); - writer.write(new HCatRecordItr()); - } + ReaderContext readCntxt = runsInMaster(map, false); - private void commit(Map config, boolean status, - WriterContext context) throws IOException { - - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable("mytbl").build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - if (status) { - writer.commit(context); - } else { - writer.abort(context); - } + File readCntxtFile = File.createTempFile("hcat-read", "temp"); + readCntxtFile.deleteOnExit(); + oos = new ObjectOutputStream(new FileOutputStream(readCntxtFile)); + oos.writeObject(readCntxt); + oos.flush(); + oos.close(); + + ois = new ObjectInputStream(new FileInputStream(readCntxtFile)); + readCntxt = (ReaderContext) ois.readObject(); + ois.close(); + + for (InputSplit split : readCntxt.getSplits()) { + runsInSlave(split, readCntxt.getConf()); + } + } + + private WriterContext runsInMaster(Map config) throws HCatException { + + WriteEntity.Builder builder = new WriteEntity.Builder(); + WriteEntity entity = builder.withTable("mytbl").build(); + HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); + WriterContext info = writer.prepareWrite(); + return info; + } + + private ReaderContext runsInMaster(Map config, boolean bogus) + throws HCatException { + ReadEntity entity = new ReadEntity.Builder().withTable("mytbl").build(); + HCatReader reader = DataTransferFactory.getHCatReader(entity, config); + ReaderContext cntxt = reader.prepareRead(); + return cntxt; + } + + private void runsInSlave(InputSplit split, Configuration config) throws HCatException { + + HCatReader reader = DataTransferFactory.getHCatReader(split, config); + Iterator itr = reader.read(); + int i = 1; + while (itr.hasNext()) { + HCatRecord read = itr.next(); + HCatRecord written = getRecord(i++); + // Argh, HCatRecord doesnt implement equals() + Assert.assertTrue("Read: " + read.get(0) + "Written: " + written.get(0), + written.get(0).equals(read.get(0))); + Assert.assertTrue("Read: " + read.get(1) + "Written: " + written.get(1), + written.get(1).equals(read.get(1))); + Assert.assertEquals(2, read.size()); } + //Assert.assertFalse(itr.hasNext()); + } - private static HCatRecord getRecord(int i) { - List list = new ArrayList(2); - list.add("Row #: " + i); - list.add(i); - return new DefaultHCatRecord(list); + private void runsInSlave(WriterContext context) throws HCatException { + + HCatWriter writer = DataTransferFactory.getHCatWriter(context); + writer.write(new HCatRecordItr()); + } + + private void commit(Map config, boolean status, + WriterContext context) throws IOException { + + WriteEntity.Builder builder = new WriteEntity.Builder(); + WriteEntity entity = builder.withTable("mytbl").build(); + HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); + if (status) { + writer.commit(context); + } else { + writer.abort(context); } + } - private static class HCatRecordItr implements Iterator { + private static HCatRecord getRecord(int i) { + List list = new ArrayList(2); + list.add("Row #: " + i); + list.add(i); + return new DefaultHCatRecord(list); + } - int i = 0; + private static class HCatRecordItr 
implements Iterator { - @Override - public boolean hasNext() { - return i++ < 100 ? true : false; - } + int i = 0; - @Override - public HCatRecord next() { - return getRecord(i); - } + @Override + public boolean hasNext() { + return i++ < 100 ? true : false; + } + + @Override + public HCatRecord next() { + return getRecord(i); + } - @Override - public void remove() { - throw new RuntimeException(); - } + @Override + public void remove() { + throw new RuntimeException(); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchema.java b/hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchema.java index 83868fc..06c6f69 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchema.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchema.java @@ -27,55 +27,55 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.data.schema.TestHCatSchema} instead */ public class TestHCatSchema extends TestCase { - public void testCannotAddFieldMoreThanOnce() throws HCatException { - List fieldSchemaList = new ArrayList(); - fieldSchemaList.add(new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, "What's your handle?")); - fieldSchemaList.add(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); + public void testCannotAddFieldMoreThanOnce() throws HCatException { + List fieldSchemaList = new ArrayList(); + fieldSchemaList.add(new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, "What's your handle?")); + fieldSchemaList.add(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); - HCatSchema schema = new HCatSchema(fieldSchemaList); + HCatSchema schema = new HCatSchema(fieldSchemaList); - assertTrue(schema.getFieldNames().contains("age")); - assertEquals(2, schema.getFields().size()); + assertTrue(schema.getFieldNames().contains("age")); + assertEquals(2, schema.getFields().size()); - try { - schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); - fail("Was able to append field schema with same name"); - } catch (HCatException he) { - assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); - } - - assertTrue(schema.getFieldNames().contains("age")); - assertEquals(2, schema.getFields().size()); + try { + schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); + fail("Was able to append field schema with same name"); + } catch (HCatException he) { + assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); + } - // Should also not be able to add fields of different types with same name - try { - schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.STRING, "Maybe spelled out?")); - fail("Was able to append field schema with same name"); - } catch (HCatException he) { - assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); - } + assertTrue(schema.getFieldNames().contains("age")); + assertEquals(2, schema.getFields().size()); - assertTrue(schema.getFieldNames().contains("age")); - assertEquals(2, schema.getFields().size()); + // Should also not be able to add fields of different types with same name + try { + schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.STRING, "Maybe spelled out?")); + fail("Was able to append field schema with same name"); + } catch (HCatException he) { + assertTrue(he.getMessage().contains("Attempt to 
append HCatFieldSchema with already existing name: age.")); } - public void testCannotInstantiateSchemaWithRepeatedFieldNames() throws HCatException { - List fieldSchemaList = new ArrayList(); + assertTrue(schema.getFieldNames().contains("age")); + assertEquals(2, schema.getFields().size()); + } + + public void testCannotInstantiateSchemaWithRepeatedFieldNames() throws HCatException { + List fieldSchemaList = new ArrayList(); - fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); - fieldSchemaList.add(new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo")); + fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); + fieldSchemaList.add(new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo")); - // No duplicate names. This should be ok - HCatSchema schema = new HCatSchema(fieldSchemaList); + // No duplicate names. This should be ok + HCatSchema schema = new HCatSchema(fieldSchemaList); - fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.STRING, "as a String")); + fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.STRING, "as a String")); - // Now a duplicated field name. Should fail - try { - HCatSchema schema2 = new HCatSchema(fieldSchemaList); - fail("Able to add duplicate field name"); - } catch (IllegalArgumentException iae) { - assertTrue(iae.getMessage().contains("Field named memberID already exists")); - } + // Now a duplicated field name. Should fail + try { + HCatSchema schema2 = new HCatSchema(fieldSchemaList); + fail("Able to add duplicate field name"); + } catch (IllegalArgumentException iae) { + assertTrue(iae.getMessage().contains("Field named memberID already exists")); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchemaUtils.java b/hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchemaUtils.java index 3b0e268..a86faf8 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchemaUtils.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchemaUtils.java @@ -33,52 +33,52 @@ */ public class TestHCatSchemaUtils extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatSchemaUtils.class); + private static final Logger LOG = LoggerFactory.getLogger(TestHCatSchemaUtils.class); - public void testSimpleOperation() throws Exception { - String typeString = "struct," - + "currently_registered_courses:array," - + "current_grades:map," - + "phnos:array>,blah:array>"; + public void testSimpleOperation() throws Exception { + String typeString = "struct," + + "currently_registered_courses:array," + + "current_grades:map," + + "phnos:array>,blah:array>"; - TypeInfo ti = TypeInfoUtils.getTypeInfoFromTypeString(typeString); + TypeInfo ti = TypeInfoUtils.getTypeInfoFromTypeString(typeString); - HCatSchema hsch = HCatSchemaUtils.getHCatSchemaFromTypeString(typeString); - LOG.info("Type name : {}", ti.getTypeName()); - LOG.info("HCatSchema : {}", hsch); - assertEquals(hsch.size(), 1); - assertEquals(ti.getTypeName(), hsch.get(0).getTypeString()); - assertEquals(hsch.get(0).getTypeString(), typeString); - } + HCatSchema hsch = HCatSchemaUtils.getHCatSchemaFromTypeString(typeString); + LOG.info("Type name : {}", ti.getTypeName()); + LOG.info("HCatSchema : {}", hsch); + assertEquals(hsch.size(), 1); + assertEquals(ti.getTypeName(), hsch.get(0).getTypeString()); + 
assertEquals(hsch.get(0).getTypeString(), typeString); + } - @SuppressWarnings("unused") - private void pretty_print(PrintStream pout, HCatSchema hsch) throws HCatException { - pretty_print(pout, hsch, ""); - } + @SuppressWarnings("unused") + private void pretty_print(PrintStream pout, HCatSchema hsch) throws HCatException { + pretty_print(pout, hsch, ""); + } - private void pretty_print(PrintStream pout, HCatSchema hsch, String prefix) throws HCatException { - int i = 0; - for (HCatFieldSchema field : hsch.getFields()) { - pretty_print(pout, field, prefix + "." + (field.getName() == null ? i : field.getName())); - i++; - } + private void pretty_print(PrintStream pout, HCatSchema hsch, String prefix) throws HCatException { + int i = 0; + for (HCatFieldSchema field : hsch.getFields()) { + pretty_print(pout, field, prefix + "." + (field.getName() == null ? i : field.getName())); + i++; } + } - private void pretty_print(PrintStream pout, HCatFieldSchema hfsch, String prefix) throws HCatException { + private void pretty_print(PrintStream pout, HCatFieldSchema hfsch, String prefix) throws HCatException { - Category tcat = hfsch.getCategory(); - if (Category.STRUCT == tcat) { - pretty_print(pout, hfsch.getStructSubSchema(), prefix); - } else if (Category.ARRAY == tcat) { - pretty_print(pout, hfsch.getArrayElementSchema(), prefix); - } else if (Category.MAP == tcat) { - pout.println(prefix + ".mapkey:\t" + hfsch.getMapKeyType().toString()); - pretty_print(pout, hfsch.getMapValueSchema(), prefix + ".mapvalue:"); - } else { - pout.println(prefix + "\t" + hfsch.getType().toString()); - } + Category tcat = hfsch.getCategory(); + if (Category.STRUCT == tcat) { + pretty_print(pout, hfsch.getStructSubSchema(), prefix); + } else if (Category.ARRAY == tcat) { + pretty_print(pout, hfsch.getArrayElementSchema(), prefix); + } else if (Category.MAP == tcat) { + pout.println(prefix + ".mapkey:\t" + hfsch.getMapKeyType().toString()); + pretty_print(pout, hfsch.getMapValueSchema(), prefix + ".mapvalue:"); + } else { + pout.println(prefix + "\t" + hfsch.getType().toString()); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java index dd55557..a76ee53 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java @@ -30,26 +30,26 @@ */ public class TestOrcDynamicPartitioned extends TestHCatDynamicPartitioned { - @BeforeClass - public static void generateInputData() throws Exception { - tableName = "testOrcDynamicPartitionedTable"; - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - generateDataColumns(); - } - - @Override - protected String inputFormat() { - return OrcInputFormat.class.getName(); - } - - @Override - protected String outputFormat() { - return OrcOutputFormat.class.getName(); - } - - @Override - protected String serdeClass() { - return OrcSerde.class.getName(); - } + @BeforeClass + public static void generateInputData() throws Exception { + tableName = "testOrcDynamicPartitionedTable"; + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + generateDataColumns(); + } + + @Override + protected String inputFormat() { + return OrcInputFormat.class.getName(); + } + + @Override + protected String outputFormat() { + return OrcOutputFormat.class.getName(); + } + + @Override + protected String serdeClass() { + 
return OrcSerde.class.getName(); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatBaseTest.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatBaseTest.java index f582580..1f1f2af 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatBaseTest.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatBaseTest.java @@ -40,48 +40,48 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.HCatBaseTest} instead */ public class HCatBaseTest { - protected static final Logger LOG = LoggerFactory.getLogger(HCatBaseTest.class); - protected static final String TEST_DATA_DIR = System.getProperty("user.dir") + - "/build/test/data/" + HCatBaseTest.class.getCanonicalName(); - protected static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + protected static final Logger LOG = LoggerFactory.getLogger(HCatBaseTest.class); + protected static final String TEST_DATA_DIR = System.getProperty("user.dir") + + "/build/test/data/" + HCatBaseTest.class.getCanonicalName(); + protected static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - protected HiveConf hiveConf = null; - protected Driver driver = null; - protected HiveMetaStoreClient client = null; + protected HiveConf hiveConf = null; + protected Driver driver = null; + protected HiveMetaStoreClient client = null; - @BeforeClass - public static void setUpTestDataDir() throws Exception { - LOG.info("Using warehouse directory " + TEST_WAREHOUSE_DIR); - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - Assert.assertTrue(new File(TEST_WAREHOUSE_DIR).mkdirs()); + @BeforeClass + public static void setUpTestDataDir() throws Exception { + LOG.info("Using warehouse directory " + TEST_WAREHOUSE_DIR); + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); } + Assert.assertTrue(new File(TEST_WAREHOUSE_DIR).mkdirs()); + } - @Before - public void setUp() throws Exception { - if (driver == null) { - setUpHiveConf(); - driver = new Driver(hiveConf); - client = new HiveMetaStoreClient(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - } + @Before + public void setUp() throws Exception { + if (driver == null) { + setUpHiveConf(); + driver = new Driver(hiveConf); + client = new HiveMetaStoreClient(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); } + } - /** - * Create a new HiveConf and set properties necessary for unit tests. - */ - protected void setUpHiveConf() { - hiveConf = new HiveConf(this.getClass()); - hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, ""); - hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, ""); - hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); - hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR); - } + /** + * Create a new HiveConf and set properties necessary for unit tests. 
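
[Editorial sketch] Tests built on HCatBaseTest lean on the shared Driver it sets up: run a statement, check the response code, then collect result lines with getResults(). The subclass below is a hypothetical illustration of that usage, not a test from this patch; the class name ExampleHCatTest and the table name example_tbl are invented, and it assumes the class sits in (or imports from) the same org.apache.hcatalog.mapreduce package as HCatBaseTest.

import java.util.ArrayList;

import org.apache.hcatalog.mapreduce.HCatBaseTest;
import org.junit.Assert;
import org.junit.Test;

public class ExampleHCatTest extends HCatBaseTest {

  @Test
  public void countsRows() throws Exception {
    // response code 0 means the statement succeeded
    Assert.assertEquals(0, driver.run("create table example_tbl (a string, b int)").getResponseCode());

    Assert.assertEquals(0, driver.run("select * from example_tbl").getResponseCode());
    ArrayList<String> rows = new ArrayList<String>();
    driver.getResults(rows);              // Driver fills the list with result lines
    Assert.assertEquals(0, rows.size());  // a freshly created table is empty

    driver.run("drop table example_tbl");
  }
}
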
+ */ + protected void setUpHiveConf() { + hiveConf = new HiveConf(this.getClass()); + hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, ""); + hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, ""); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); + hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR); + } - protected void logAndRegister(PigServer server, String query) throws IOException { - LOG.info("Registering pig query: " + query); - server.registerQuery(query); - } + protected void logAndRegister(PigServer server, String query) throws IOException { + LOG.info("Registering pig query: " + query); + server.registerQuery(query); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java index 82bd1e2..b98b155 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java @@ -72,299 +72,299 @@ */ public abstract class HCatMapReduceTest extends HCatBaseTest { - private static final Logger LOG = LoggerFactory.getLogger(HCatMapReduceTest.class); - protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - protected static String tableName = "testHCatMapReduceTable"; + private static final Logger LOG = LoggerFactory.getLogger(HCatMapReduceTest.class); + protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + protected static String tableName = "testHCatMapReduceTable"; - private static List writeRecords = new ArrayList(); - private static List readRecords = new ArrayList(); + private static List writeRecords = new ArrayList(); + private static List readRecords = new ArrayList(); - protected abstract List getPartitionKeys(); + protected abstract List getPartitionKeys(); - protected abstract List getTableColumns(); + protected abstract List getTableColumns(); - private static FileSystem fs; + private static FileSystem fs; - protected String inputFormat() { - return RCFileInputFormat.class.getName(); - } - - protected String outputFormat() { - return RCFileOutputFormat.class.getName(); - } - - protected String serdeClass() { - return ColumnarSerDe.class.getName(); - } - - @BeforeClass - public static void setUpOneTime() throws Exception { - fs = new LocalFileSystem(); - fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - - HiveConf hiveConf = new HiveConf(); - hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0); - // Hack to initialize cache with 0 expiry time causing it to return a new hive client every time - // Otherwise the cache doesn't play well with the second test method with the client gets closed() in the - // tearDown() of the previous test - HCatUtil.getHiveClient(hiveConf); - - MapCreate.writeCount = 0; - MapRead.readCount = 0; - } + protected String inputFormat() { + return RCFileInputFormat.class.getName(); + } - @After - public void deleteTable() throws Exception { - try { - String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; + protected String outputFormat() { + return RCFileOutputFormat.class.getName(); + } - client.dropTable(databaseName, tableName); - } catch (Exception e) { - e.printStackTrace(); - throw e; - } - } + protected String serdeClass() { + return ColumnarSerDe.class.getName(); + } - @Before - public void createTable() throws Exception { - String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; + @BeforeClass + public static void setUpOneTime() throws Exception { + fs = new LocalFileSystem(); + fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - try { - client.dropTable(databaseName, tableName); - } catch (Exception e) { - } //can fail with NoSuchObjectException + HiveConf hiveConf = new HiveConf(); + hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0); + // Hack to initialize cache with 0 expiry time causing it to return a new hive client every time + // Otherwise the cache doesn't play well with the second test method with the client gets closed() in the + // tearDown() of the previous test + HCatUtil.getHiveClient(hiveConf); + MapCreate.writeCount = 0; + MapRead.readCount = 0; + } - Table tbl = new Table(); - tbl.setDbName(databaseName); - tbl.setTableName(tableName); - tbl.setTableType("MANAGED_TABLE"); - StorageDescriptor sd = new StorageDescriptor(); + @After + public void deleteTable() throws Exception { + try { + String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; - sd.setCols(getTableColumns()); - tbl.setPartitionKeys(getPartitionKeys()); + client.dropTable(databaseName, tableName); + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + } - tbl.setSd(sd); + @Before + public void createTable() throws Exception { + String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; - sd.setBucketCols(new ArrayList(2)); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib(serdeClass()); - sd.setInputFormat(inputFormat()); - sd.setOutputFormat(outputFormat()); + try { + client.dropTable(databaseName, tableName); + } catch (Exception e) { + } //can fail with NoSuchObjectException - Map tableParams = new HashMap(); - tbl.setParameters(tableParams); - client.createTable(tbl); - } + Table tbl = new Table(); + tbl.setDbName(databaseName); + tbl.setTableName(tableName); + tbl.setTableType("MANAGED_TABLE"); + StorageDescriptor sd = new StorageDescriptor(); - //Create test input file with specified number of rows - private void createInputFile(Path path, int rowCount) throws IOException { + sd.setCols(getTableColumns()); + tbl.setPartitionKeys(getPartitionKeys()); - if (fs.exists(path)) { - fs.delete(path, true); - } + tbl.setSd(sd); - FSDataOutputStream os = fs.create(path); + sd.setBucketCols(new ArrayList(2)); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib(serdeClass()); + sd.setInputFormat(inputFormat()); + sd.setOutputFormat(outputFormat()); - for (int i = 0; i < rowCount; i++) { - os.writeChars(i + "\n"); - } + Map tableParams = new HashMap(); + tbl.setParameters(tableParams); + + client.createTable(tbl); + } + + //Create test input file with specified number of rows + private void createInputFile(Path path, int rowCount) throws IOException { - os.close(); + if (fs.exists(path)) { + fs.delete(path, true); } - public static class MapCreate extends - Mapper { + FSDataOutputStream os = fs.create(path); - static int writeCount = 0; //test will be in local mode + for (int i = 0; i < rowCount; i++) { + os.writeChars(i + "\n"); + } - 
@Override - public void map(LongWritable key, Text value, Context context - ) throws IOException, InterruptedException { - { - try { - HCatRecord rec = writeRecords.get(writeCount); - context.write(null, rec); - writeCount++; + os.close(); + } - } catch (Exception e) { + public static class MapCreate extends + Mapper { - e.printStackTrace(System.err); //print since otherwise exception is lost - throw new IOException(e); - } - } - } - } + static int writeCount = 0; //test will be in local mode - public static class MapRead extends - Mapper { - - static int readCount = 0; //test will be in local mode - - @Override - public void map(WritableComparable key, HCatRecord value, Context context - ) throws IOException, InterruptedException { - { - try { - readRecords.add(value); - readCount++; - } catch (Exception e) { - e.printStackTrace(); //print since otherwise exception is lost - throw new IOException(e); - } - } - } - } + @Override + public void map(LongWritable key, Text value, Context context + ) throws IOException, InterruptedException { + { + try { + HCatRecord rec = writeRecords.get(writeCount); + context.write(null, rec); + writeCount++; - Job runMRCreate(Map partitionValues, - List partitionColumns, List records, - int writeCount, boolean assertWrite) throws Exception { - return runMRCreate(partitionValues, partitionColumns, records, writeCount, assertWrite, true); - } + } catch (Exception e) { - /** - * Run a local map reduce job to load data from in memory records to an HCatalog Table - * @param partitionValues - * @param partitionColumns - * @param records data to be written to HCatalog table - * @param writeCount - * @param assertWrite - * @param asSingleMapTask - * @return - * @throws Exception - */ - Job runMRCreate(Map partitionValues, - List partitionColumns, List records, - int writeCount, boolean assertWrite, boolean asSingleMapTask) throws Exception { - - writeRecords = records; - MapCreate.writeCount = 0; - - Configuration conf = new Configuration(); - Job job = new Job(conf, "hcat mapreduce write test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(HCatMapReduceTest.MapCreate.class); - - // input/output settings - job.setInputFormatClass(TextInputFormat.class); - - if (asSingleMapTask) { - // One input path would mean only one map task - Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); - createInputFile(path, writeCount); - TextInputFormat.setInputPaths(job, path); - } else { - // Create two input paths so that two map tasks get triggered. There could be other ways - // to trigger two map tasks. 
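
[Editorial sketch] The "two map tasks" trick mentioned in the comment above relies on giving the job two input files, so the local runner schedules at least one map task per path. The class below condenses that setup into one place; TwoMapTaskSketch, writeRows and the input-part paths are illustrative names only, and the file-writing loop simply mirrors the test's own createInputFile().

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class TwoMapTaskSketch {

  // Write `rows` numbered lines to `path`, as the test's createInputFile() does.
  static void writeRows(FileSystem fs, Path path, int rows) throws IOException {
    if (fs.exists(path)) {
      fs.delete(path, true);
    }
    FSDataOutputStream os = fs.create(path);
    for (int i = 0; i < rows; i++) {
      os.writeChars(i + "\n");
    }
    os.close();
  }

  // Split the rows across two files and register both paths, so two map tasks run.
  static void configureTwoInputs(Job job, FileSystem fs, int totalRows) throws IOException {
    Path first = new Path(fs.getWorkingDirectory(), "mapred/input-part1");
    Path second = new Path(fs.getWorkingDirectory(), "mapred/input-part2");
    writeRows(fs, first, totalRows / 2);
    writeRows(fs, second, totalRows - totalRows / 2);
    TextInputFormat.setInputPaths(job, first, second);
  }
}
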
- Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); - createInputFile(path, writeCount / 2); - - Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2"); - createInputFile(path2, (writeCount - writeCount / 2)); - - TextInputFormat.setInputPaths(job, path, path2); + e.printStackTrace(System.err); //print since otherwise exception is lost + throw new IOException(e); } + } + } + } - job.setOutputFormatClass(HCatOutputFormat.class); + public static class MapRead extends + Mapper { - OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues); - HCatOutputFormat.setOutput(job, outputJobInfo); + static int readCount = 0; //test will be in local mode - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(DefaultHCatRecord.class); + @Override + public void map(WritableComparable key, HCatRecord value, Context context + ) throws IOException, InterruptedException { + { + try { + readRecords.add(value); + readCount++; + } catch (Exception e) { + e.printStackTrace(); //print since otherwise exception is lost + throw new IOException(e); + } + } + } + } + + Job runMRCreate(Map partitionValues, + List partitionColumns, List records, + int writeCount, boolean assertWrite) throws Exception { + return runMRCreate(partitionValues, partitionColumns, records, writeCount, assertWrite, true); + } + + /** + * Run a local map reduce job to load data from in memory records to an HCatalog Table + * @param partitionValues + * @param partitionColumns + * @param records data to be written to HCatalog table + * @param writeCount + * @param assertWrite + * @param asSingleMapTask + * @return + * @throws Exception + */ + Job runMRCreate(Map partitionValues, + List partitionColumns, List records, + int writeCount, boolean assertWrite, boolean asSingleMapTask) throws Exception { + + writeRecords = records; + MapCreate.writeCount = 0; + + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce write test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(HCatMapReduceTest.MapCreate.class); + + // input/output settings + job.setInputFormatClass(TextInputFormat.class); + + if (asSingleMapTask) { + // One input path would mean only one map task + Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); + createInputFile(path, writeCount); + TextInputFormat.setInputPaths(job, path); + } else { + // Create two input paths so that two map tasks get triggered. There could be other ways + // to trigger two map tasks. + Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); + createInputFile(path, writeCount / 2); + + Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2"); + createInputFile(path2, (writeCount - writeCount / 2)); + + TextInputFormat.setInputPaths(job, path, path2); + } - job.setNumReduceTasks(0); + job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns)); + OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues); + HCatOutputFormat.setOutput(job, outputJobInfo); - boolean success = job.waitForCompletion(true); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(DefaultHCatRecord.class); - // Ensure counters are set when data has actually been read. 
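
[Editorial sketch] The counter check performed after job completion boils down to reading the local-mode "FileSystemCounters" group and asserting that some input bytes were actually consumed. The helper below isolates just that check; CounterCheckSketch and assertBytesWereRead are hypothetical names, and the group/counter strings are the Hadoop 1.x local-mode names the test itself uses.

import org.apache.hadoop.mapreduce.Job;
import org.junit.Assert;

public class CounterCheckSketch {

  // Assert that a completed job reported non-zero FILE_BYTES_READ.
  static void assertBytesWereRead(Job completedJob) throws Exception {
    long bytesRead = completedJob.getCounters()
        .getGroup("FileSystemCounters")
        .findCounter("FILE_BYTES_READ")
        .getValue();
    Assert.assertTrue("expected the job to have read some input bytes", bytesRead > 0);
  }
}
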
- if (partitionValues != null) { - assertTrue(job.getCounters().getGroup("FileSystemCounters") - .findCounter("FILE_BYTES_READ").getValue() > 0); - } + job.setNumReduceTasks(0); - if (!HCatUtil.isHadoop23()) { - // Local mode outputcommitter hook is not invoked in Hadoop 1.x - if (success) { - new FileOutputCommitterContainer(job, null).commitJob(job); - } else { - new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED); - } - } - if (assertWrite) { - // we assert only if we expected to assert with this call. - Assert.assertEquals(writeCount, MapCreate.writeCount); - } + HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns)); - return job; - } + boolean success = job.waitForCompletion(true); - List runMRRead(int readCount) throws Exception { - return runMRRead(readCount, null); + // Ensure counters are set when data has actually been read. + if (partitionValues != null) { + assertTrue(job.getCounters().getGroup("FileSystemCounters") + .findCounter("FILE_BYTES_READ").getValue() > 0); } - /** - * Run a local map reduce job to read records from HCatalog table and verify if the count is as expected - * @param readCount - * @param filter - * @return - * @throws Exception - */ - List runMRRead(int readCount, String filter) throws Exception { + if (!HCatUtil.isHadoop23()) { + // Local mode outputcommitter hook is not invoked in Hadoop 1.x + if (success) { + new FileOutputCommitterContainer(job, null).commitJob(job); + } else { + new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED); + } + } + if (assertWrite) { + // we assert only if we expected to assert with this call. + Assert.assertEquals(writeCount, MapCreate.writeCount); + } - MapRead.readCount = 0; - readRecords.clear(); + return job; + } - Configuration conf = new Configuration(); - Job job = new Job(conf, "hcat mapreduce read test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(HCatMapReduceTest.MapRead.class); + List runMRRead(int readCount) throws Exception { + return runMRRead(readCount, null); + } - // input/output settings - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); + /** + * Run a local map reduce job to read records from HCatalog table and verify if the count is as expected + * @param readCount + * @param filter + * @return + * @throws Exception + */ + List runMRRead(int readCount, String filter) throws Exception { - HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter); + MapRead.readCount = 0; + readRecords.clear(); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce read test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(HCatMapReduceTest.MapRead.class); - job.setNumReduceTasks(0); + // input/output settings + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); - Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput"); - if (fs.exists(path)) { - fs.delete(path, true); - } + HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter); - TextOutputFormat.setOutputPath(job, path); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); - job.waitForCompletion(true); - Assert.assertEquals(readCount, MapRead.readCount); + job.setNumReduceTasks(0); - return readRecords; + Path path = new Path(fs.getWorkingDirectory(), 
"mapred/testHCatMapReduceOutput"); + if (fs.exists(path)) { + fs.delete(path, true); } + TextOutputFormat.setOutputPath(job, path); - protected HCatSchema getTableSchema() throws Exception { + job.waitForCompletion(true); + Assert.assertEquals(readCount, MapRead.readCount); - Configuration conf = new Configuration(); - Job job = new Job(conf, "hcat mapreduce read schema test"); - job.setJarByClass(this.getClass()); + return readRecords; + } - // input/output settings - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - HCatInputFormat.setInput(job, dbName, tableName); + protected HCatSchema getTableSchema() throws Exception { - return HCatInputFormat.getTableSchema(job); - } + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce read schema test"); + job.setJarByClass(this.getClass()); + + // input/output settings + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + + HCatInputFormat.setInput(job, dbName, tableName); + + return HCatInputFormat.getTableSchema(job); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java index f67b1d4..6ca2c6d 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java @@ -50,162 +50,162 @@ */ public class TestHCatDynamicPartitioned extends HCatMapReduceTest { - private static List writeRecords; - private static List dataColumns; - private static final Logger LOG = LoggerFactory.getLogger(TestHCatDynamicPartitioned.class); - protected static final int NUM_RECORDS = 20; - protected static final int NUM_PARTITIONS = 5; - - @BeforeClass - public static void generateInputData() throws Exception { - tableName = "testHCatDynamicPartitionedTable"; - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - generateDataColumns(); + private static List writeRecords; + private static List dataColumns; + private static final Logger LOG = LoggerFactory.getLogger(TestHCatDynamicPartitioned.class); + protected static final int NUM_RECORDS = 20; + protected static final int NUM_PARTITIONS = 5; + + @BeforeClass + public static void generateInputData() throws Exception { + tableName = "testHCatDynamicPartitionedTable"; + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + generateDataColumns(); + } + + protected static void generateDataColumns() throws HCatException { + dataColumns = new ArrayList(); + dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, ""))); + } + + protected static void generateWriteRecords(int max, int mod, int offset) { + writeRecords = new ArrayList(); + + for (int i = 0; i < max; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("strvalue" + i); + objList.add(String.valueOf((i % mod) + offset)); + writeRecords.add(new DefaultHCatRecord(objList)); } - - protected static void generateDataColumns() throws HCatException { - dataColumns = new ArrayList(); - dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", 
serdeConstants.INT_TYPE_NAME, ""))); - dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, ""))); + } + + @Override + protected List getPartitionKeys() { + List fields = new ArrayList(); + fields.add(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + @Override + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + /** + * Run the dynamic partitioning test but with single map task + * @throws Exception + */ + @Test + public void testHCatDynamicPartitionedTable() throws Exception { + runHCatDynamicPartitionedTable(true); + } + + /** + * Run the dynamic partitioning test but with multiple map task. See HCATALOG-490 + * @throws Exception + */ + @Test + public void testHCatDynamicPartitionedTableMultipleTask() throws Exception { + runHCatDynamicPartitionedTable(false); + } + + protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask) throws Exception { + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask); + + runMRRead(NUM_RECORDS); + + //Read with partition filter + runMRRead(4, "p1 = \"0\""); + runMRRead(8, "p1 = \"1\" or p1 = \"3\""); + runMRRead(4, "p1 = \"4\""); + + // read from hive to test + + String query = "select * from " + tableName; + int retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } - protected static void generateWriteRecords(int max, int mod, int offset) { - writeRecords = new ArrayList(); + ArrayList res = new ArrayList(); + driver.getResults(res); + assertEquals(NUM_RECORDS, res.size()); - for (int i = 0; i < max; i++) { - List objList = new ArrayList(); - objList.add(i); - objList.add("strvalue" + i); - objList.add(String.valueOf((i % mod) + offset)); - writeRecords.add(new DefaultHCatRecord(objList)); - } - } + //Test for duplicate publish + IOException exc = null; + try { + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false); - @Override - protected List getPartitionKeys() { - List fields = new ArrayList(); - fields.add(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + if (HCatUtil.isHadoop23()) { + Assert.assertTrue(job.isSuccessful()==false); + } + } catch (IOException e) { + exc = e; } - @Override - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + if (!HCatUtil.isHadoop23()) { + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertTrue("Got exception of type [" + ((HCatException) exc).getErrorType().toString() + + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED", + (ErrorType.ERROR_PUBLISHING_PARTITION == ((HCatException) exc).getErrorType()) + || (ErrorType.ERROR_MOVE_FAILED == ((HCatException) exc).getErrorType()) + ); } - /** - * Run the dynamic partitioning test but with single map task - * @throws Exception - */ - @Test - public void testHCatDynamicPartitionedTable() 
throws Exception { - runHCatDynamicPartitionedTable(true); + query = "show partitions " + tableName; + retCode = driver.run(query).getResponseCode(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } - - /** - * Run the dynamic partitioning test but with multiple map task. See HCATALOG-490 - * @throws Exception - */ - @Test - public void testHCatDynamicPartitionedTableMultipleTask() throws Exception { - runHCatDynamicPartitionedTable(false); + res = new ArrayList(); + driver.getResults(res); + assertEquals(NUM_PARTITIONS, res.size()); + + query = "select * from " + tableName; + retCode = driver.run(query).getResponseCode(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } + res = new ArrayList(); + driver.getResults(res); + assertEquals(NUM_RECORDS, res.size()); + } - protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask) throws Exception { - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask); - - runMRRead(NUM_RECORDS); - - //Read with partition filter - runMRRead(4, "p1 = \"0\""); - runMRRead(8, "p1 = \"1\" or p1 = \"3\""); - runMRRead(4, "p1 = \"4\""); - - // read from hive to test - - String query = "select * from " + tableName; - int retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - assertEquals(NUM_RECORDS, res.size()); - - - //Test for duplicate publish - IOException exc = null; - try { - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false); - - if (HCatUtil.isHadoop23()) { - Assert.assertTrue(job.isSuccessful()==false); - } - } catch (IOException e) { - exc = e; - } - - if (!HCatUtil.isHadoop23()) { - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertTrue("Got exception of type [" + ((HCatException) exc).getErrorType().toString() - + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED", - (ErrorType.ERROR_PUBLISHING_PARTITION == ((HCatException) exc).getErrorType()) - || (ErrorType.ERROR_MOVE_FAILED == ((HCatException) exc).getErrorType()) - ); - } - - query = "show partitions " + tableName; - retCode = driver.run(query).getResponseCode(); - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - res = new ArrayList(); - driver.getResults(res); - assertEquals(NUM_PARTITIONS, res.size()); - - query = "select * from " + tableName; - retCode = driver.run(query).getResponseCode(); - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - res = new ArrayList(); - driver.getResults(res); - assertEquals(NUM_RECORDS, res.size()); + //TODO 1.0 miniCluster is slow this test times out, make it work +// renaming test to make test framework skip it + public void _testHCatDynamicPartitionMaxPartitions() throws Exception { + HiveConf hc = new HiveConf(this.getClass()); + + int maxParts = hiveConf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); + LOG.info("Max partitions allowed = {}", maxParts); + + IOException exc = null; + try { + generateWriteRecords(maxParts + 5, maxParts + 2, 10); + runMRCreate(null, dataColumns, writeRecords, maxParts + 5, false); + } catch (IOException e) { + exc = e; } - //TODO 1.0 miniCluster is slow this test times out, make 
it work -// renaming test to make test framework skip it - public void _testHCatDynamicPartitionMaxPartitions() throws Exception { - HiveConf hc = new HiveConf(this.getClass()); - - int maxParts = hiveConf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); - LOG.info("Max partitions allowed = {}", maxParts); - - IOException exc = null; - try { - generateWriteRecords(maxParts + 5, maxParts + 2, 10); - runMRCreate(null, dataColumns, writeRecords, maxParts + 5, false); - } catch (IOException e) { - exc = e; - } - - if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, ((HCatException) exc).getErrorType()); - } else { - assertTrue(exc == null); - runMRRead(maxParts + 5); - } + if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, ((HCatException) exc).getErrorType()); + } else { + assertTrue(exc == null); + runMRRead(maxParts + 5); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveCompatibility.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveCompatibility.java index c3cace6..e1e7669 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveCompatibility.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveCompatibility.java @@ -38,95 +38,95 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.TestHCatHiveCompatibility} instead */ public class TestHCatHiveCompatibility extends HCatBaseTest { - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - @BeforeClass - public static void createInputData() throws Exception { - int LOOP_SIZE = 11; - File file = new File(INPUT_FILE_NAME); - file.deleteOnExit(); - FileWriter writer = new FileWriter(file); - for (int i = 0; i < LOOP_SIZE; i++) { - writer.write(i + "\t1\n"); - } - writer.close(); + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + @BeforeClass + public static void createInputData() throws Exception { + int LOOP_SIZE = 11; + File file = new File(INPUT_FILE_NAME); + file.deleteOnExit(); + FileWriter writer = new FileWriter(file); + for (int i = 0; i < LOOP_SIZE; i++) { + writer.write(i + "\t1\n"); } + writer.close(); + } - @Test - public void testUnpartedReadWrite() throws Exception { + @Test + public void testUnpartedReadWrite() throws Exception { - driver.run("drop table if exists junit_unparted_noisd"); - String createTable = "create table junit_unparted_noisd(a int) stored as RCFILE"; - Assert.assertEquals(0, driver.run(createTable).getResponseCode()); + driver.run("drop table if exists junit_unparted_noisd"); + String createTable = "create table junit_unparted_noisd(a int) stored as RCFILE"; + Assert.assertEquals(0, driver.run(createTable).getResponseCode()); - // assert that the table created has no hcat instrumentation, and that we're still able to read it. - Table table = client.getTable("default", "junit_unparted_noisd"); - Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + // assert that the table created has no hcat instrumentation, and that we're still able to read it. 
+ Table table = client.getTable("default", "junit_unparted_noisd"); + Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - PigServer server = new PigServer(ExecType.LOCAL); - logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - logAndRegister(server, "store A into 'default.junit_unparted_noisd' using org.apache.hcatalog.pig.HCatStorer();"); - logAndRegister(server, "B = load 'default.junit_unparted_noisd' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator itr = server.openIterator("B"); + PigServer server = new PigServer(ExecType.LOCAL); + logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); + logAndRegister(server, "store A into 'default.junit_unparted_noisd' using org.apache.hcatalog.pig.HCatStorer();"); + logAndRegister(server, "B = load 'default.junit_unparted_noisd' using org.apache.hcatalog.pig.HCatLoader();"); + Iterator itr = server.openIterator("B"); - int i = 0; + int i = 0; - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(1, t.size()); - Assert.assertEquals(t.get(0), i); - i++; - } + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(1, t.size()); + Assert.assertEquals(t.get(0), i); + i++; + } - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); - // assert that the table created still has no hcat instrumentation - Table table2 = client.getTable("default", "junit_unparted_noisd"); - Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + // assert that the table created still has no hcat instrumentation + Table table2 = client.getTable("default", "junit_unparted_noisd"); + Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - driver.run("drop table junit_unparted_noisd"); - } + driver.run("drop table junit_unparted_noisd"); + } - @Test - public void testPartedRead() throws Exception { + @Test + public void testPartedRead() throws Exception { - driver.run("drop table if exists junit_parted_noisd"); - String createTable = "create table junit_parted_noisd(a int) partitioned by (b string) stored as RCFILE"; - Assert.assertEquals(0, driver.run(createTable).getResponseCode()); + driver.run("drop table if exists junit_parted_noisd"); + String createTable = "create table junit_parted_noisd(a int) partitioned by (b string) stored as RCFILE"; + Assert.assertEquals(0, driver.run(createTable).getResponseCode()); - // assert that the table created has no hcat instrumentation, and that we're still able to read it. - Table table = client.getTable("default", "junit_parted_noisd"); - Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + // assert that the table created has no hcat instrumentation, and that we're still able to read it. 
+ Table table = client.getTable("default", "junit_parted_noisd"); + Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - PigServer server = new PigServer(ExecType.LOCAL); - logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - logAndRegister(server, "store A into 'default.junit_parted_noisd' using org.apache.hcatalog.pig.HCatStorer('b=42');"); - logAndRegister(server, "B = load 'default.junit_parted_noisd' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator itr = server.openIterator("B"); + PigServer server = new PigServer(ExecType.LOCAL); + logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); + logAndRegister(server, "store A into 'default.junit_parted_noisd' using org.apache.hcatalog.pig.HCatStorer('b=42');"); + logAndRegister(server, "B = load 'default.junit_parted_noisd' using org.apache.hcatalog.pig.HCatLoader();"); + Iterator itr = server.openIterator("B"); - int i = 0; + int i = 0; - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(2, t.size()); // Contains explicit field "a" and partition "b". - Assert.assertEquals(t.get(0), i); - Assert.assertEquals(t.get(1), "42"); - i++; - } + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(2, t.size()); // Contains explicit field "a" and partition "b". + Assert.assertEquals(t.get(0), i); + Assert.assertEquals(t.get(1), "42"); + i++; + } - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); - // assert that the table created still has no hcat instrumentation - Table table2 = client.getTable("default", "junit_parted_noisd"); - Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + // assert that the table created still has no hcat instrumentation + Table table2 = client.getTable("default", "junit_parted_noisd"); + Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - // assert that there is one partition present, and it had hcat instrumentation inserted when it was created. - Partition ptn = client.getPartition("default", "junit_parted_noisd", Arrays.asList("42")); + // assert that there is one partition present, and it had hcat instrumentation inserted when it was created. 
+ Partition ptn = client.getPartition("default", "junit_parted_noisd", Arrays.asList("42")); - Assert.assertNotNull(ptn); - Assert.assertTrue(ptn.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - driver.run("drop table junit_unparted_noisd"); - } + Assert.assertNotNull(ptn); + Assert.assertTrue(ptn.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + driver.run("drop table junit_unparted_noisd"); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java index eebc16f..28c11b8 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java @@ -43,77 +43,77 @@ */ public class TestHCatHiveThriftCompatibility extends HCatBaseTest { - private boolean setUpComplete = false; - private Path intStringSeq; - - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - if (setUpComplete) { - return; - } - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - TIOStreamTransport transport = new TIOStreamTransport(out); - TBinaryProtocol protocol = new TBinaryProtocol(transport); - - IntString intString = new IntString(1, "one", 1); - intString.write(protocol); - BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); - - intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); - LOG.info("Creating data file: " + intStringSeq); - - SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( - intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, - NullWritable.class, BytesWritable.class); - seqFileWriter.append(NullWritable.get(), bytesWritable); - seqFileWriter.close(); - - setUpComplete = true; + private boolean setUpComplete = false; + private Path intStringSeq; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + if (setUpComplete) { + return; } - /** - * Create a table with no explicit schema and ensure its correctly - * discovered from the thrift struct. 
- */ - @Test - public void testDynamicCols() throws Exception { - Assert.assertEquals(0, driver.run("drop table if exists test_thrift").getResponseCode()); - Assert.assertEquals(0, driver.run( - "create external table test_thrift " + - "partitioned by (year string) " + - "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + - "with serdeproperties ( " + - " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + - " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + - "stored as" + - " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + - " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") - .getResponseCode()); - Assert.assertEquals(0, - driver.run("alter table test_thrift add partition (year = '2012') location '" + - intStringSeq.getParent() + "'").getResponseCode()); - - PigServer pigServer = new PigServer(ExecType.LOCAL); - pigServer.registerQuery("A = load 'test_thrift' using org.apache.hcatalog.pig.HCatLoader();"); - - Schema expectedSchema = new Schema(); - expectedSchema.add(new Schema.FieldSchema("myint", DataType.INTEGER)); - expectedSchema.add(new Schema.FieldSchema("mystring", DataType.CHARARRAY)); - expectedSchema.add(new Schema.FieldSchema("underscore_int", DataType.INTEGER)); - expectedSchema.add(new Schema.FieldSchema("year", DataType.CHARARRAY)); - - Assert.assertEquals(expectedSchema, pigServer.dumpSchema("A")); - - Iterator iterator = pigServer.openIterator("A"); - Tuple t = iterator.next(); - Assert.assertEquals(1, t.get(0)); - Assert.assertEquals("one", t.get(1)); - Assert.assertEquals(1, t.get(2)); - Assert.assertEquals("2012", t.get(3)); - - Assert.assertFalse(iterator.hasNext()); - } + ByteArrayOutputStream out = new ByteArrayOutputStream(); + TIOStreamTransport transport = new TIOStreamTransport(out); + TBinaryProtocol protocol = new TBinaryProtocol(transport); + + IntString intString = new IntString(1, "one", 1); + intString.write(protocol); + BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); + + intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); + LOG.info("Creating data file: " + intStringSeq); + + SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( + intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, + NullWritable.class, BytesWritable.class); + seqFileWriter.append(NullWritable.get(), bytesWritable); + seqFileWriter.close(); + + setUpComplete = true; + } + + /** + * Create a table with no explicit schema and ensure its correctly + * discovered from the thrift struct. 
+ */ + @Test + public void testDynamicCols() throws Exception { + Assert.assertEquals(0, driver.run("drop table if exists test_thrift").getResponseCode()); + Assert.assertEquals(0, driver.run( + "create external table test_thrift " + + "partitioned by (year string) " + + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + + "with serdeproperties ( " + + " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + + " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + + "stored as" + + " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + + " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") + .getResponseCode()); + Assert.assertEquals(0, + driver.run("alter table test_thrift add partition (year = '2012') location '" + + intStringSeq.getParent() + "'").getResponseCode()); + + PigServer pigServer = new PigServer(ExecType.LOCAL); + pigServer.registerQuery("A = load 'test_thrift' using org.apache.hcatalog.pig.HCatLoader();"); + + Schema expectedSchema = new Schema(); + expectedSchema.add(new Schema.FieldSchema("myint", DataType.INTEGER)); + expectedSchema.add(new Schema.FieldSchema("mystring", DataType.CHARARRAY)); + expectedSchema.add(new Schema.FieldSchema("underscore_int", DataType.INTEGER)); + expectedSchema.add(new Schema.FieldSchema("year", DataType.CHARARRAY)); + + Assert.assertEquals(expectedSchema, pigServer.dumpSchema("A")); + + Iterator iterator = pigServer.openIterator("A"); + Tuple t = iterator.next(); + Assert.assertEquals(1, t.get(0)); + Assert.assertEquals("one", t.get(1)); + Assert.assertEquals(1, t.get(2)); + Assert.assertEquals("2012", t.get(3)); + + Assert.assertFalse(iterator.hasNext()); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatInputFormat.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatInputFormat.java index 2cd7065..dde94e2 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatInputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatInputFormat.java @@ -45,107 +45,107 @@ */ public class TestHCatInputFormat extends HCatBaseTest { - private boolean setUpComplete = false; - - /** - * Create an input sequence file with 100 records; every 10th record is bad. - * Load this table into Hive. - */ - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - if (setUpComplete) { - return; - } - - Path intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); - LOG.info("Creating data file: " + intStringSeq); - SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( - intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, - NullWritable.class, BytesWritable.class); - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - TIOStreamTransport transport = new TIOStreamTransport(out); - TBinaryProtocol protocol = new TBinaryProtocol(transport); - - for (int i = 1; i <= 100; i++) { - if (i % 10 == 0) { - seqFileWriter.append(NullWritable.get(), new BytesWritable("bad record".getBytes())); - } else { - out.reset(); - IntString intString = new IntString(i, Integer.toString(i), i); - intString.write(protocol); - BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); - seqFileWriter.append(NullWritable.get(), bytesWritable); - } - } - - seqFileWriter.close(); - - // Now let's load this file into a new Hive table. 
- Assert.assertEquals(0, driver.run("drop table if exists test_bad_records").getResponseCode()); - Assert.assertEquals(0, driver.run( - "create table test_bad_records " + - "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + - "with serdeproperties ( " + - " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + - " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + - "stored as" + - " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + - " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") - .getResponseCode()); - Assert.assertEquals(0, driver.run("load data local inpath '" + intStringSeq.getParent() + - "' into table test_bad_records").getResponseCode()); - - setUpComplete = true; + private boolean setUpComplete = false; + + /** + * Create an input sequence file with 100 records; every 10th record is bad. + * Load this table into Hive. + */ + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + if (setUpComplete) { + return; } - @Test - public void testBadRecordHandlingPasses() throws Exception { - Assert.assertTrue(runJob(0.1f)); + Path intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); + LOG.info("Creating data file: " + intStringSeq); + SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( + intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, + NullWritable.class, BytesWritable.class); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + TIOStreamTransport transport = new TIOStreamTransport(out); + TBinaryProtocol protocol = new TBinaryProtocol(transport); + + for (int i = 1; i <= 100; i++) { + if (i % 10 == 0) { + seqFileWriter.append(NullWritable.get(), new BytesWritable("bad record".getBytes())); + } else { + out.reset(); + IntString intString = new IntString(i, Integer.toString(i), i); + intString.write(protocol); + BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); + seqFileWriter.append(NullWritable.get(), bytesWritable); + } } - @Test - public void testBadRecordHandlingFails() throws Exception { - Assert.assertFalse(runJob(0.01f)); - } + seqFileWriter.close(); + + // Now let's load this file into a new Hive table. 
+ Assert.assertEquals(0, driver.run("drop table if exists test_bad_records").getResponseCode()); + Assert.assertEquals(0, driver.run( + "create table test_bad_records " + + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + + "with serdeproperties ( " + + " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + + " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + + "stored as" + + " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + + " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") + .getResponseCode()); + Assert.assertEquals(0, driver.run("load data local inpath '" + intStringSeq.getParent() + + "' into table test_bad_records").getResponseCode()); - private boolean runJob(float badRecordThreshold) throws Exception { - Configuration conf = new Configuration(); + setUpComplete = true; + } - conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold); + @Test + public void testBadRecordHandlingPasses() throws Exception { + Assert.assertTrue(runJob(0.1f)); + } - Job job = new Job(conf); - job.setJarByClass(this.getClass()); - job.setMapperClass(MyMapper.class); + @Test + public void testBadRecordHandlingFails() throws Exception { + Assert.assertFalse(runJob(0.01f)); + } - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); + private boolean runJob(float badRecordThreshold) throws Exception { + Configuration conf = new Configuration(); - HCatInputFormat.setInput(job, "default", "test_bad_records"); + conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold); - job.setMapOutputKeyClass(HCatRecord.class); - job.setMapOutputValueClass(HCatRecord.class); + Job job = new Job(conf); + job.setJarByClass(this.getClass()); + job.setMapperClass(MyMapper.class); - job.setNumReduceTasks(0); + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); - Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output"); - if (path.getFileSystem(conf).exists(path)) { - path.getFileSystem(conf).delete(path, true); - } + HCatInputFormat.setInput(job, "default", "test_bad_records"); - TextOutputFormat.setOutputPath(job, path); + job.setMapOutputKeyClass(HCatRecord.class); + job.setMapOutputValueClass(HCatRecord.class); - return job.waitForCompletion(true); + job.setNumReduceTasks(0); + + Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output"); + if (path.getFileSystem(conf).exists(path)) { + path.getFileSystem(conf).delete(path, true); } - public static class MyMapper extends Mapper { - @Override - public void map(NullWritable key, HCatRecord value, Context context) - throws IOException, InterruptedException { - LOG.info("HCatRecord: " + value); - context.write(NullWritable.get(), new Text(value.toString())); - } + TextOutputFormat.setOutputPath(job, path); + + return job.waitForCompletion(true); + } + + public static class MyMapper extends Mapper { + @Override + public void map(NullWritable key, HCatRecord value, Context context) + throws IOException, InterruptedException { + LOG.info("HCatRecord: " + value); + context.write(NullWritable.get(), new Text(value.toString())); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java index cefcdd6..2add2dc 100644 --- 
a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java @@ -76,358 +76,358 @@ */ public class TestHCatMultiOutputFormat { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatMultiOutputFormat.class); - - private static final String DATABASE = "default"; - private static final String[] tableNames = {"test1", "test2", "test3"}; - private static final String[] tablePerms = {"755", "750", "700"}; - private static Path warehousedir = null; - private static HashMap schemaMap = new HashMap(); - private static HiveMetaStoreClient hmsc; - private static MiniMRCluster mrCluster; - private static Configuration mrConf; - private static HiveConf hiveConf; - private static File workDir; - - private static final String msPort = "20199"; - private static Thread t; - - static { - schemaMap.put(tableNames[0], new HCatSchema(ColumnHolder.hCattest1Cols)); - schemaMap.put(tableNames[1], new HCatSchema(ColumnHolder.hCattest2Cols)); - schemaMap.put(tableNames[2], new HCatSchema(ColumnHolder.hCattest3Cols)); + private static final Logger LOG = LoggerFactory.getLogger(TestHCatMultiOutputFormat.class); + + private static final String DATABASE = "default"; + private static final String[] tableNames = {"test1", "test2", "test3"}; + private static final String[] tablePerms = {"755", "750", "700"}; + private static Path warehousedir = null; + private static HashMap schemaMap = new HashMap(); + private static HiveMetaStoreClient hmsc; + private static MiniMRCluster mrCluster; + private static Configuration mrConf; + private static HiveConf hiveConf; + private static File workDir; + + private static final String msPort = "20199"; + private static Thread t; + + static { + schemaMap.put(tableNames[0], new HCatSchema(ColumnHolder.hCattest1Cols)); + schemaMap.put(tableNames[1], new HCatSchema(ColumnHolder.hCattest2Cols)); + schemaMap.put(tableNames[2], new HCatSchema(ColumnHolder.hCattest3Cols)); + } + + private static class RunMS implements Runnable { + + @Override + public void run() { + try { + String warehouseConf = HiveConf.ConfVars.METASTOREWAREHOUSE.varname + "=" + + warehousedir.toString(); + HiveMetaStore.main(new String[]{"-v", "-p", msPort, "--hiveconf", warehouseConf}); + } catch (Throwable t) { + System.err.println("Exiting. Got exception from metastore: " + t.getMessage()); + } } - private static class RunMS implements Runnable { - - @Override - public void run() { - try { - String warehouseConf = HiveConf.ConfVars.METASTOREWAREHOUSE.varname + "=" - + warehousedir.toString(); - HiveMetaStore.main(new String[]{"-v", "-p", msPort, "--hiveconf", warehouseConf}); - } catch (Throwable t) { - System.err.println("Exiting. 
Got exception from metastore: " + t.getMessage()); - } - } + } - } + /** + * Private class which holds all the data for the test cases + */ + private static class ColumnHolder { - /** - * Private class which holds all the data for the test cases - */ - private static class ColumnHolder { - - private static ArrayList hCattest1Cols = new ArrayList(); - private static ArrayList hCattest2Cols = new ArrayList(); - private static ArrayList hCattest3Cols = new ArrayList(); - - private static ArrayList partitionCols = new ArrayList(); - private static ArrayList test1Cols = new ArrayList(); - private static ArrayList test2Cols = new ArrayList(); - private static ArrayList test3Cols = new ArrayList(); - - private static HashMap> colMapping = new HashMap>(); - - static { - try { - FieldSchema keyCol = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, ""); - test1Cols.add(keyCol); - test2Cols.add(keyCol); - test3Cols.add(keyCol); - hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); - hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); - hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); - FieldSchema valueCol = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, ""); - test1Cols.add(valueCol); - test3Cols.add(valueCol); - hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); - hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); - FieldSchema extraCol = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, ""); - test3Cols.add(extraCol); - hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extraCol)); - colMapping.put("test1", test1Cols); - colMapping.put("test2", test2Cols); - colMapping.put("test3", test3Cols); - } catch (HCatException e) { - LOG.error("Error in setting up schema fields for the table", e); - throw new RuntimeException(e); - } - } + private static ArrayList hCattest1Cols = new ArrayList(); + private static ArrayList hCattest2Cols = new ArrayList(); + private static ArrayList hCattest3Cols = new ArrayList(); - static { - partitionCols.add(new FieldSchema("ds", serdeConstants.STRING_TYPE_NAME, "")); - partitionCols.add(new FieldSchema("cluster", serdeConstants.STRING_TYPE_NAME, "")); - } - } + private static ArrayList partitionCols = new ArrayList(); + private static ArrayList test1Cols = new ArrayList(); + private static ArrayList test2Cols = new ArrayList(); + private static ArrayList test3Cols = new ArrayList(); - @BeforeClass - public static void setup() throws Exception { - String testDir = System.getProperty("test.data.dir", "./"); - testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/"; - workDir = new File(new File(testDir).getCanonicalPath()); - FileUtil.fullyDelete(workDir); - workDir.mkdirs(); - - warehousedir = new Path(workDir + "/warehouse"); - - // Run hive metastore server - t = new Thread(new RunMS()); - t.start(); - - // LocalJobRunner does not work with mapreduce OutputCommitter. So need - // to use MiniMRCluster. 
MAPREDUCE-2350 - Configuration conf = new Configuration(true); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - - FileSystem fs = FileSystem.get(conf); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, - new JobConf(conf)); - mrConf = mrCluster.createJobConf(); - fs.mkdirs(warehousedir); - - initializeSetup(); - } + private static HashMap> colMapping = new HashMap>(); - private static void initializeSetup() throws Exception { - - hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class); - hiveConf.set("hive.metastore.local", "false"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString()); - try { - hmsc = new HiveMetaStoreClient(hiveConf, null); - initalizeTables(); - } catch (Throwable e) { - LOG.error("Exception encountered while setting up testcase", e); - throw new Exception(e); - } finally { - hmsc.close(); - } + static { + try { + FieldSchema keyCol = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, ""); + test1Cols.add(keyCol); + test2Cols.add(keyCol); + test3Cols.add(keyCol); + hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); + hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); + hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); + FieldSchema valueCol = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, ""); + test1Cols.add(valueCol); + test3Cols.add(valueCol); + hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); + hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); + FieldSchema extraCol = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, ""); + test3Cols.add(extraCol); + hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extraCol)); + colMapping.put("test1", test1Cols); + colMapping.put("test2", test2Cols); + colMapping.put("test3", test3Cols); + } catch (HCatException e) { + LOG.error("Error in setting up schema fields for the table", e); + throw new RuntimeException(e); + } } - private static void initalizeTables() throws Exception { - for (String table : tableNames) { - try { - if (hmsc.getTable(DATABASE, table) != null) { - hmsc.dropTable(DATABASE, table); - } - } catch (NoSuchObjectException ignored) { - } - } - for (int i = 0; i < tableNames.length; i++) { - createTable(tableNames[i], tablePerms[i]); - } + static { + partitionCols.add(new FieldSchema("ds", serdeConstants.STRING_TYPE_NAME, "")); + partitionCols.add(new FieldSchema("cluster", serdeConstants.STRING_TYPE_NAME, "")); } - - private static void createTable(String tableName, String tablePerm) throws Exception { - Table tbl = new Table(); - tbl.setDbName(DATABASE); - tbl.setTableName(tableName); - StorageDescriptor sd = new 
StorageDescriptor(); - sd.setCols(ColumnHolder.colMapping.get(tableName)); - tbl.setSd(sd); - sd.setParameters(new HashMap()); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName()); - sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib( - org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName()); - tbl.setPartitionKeys(ColumnHolder.partitionCols); - - hmsc.createTable(tbl); - FileSystem fs = FileSystem.get(mrConf); - fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm)); + } + + @BeforeClass + public static void setup() throws Exception { + String testDir = System.getProperty("test.data.dir", "./"); + testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/"; + workDir = new File(new File(testDir).getCanonicalPath()); + FileUtil.fullyDelete(workDir); + workDir.mkdirs(); + + warehousedir = new Path(workDir + "/warehouse"); + + // Run hive metastore server + t = new Thread(new RunMS()); + t.start(); + + // LocalJobRunner does not work with mapreduce OutputCommitter. So need + // to use MiniMRCluster. MAPREDUCE-2350 + Configuration conf = new Configuration(true); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + + FileSystem fs = FileSystem.get(conf); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, + new JobConf(conf)); + mrConf = mrCluster.createJobConf(); + fs.mkdirs(warehousedir); + + initializeSetup(); + } + + private static void initializeSetup() throws Exception { + + hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class); + hiveConf.set("hive.metastore.local", "false"); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString()); + try { + hmsc = new HiveMetaStoreClient(hiveConf, null); + initalizeTables(); + } catch (Throwable e) { + LOG.error("Exception encountered while setting up testcase", e); + throw new Exception(e); + } finally { + hmsc.close(); } + } - @AfterClass - public static void tearDown() throws IOException { - FileUtil.fullyDelete(workDir); - FileSystem fs = FileSystem.get(mrConf); - if (fs.exists(warehousedir)) { - fs.delete(warehousedir, true); - } - if (mrCluster != null) { - mrCluster.shutdown(); + private static void initalizeTables() throws Exception { + for (String table : tableNames) { + try { + if (hmsc.getTable(DATABASE, table) != null) { + hmsc.dropTable(DATABASE, table); } + } catch 
(NoSuchObjectException ignored) { + } } + for (int i = 0; i < tableNames.length; i++) { + createTable(tableNames[i], tablePerms[i]); + } + } + + private static void createTable(String tableName, String tablePerm) throws Exception { + Table tbl = new Table(); + tbl.setDbName(DATABASE); + tbl.setTableName(tableName); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(ColumnHolder.colMapping.get(tableName)); + tbl.setSd(sd); + sd.setParameters(new HashMap()); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName()); + sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib( + org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName()); + tbl.setPartitionKeys(ColumnHolder.partitionCols); + + hmsc.createTable(tbl); + FileSystem fs = FileSystem.get(mrConf); + fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm)); + } + + @AfterClass + public static void tearDown() throws IOException { + FileUtil.fullyDelete(workDir); + FileSystem fs = FileSystem.get(mrConf); + if (fs.exists(warehousedir)) { + fs.delete(warehousedir, true); + } + if (mrCluster != null) { + mrCluster.shutdown(); + } + } + + /** + * Simple test case. + *
+   *   1. Submits a mapred job which writes out one fixed line to each of the tables
+   *   2. uses hive fetch task to read the data and see if it matches what was written
+ * + * @throws Exception if any error occurs + */ + @Test + public void testOutputFormat() throws Throwable { + HashMap partitionValues = new HashMap(); + partitionValues.put("ds", "1"); + partitionValues.put("cluster", "ag"); + ArrayList infoList = new ArrayList(); + infoList.add(OutputJobInfo.create("default", tableNames[0], partitionValues)); + infoList.add(OutputJobInfo.create("default", tableNames[1], partitionValues)); + infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues)); + + Job job = new Job(hiveConf, "SampleJob"); + + job.setMapperClass(MyMapper.class); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(MultiOutputFormat.class); + job.setNumReduceTasks(0); + + JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); + + for (int i = 0; i < tableNames.length; i++) { + configurer.addOutputFormat(tableNames[i], HCatOutputFormat.class, BytesWritable.class, + HCatRecord.class); + HCatOutputFormat.setOutput(configurer.getJob(tableNames[i]), infoList.get(i)); + HCatOutputFormat.setSchema(configurer.getJob(tableNames[i]), + schemaMap.get(tableNames[i])); + } + configurer.configure(); - /** - * Simple test case. - *
-     *   1. Submits a mapred job which writes out one fixed line to each of the tables
-     *   2. uses hive fetch task to read the data and see if it matches what was written
- * - * @throws Exception if any error occurs - */ - @Test - public void testOutputFormat() throws Throwable { - HashMap partitionValues = new HashMap(); - partitionValues.put("ds", "1"); - partitionValues.put("cluster", "ag"); - ArrayList infoList = new ArrayList(); - infoList.add(OutputJobInfo.create("default", tableNames[0], partitionValues)); - infoList.add(OutputJobInfo.create("default", tableNames[1], partitionValues)); - infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues)); - - Job job = new Job(hiveConf, "SampleJob"); - - job.setMapperClass(MyMapper.class); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(MultiOutputFormat.class); - job.setNumReduceTasks(0); - - JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); - - for (int i = 0; i < tableNames.length; i++) { - configurer.addOutputFormat(tableNames[i], HCatOutputFormat.class, BytesWritable.class, - HCatRecord.class); - HCatOutputFormat.setOutput(configurer.getJob(tableNames[i]), infoList.get(i)); - HCatOutputFormat.setSchema(configurer.getJob(tableNames[i]), - schemaMap.get(tableNames[i])); - } - configurer.configure(); - - Path filePath = createInputFile(); - FileInputFormat.addInputPath(job, filePath); - Assert.assertTrue(job.waitForCompletion(true)); + Path filePath = createInputFile(); + FileInputFormat.addInputPath(job, filePath); + Assert.assertTrue(job.waitForCompletion(true)); - ArrayList outputs = new ArrayList(); - for (String tbl : tableNames) { - outputs.add(getTableData(tbl, "default").get(0)); - } - Assert.assertEquals("Comparing output of table " + - tableNames[0] + " is not correct", outputs.get(0), "a,a,1,ag"); - Assert.assertEquals("Comparing output of table " + - tableNames[1] + " is not correct", outputs.get(1), "a,1,ag"); - Assert.assertEquals("Comparing output of table " + - tableNames[2] + " is not correct", outputs.get(2), "a,a,extra,1,ag"); - - // Check permisssion on partition dirs and files created - for (int i = 0; i < tableNames.length; i++) { - Path partitionFile = new Path(warehousedir + "/" + tableNames[i] - + "/ds=1/cluster=ag/part-m-00000"); - FileSystem fs = partitionFile.getFileSystem(mrConf); - Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", - fs.getFileStatus(partitionFile).getPermission(), - new FsPermission(tablePerms[i])); - Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", - fs.getFileStatus(partitionFile.getParent()).getPermission(), - new FsPermission(tablePerms[i])); - Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", - fs.getFileStatus(partitionFile.getParent().getParent()).getPermission(), - new FsPermission(tablePerms[i])); - - } - LOG.info("File permissions verified"); + ArrayList outputs = new ArrayList(); + for (String tbl : tableNames) { + outputs.add(getTableData(tbl, "default").get(0)); } + Assert.assertEquals("Comparing output of table " + + tableNames[0] + " is not correct", outputs.get(0), "a,a,1,ag"); + Assert.assertEquals("Comparing output of table " + + tableNames[1] + " is not correct", outputs.get(1), "a,1,ag"); + Assert.assertEquals("Comparing output of table " + + tableNames[2] + " is not correct", outputs.get(2), "a,a,extra,1,ag"); + + // Check permisssion on partition dirs and files created + for (int i = 0; i < tableNames.length; i++) { + Path partitionFile = new Path(warehousedir + "/" + tableNames[i] + + "/ds=1/cluster=ag/part-m-00000"); + FileSystem fs = 
partitionFile.getFileSystem(mrConf); + Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", + fs.getFileStatus(partitionFile).getPermission(), + new FsPermission(tablePerms[i])); + Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", + fs.getFileStatus(partitionFile.getParent()).getPermission(), + new FsPermission(tablePerms[i])); + Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", + fs.getFileStatus(partitionFile.getParent().getParent()).getPermission(), + new FsPermission(tablePerms[i])); - /** - * Create a input file for map - * - * @return absolute path of the file. - * @throws IOException if any error encountered - */ - private Path createInputFile() throws IOException { - Path f = new Path(workDir + "/MultiTableInput.txt"); - FileSystem fs = FileSystem.get(mrConf); - if (fs.exists(f)) { - fs.delete(f, true); - } - OutputStream out = fs.create(f); - for (int i = 0; i < 3; i++) { - out.write("a,a\n".getBytes()); - } - out.close(); - return f; } - - /** - * Method to fetch table data - * - * @param table table name - * @param database database - * @return list of columns in comma seperated way - * @throws Exception if any error occurs - */ - private List getTableData(String table, String database) throws Exception { - HiveConf conf = new HiveConf(); - conf.addResource("hive-site.xml"); - ArrayList results = new ArrayList(); - ArrayList temp = new ArrayList(); - Hive hive = Hive.get(conf); - org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); - FetchWork work; - if (!tbl.getPartCols().isEmpty()) { - List partitions = hive.getPartitions(tbl); - List partDesc = new ArrayList(); - List partLocs = new ArrayList(); - for (Partition part : partitions) { - partLocs.add(part.getLocation()); - partDesc.add(Utilities.getPartitionDesc(part)); - } - work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl)); - work.setLimit(100); - } else { - work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl)); - } - FetchTask task = new FetchTask(); - task.setWork(work); - task.initialize(conf, null, null); - task.fetch(temp); - for (String str : temp) { - results.add(str.replace("\t", ",")); - } - return results; + LOG.info("File permissions verified"); + } + + /** + * Create a input file for map + * + * @return absolute path of the file. 
+ * @throws IOException if any error encountered + */ + private Path createInputFile() throws IOException { + Path f = new Path(workDir + "/MultiTableInput.txt"); + FileSystem fs = FileSystem.get(mrConf); + if (fs.exists(f)) { + fs.delete(f, true); } - - private static class MyMapper extends - Mapper { - - private int i = 0; - - @Override - protected void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - HCatRecord record = null; - String[] splits = value.toString().split(","); - switch (i) { - case 0: - record = new DefaultHCatRecord(2); - record.set(0, splits[0]); - record.set(1, splits[1]); - break; - case 1: - record = new DefaultHCatRecord(1); - record.set(0, splits[0]); - break; - case 2: - record = new DefaultHCatRecord(3); - record.set(0, splits[0]); - record.set(1, splits[1]); - record.set(2, "extra"); - break; - default: - Assert.fail("This should not happen!!!!!"); - } - MultiOutputFormat.write(tableNames[i], null, record, context); - i++; - } + OutputStream out = fs.create(f); + for (int i = 0; i < 3; i++) { + out.write("a,a\n".getBytes()); + } + out.close(); + return f; + } + + /** + * Method to fetch table data + * + * @param table table name + * @param database database + * @return list of columns in comma seperated way + * @throws Exception if any error occurs + */ + private List getTableData(String table, String database) throws Exception { + HiveConf conf = new HiveConf(); + conf.addResource("hive-site.xml"); + ArrayList results = new ArrayList(); + ArrayList temp = new ArrayList(); + Hive hive = Hive.get(conf); + org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); + FetchWork work; + if (!tbl.getPartCols().isEmpty()) { + List partitions = hive.getPartitions(tbl); + List partDesc = new ArrayList(); + List partLocs = new ArrayList(); + for (Partition part : partitions) { + partLocs.add(part.getLocation()); + partDesc.add(Utilities.getPartitionDesc(part)); + } + work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl)); + work.setLimit(100); + } else { + work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl)); + } + FetchTask task = new FetchTask(); + task.setWork(work); + task.initialize(conf, null, null); + task.fetch(temp); + for (String str : temp) { + results.add(str.replace("\t", ",")); + } + return results; + } + + private static class MyMapper extends + Mapper { + + private int i = 0; + + @Override + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + HCatRecord record = null; + String[] splits = value.toString().split(","); + switch (i) { + case 0: + record = new DefaultHCatRecord(2); + record.set(0, splits[0]); + record.set(1, splits[1]); + break; + case 1: + record = new DefaultHCatRecord(1); + record.set(0, splits[0]); + break; + case 2: + record = new DefaultHCatRecord(3); + record.set(0, splits[0]); + record.set(1, splits[1]); + record.set(2, "extra"); + break; + default: + Assert.fail("This should not happen!!!!!"); + } + MultiOutputFormat.write(tableNames[i], null, record, context); + i++; } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatNonPartitioned.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatNonPartitioned.java index 36bb9b5..7e40957 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatNonPartitioned.java +++ 
b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatNonPartitioned.java @@ -44,97 +44,97 @@ */ public class TestHCatNonPartitioned extends HCatMapReduceTest { - private static List writeRecords; - static List partitionColumns; + private static List writeRecords; + static List partitionColumns; - @BeforeClass - public static void oneTimeSetUp() throws Exception { + @BeforeClass + public static void oneTimeSetUp() throws Exception { - dbName = null; //test if null dbName works ("default" is used) - tableName = "testHCatNonPartitionedTable"; + dbName = null; //test if null dbName works ("default" is used) + tableName = "testHCatNonPartitionedTable"; - writeRecords = new ArrayList(); + writeRecords = new ArrayList(); - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); - objList.add(i); - objList.add("strvalue" + i); - writeRecords.add(new DefaultHCatRecord(objList)); - } - - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - } - - @Override - protected List getPartitionKeys() { - List fields = new ArrayList(); - //empty list, non partitioned - return fields; + objList.add(i); + objList.add("strvalue" + i); + writeRecords.add(new DefaultHCatRecord(objList)); } - @Override - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + } + + @Override + protected List getPartitionKeys() { + List fields = new ArrayList(); + //empty list, non partitioned + return fields; + } + + @Override + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + + @Test + public void testHCatNonPartitionedTable() throws Exception { + + Map partitionMap = new HashMap(); + runMRCreate(null, partitionColumns, writeRecords, 10, true); + + //Test for duplicate publish + IOException exc = null; + try { + runMRCreate(null, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_NON_EMPTY_TABLE, ((HCatException) exc).getErrorType()); - @Test - public void testHCatNonPartitionedTable() throws Exception { + //Test for publish with invalid partition key name + exc = null; + partitionMap.clear(); + partitionMap.put("px", "p1value2"); - Map partitionMap = new HashMap(); - runMRCreate(null, partitionColumns, writeRecords, 10, true); - - //Test for duplicate publish - IOException exc = null; - try { - runMRCreate(null, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; + } - assertTrue(exc != null); - assertTrue(exc 
instanceof HCatException); - assertEquals(ErrorType.ERROR_NON_EMPTY_TABLE, ((HCatException) exc).getErrorType()); + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); - //Test for publish with invalid partition key name - exc = null; - partitionMap.clear(); - partitionMap.put("px", "p1value2"); + //Read should get 10 rows + runMRRead(10); - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + hiveReadTest(); + } - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); + //Test that data inserted through hcatoutputformat is readable from hive + private void hiveReadTest() throws Exception { - //Read should get 10 rows - runMRRead(10); + String query = "select * from " + tableName; + int retCode = driver.run(query).getResponseCode(); - hiveReadTest(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } - //Test that data inserted through hcatoutputformat is readable from hive - private void hiveReadTest() throws Exception { - - String query = "select * from " + tableName; - int retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - assertEquals(10, res.size()); - } + ArrayList res = new ArrayList(); + driver.getResults(res); + assertEquals(10, res.size()); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java index 53fa29e..661afc8 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java @@ -50,121 +50,121 @@ */ public class TestHCatOutputFormat extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatOutputFormat.class); - private HiveMetaStoreClient client; - private HiveConf hiveConf; - - private static final String dbName = "hcatOutputFormatTestDB"; - private static final String tblName = "hcatOutputFormatTestTable"; - - @Override - protected void setUp() throws Exception { - super.setUp(); - hiveConf = new HiveConf(this.getClass()); - - try { - client = new HiveMetaStoreClient(hiveConf, null); - - initTable(); - } catch (Throwable e) { - LOG.error("Unable to open the metastore", e); - throw new Exception(e); - } - } - - @Override - protected void tearDown() throws Exception { - try { - super.tearDown(); - client.dropTable(dbName, tblName); - client.dropDatabase(dbName); - - client.close(); - } catch (Throwable e) { - LOG.error("Unable to close metastore", e); - throw new Exception(e); - } - } - - private void initTable() throws Exception { - - try { - client.dropTable(dbName, tblName); - } catch (Exception e) { - } - try { - client.dropDatabase(dbName); - } catch (Exception e) { - } - client.createDatabase(new Database(dbName, "", null, null)); - assertNotNull((client.getDatabase(dbName).getLocationUri())); - - List fields = new ArrayList(); - fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); - - Table tbl = new Table(); - tbl.setDbName(dbName); - tbl.setTableName(tblName); - 
StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(fields); - tbl.setSd(sd); - - //sd.setLocation("hdfs://tmp"); - sd.setInputFormat(RCFileInputFormat.class.getName()); - sd.setOutputFormat(RCFileOutputFormat.class.getName()); - sd.setParameters(new HashMap()); - sd.getParameters().put("test_param_1", "Use this for comments etc"); - //sd.setBucketCols(new ArrayList(2)); - //sd.getBucketCols().add("name"); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib( - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); - tbl.setPartitionKeys(fields); - - Map tableParams = new HashMap(); - tableParams.put("hcat.testarg", "testArgValue"); - - tbl.setParameters(tableParams); - - client.createTable(tbl); - Path tblPath = new Path(client.getTable(dbName, tblName).getSd().getLocation()); - assertTrue(tblPath.getFileSystem(hiveConf).mkdirs(new Path(tblPath, "colname=p1"))); - - } - - public void testSetOutput() throws Exception { - Configuration conf = new Configuration(); - Job job = new Job(conf, "test outputformat"); + private static final Logger LOG = LoggerFactory.getLogger(TestHCatOutputFormat.class); + private HiveMetaStoreClient client; + private HiveConf hiveConf; - Map partitionValues = new HashMap(); - partitionValues.put("colname", "p1"); - //null server url means local mode - OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues); + private static final String dbName = "hcatOutputFormatTestDB"; + private static final String tblName = "hcatOutputFormatTestTable"; - HCatOutputFormat.setOutput(job, info); - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job); + @Override + protected void setUp() throws Exception { + super.setUp(); + hiveConf = new HiveConf(this.getClass()); - assertNotNull(jobInfo.getTableInfo()); - assertEquals(1, jobInfo.getPartitionValues().size()); - assertEquals("p1", jobInfo.getPartitionValues().get("colname")); - assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size()); - assertEquals("colname", jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName()); + try { + client = new HiveMetaStoreClient(hiveConf, null); - publishTest(job); + initTable(); + } catch (Throwable e) { + LOG.error("Unable to open the metastore", e); + throw new Exception(e); } + } + + @Override + protected void tearDown() throws Exception { + try { + super.tearDown(); + client.dropTable(dbName, tblName); + client.dropDatabase(dbName); + + client.close(); + } catch (Throwable e) { + LOG.error("Unable to close metastore", e); + throw new Exception(e); + } + } - public void publishTest(Job job) throws Exception { - OutputCommitter committer = new FileOutputCommitterContainer(job, null); - committer.commitJob(job); - - Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1")); - assertNotNull(part); + private void initTable() throws Exception { - StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters()); - assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue"); - assertTrue(part.getSd().getLocation().indexOf("p1") != -1); + try { + client.dropTable(dbName, tblName); + } catch (Exception e) { + } + try { + client.dropDatabase(dbName); + } catch (Exception e) { } + client.createDatabase(new Database(dbName, "", null, null)); + 
assertNotNull((client.getDatabase(dbName).getLocationUri())); + + List fields = new ArrayList(); + fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); + + Table tbl = new Table(); + tbl.setDbName(dbName); + tbl.setTableName(tblName); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(fields); + tbl.setSd(sd); + + //sd.setLocation("hdfs://tmp"); + sd.setInputFormat(RCFileInputFormat.class.getName()); + sd.setOutputFormat(RCFileOutputFormat.class.getName()); + sd.setParameters(new HashMap()); + sd.getParameters().put("test_param_1", "Use this for comments etc"); + //sd.setBucketCols(new ArrayList(2)); + //sd.getBucketCols().add("name"); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib( + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); + tbl.setPartitionKeys(fields); + + Map tableParams = new HashMap(); + tableParams.put("hcat.testarg", "testArgValue"); + + tbl.setParameters(tableParams); + + client.createTable(tbl); + Path tblPath = new Path(client.getTable(dbName, tblName).getSd().getLocation()); + assertTrue(tblPath.getFileSystem(hiveConf).mkdirs(new Path(tblPath, "colname=p1"))); + + } + + public void testSetOutput() throws Exception { + Configuration conf = new Configuration(); + Job job = new Job(conf, "test outputformat"); + + Map partitionValues = new HashMap(); + partitionValues.put("colname", "p1"); + //null server url means local mode + OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues); + + HCatOutputFormat.setOutput(job, info); + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job); + + assertNotNull(jobInfo.getTableInfo()); + assertEquals(1, jobInfo.getPartitionValues().size()); + assertEquals("p1", jobInfo.getPartitionValues().get("colname")); + assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size()); + assertEquals("colname", jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName()); + + publishTest(job); + } + + public void publishTest(Job job) throws Exception { + OutputCommitter committer = new FileOutputCommitterContainer(job, null); + committer.commitJob(job); + + Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1")); + assertNotNull(part); + + StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters()); + assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue"); + assertTrue(part.getSd().getLocation().indexOf("p1") != -1); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitionPublish.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitionPublish.java index 2e34638..7bd1b4d 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitionPublish.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitionPublish.java @@ -66,194 +66,194 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.TestHCatPartitionPublish} instead */ public class TestHCatPartitionPublish { - private static Configuration mrConf = null; - private static FileSystem fs = null; - private static MiniMRCluster mrCluster = null; - private static boolean isServerRunning = false; - private static final int msPort = 20101; - private static HiveConf hcatConf; - private static HiveMetaStoreClient 
msc; - private static SecurityManager securityManager; - - @BeforeClass - public static void setup() throws Exception { - Configuration conf = new Configuration(true); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - - fs = FileSystem.get(conf); - System.setProperty("hadoop.log.dir", new File(fs.getWorkingDirectory() - .toString(), "/logs").getAbsolutePath()); - // LocalJobRunner does not work with mapreduce OutputCommitter. So need - // to use MiniMRCluster. MAPREDUCE-2350 - mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, - new JobConf(conf)); - mrConf = mrCluster.createJobConf(); - - if (isServerRunning) { - return; - } - - MetaStoreUtils.startMetaStore(msPort, ShimLoader - .getHadoopThriftAuthBridge()); - isServerRunning = true; - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - - hcatConf = new HiveConf(TestHCatPartitionPublish.class); - hcatConf.set("hive.metastore.local", "false"); - hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" - + msPort); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, - "false"); - msc = new HiveMetaStoreClient(hcatConf, null); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + private static Configuration mrConf = null; + private static FileSystem fs = null; + private static MiniMRCluster mrCluster = null; + private static boolean isServerRunning = false; + private static final int msPort = 20101; + private static HiveConf hcatConf; + private static HiveMetaStoreClient msc; + private static SecurityManager securityManager; + + @BeforeClass + public static void setup() throws Exception { + Configuration conf = new Configuration(true); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + + fs = FileSystem.get(conf); + System.setProperty("hadoop.log.dir", new File(fs.getWorkingDirectory() + .toString(), "/logs").getAbsolutePath()); + // LocalJobRunner does not work with mapreduce OutputCommitter. So need + // to use MiniMRCluster. 
MAPREDUCE-2350 + mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, + new JobConf(conf)); + mrConf = mrCluster.createJobConf(); + + if (isServerRunning) { + return; } - @AfterClass - public static void tearDown() throws IOException { - if (mrCluster != null) { - mrCluster.shutdown(); - } - System.setSecurityManager(securityManager); - isServerRunning = false; + MetaStoreUtils.startMetaStore(msPort, ShimLoader + .getHadoopThriftAuthBridge()); + isServerRunning = true; + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + + hcatConf = new HiveConf(TestHCatPartitionPublish.class); + hcatConf.set("hive.metastore.local", "false"); + hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + + msPort); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, + "false"); + msc = new HiveMetaStoreClient(hcatConf, null); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + @AfterClass + public static void tearDown() throws IOException { + if (mrCluster != null) { + mrCluster.shutdown(); } - - @Test - public void testPartitionPublish() throws Exception { - String dbName = "default"; - String tableName = "testHCatPartitionedTable"; - createTable(null, tableName); - - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value1"); - partitionMap.put("part0", "p0value1"); - - ArrayList hcatTableColumns = new ArrayList(); - for (FieldSchema fs : getTableColumns()) { - hcatTableColumns.add(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - - runMRCreateFail(dbName, tableName, partitionMap, hcatTableColumns); - List ptns = msc.listPartitionNames(dbName, tableName, - (short) 10); - Assert.assertEquals(0, ptns.size()); - Table table = msc.getTable(dbName, tableName); - Assert.assertTrue(table != null); - // Also make sure that the directory has been deleted in the table - // location. - Assert.assertFalse(fs.exists(new Path(table.getSd().getLocation() - + "/part1=p1value1/part0=p0value1"))); - } - - void runMRCreateFail( - String dbName, String tableName, Map partitionValues, - List columns) throws Exception { - - Job job = new Job(mrConf, "hcat mapreduce write fail test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(TestHCatPartitionPublish.MapFail.class); - - // input/output settings - job.setInputFormatClass(TextInputFormat.class); - - Path path = new Path(fs.getWorkingDirectory(), - "mapred/testHCatMapReduceInput"); - // The write count does not matter, as the map will fail in its first - // call. 
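// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the HCatOutputFormat wiring that
// runMRCreateFail exercises above, condensed into a standalone driver. The class
// names (HCatWriteSketch, ExampleMapper), the target table "default.example_table",
// the job name and the input path are hypothetical placeholders; the
// OutputJobInfo / HCatOutputFormat / HCatSchemaUtils calls mirror the ones in this test.
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hcatalog.data.DefaultHCatRecord;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.data.schema.HCatSchemaUtils;
import org.apache.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteSketch {

  // Minimal mapper turning "int,string" text lines into two-column HCatRecords.
  public static class ExampleMapper
      extends Mapper<LongWritable, Text, BytesWritable, DefaultHCatRecord> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] cols = value.toString().split(",");
      DefaultHCatRecord record = new DefaultHCatRecord(2);
      record.set(0, Integer.parseInt(cols[0]));
      record.set(1, cols[1]);
      context.write(null, record); // the key is ignored by HCatOutputFormat
    }
  }

  public static boolean run(Configuration conf) throws Exception {
    Job job = new Job(conf, "hcat write sketch");
    job.setJarByClass(HCatWriteSketch.class);
    job.setMapperClass(ExampleMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, new Path("/tmp/hcat-sketch-input"));

    // Target table and static partition values for this write.
    Map<String, String> partitionValues = new HashMap<String, String>();
    partitionValues.put("part1", "p1value1");
    partitionValues.put("part0", "p0value1");
    job.setOutputFormatClass(HCatOutputFormat.class);
    HCatOutputFormat.setOutput(job,
        OutputJobInfo.create("default", "example_table", partitionValues));

    // Data columns only; the partition keys are carried by the partition values.
    List<HCatFieldSchema> columns = new ArrayList<HCatFieldSchema>();
    columns.add(HCatSchemaUtils.getHCatFieldSchema(
        new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
    columns.add(HCatSchemaUtils.getHCatFieldSchema(
        new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
    HCatOutputFormat.setSchema(job, new HCatSchema(columns));

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    return job.waitForCompletion(true);
  }
}
// ---------------------------------------------------------------------------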
- createInputFile(path, 5); - - TextInputFormat.setInputPaths(job, path); - job.setOutputFormatClass(HCatOutputFormat.class); - OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, - partitionValues); - HCatOutputFormat.setOutput(job, outputJobInfo); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(DefaultHCatRecord.class); - - job.setNumReduceTasks(0); - - HCatOutputFormat.setSchema(job, new HCatSchema(columns)); - - boolean success = job.waitForCompletion(true); - Assert.assertTrue(success == false); + System.setSecurityManager(securityManager); + isServerRunning = false; + } + + @Test + public void testPartitionPublish() throws Exception { + String dbName = "default"; + String tableName = "testHCatPartitionedTable"; + createTable(null, tableName); + + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value1"); + partitionMap.put("part0", "p0value1"); + + ArrayList hcatTableColumns = new ArrayList(); + for (FieldSchema fs : getTableColumns()) { + hcatTableColumns.add(HCatSchemaUtils.getHCatFieldSchema(fs)); } - private void createInputFile(Path path, int rowCount) throws IOException { - if (fs.exists(path)) { - fs.delete(path, true); - } - FSDataOutputStream os = fs.create(path); - for (int i = 0; i < rowCount; i++) { - os.writeChars(i + "\n"); - } - os.close(); + runMRCreateFail(dbName, tableName, partitionMap, hcatTableColumns); + List ptns = msc.listPartitionNames(dbName, tableName, + (short) 10); + Assert.assertEquals(0, ptns.size()); + Table table = msc.getTable(dbName, tableName); + Assert.assertTrue(table != null); + // Also make sure that the directory has been deleted in the table + // location. + Assert.assertFalse(fs.exists(new Path(table.getSd().getLocation() + + "/part1=p1value1/part0=p0value1"))); + } + + void runMRCreateFail( + String dbName, String tableName, Map partitionValues, + List columns) throws Exception { + + Job job = new Job(mrConf, "hcat mapreduce write fail test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(TestHCatPartitionPublish.MapFail.class); + + // input/output settings + job.setInputFormatClass(TextInputFormat.class); + + Path path = new Path(fs.getWorkingDirectory(), + "mapred/testHCatMapReduceInput"); + // The write count does not matter, as the map will fail in its first + // call. + createInputFile(path, 5); + + TextInputFormat.setInputPaths(job, path); + job.setOutputFormatClass(HCatOutputFormat.class); + OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, + partitionValues); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(DefaultHCatRecord.class); + + job.setNumReduceTasks(0); + + HCatOutputFormat.setSchema(job, new HCatSchema(columns)); + + boolean success = job.waitForCompletion(true); + Assert.assertTrue(success == false); + } + + private void createInputFile(Path path, int rowCount) throws IOException { + if (fs.exists(path)) { + fs.delete(path, true); } - - public static class MapFail extends - Mapper { - - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - { - throw new IOException("Exception to mimic job failure."); - } - } + FSDataOutputStream os = fs.create(path); + for (int i = 0; i < rowCount; i++) { + os.writeChars(i + "\n"); } - - private void createTable(String dbName, String tableName) throws Exception { - String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME - : dbName; - try { - msc.dropTable(databaseName, tableName); - } catch (Exception e) { - } // can fail with NoSuchObjectException - - Table tbl = new Table(); - tbl.setDbName(databaseName); - tbl.setTableName(tableName); - tbl.setTableType("MANAGED_TABLE"); - StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(getTableColumns()); - tbl.setPartitionKeys(getPartitionKeys()); - tbl.setSd(sd); - sd.setBucketCols(new ArrayList(2)); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib(ColumnarSerDe.class.getName()); - sd.setInputFormat(RCFileInputFormat.class.getName()); - sd.setOutputFormat(RCFileOutputFormat.class.getName()); - - Map tableParams = new HashMap(); - tbl.setParameters(tableParams); - - msc.createTable(tbl); - } - - protected List getPartitionKeys() { - List fields = new ArrayList(); - // Defining partition names in unsorted order - fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); - fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); - return fields; - } - - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + os.close(); + } + + public static class MapFail extends + Mapper { + + @Override + public void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + { + throw new IOException("Exception to mimic job failure."); + } } + } + + private void createTable(String dbName, String tableName) throws Exception { + String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME + : dbName; + try { + msc.dropTable(databaseName, tableName); + } catch (Exception e) { + } // can fail with NoSuchObjectException + + Table tbl = new Table(); + tbl.setDbName(databaseName); + tbl.setTableName(tableName); + tbl.setTableType("MANAGED_TABLE"); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(getTableColumns()); + tbl.setPartitionKeys(getPartitionKeys()); + tbl.setSd(sd); + sd.setBucketCols(new ArrayList(2)); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib(ColumnarSerDe.class.getName()); + sd.setInputFormat(RCFileInputFormat.class.getName()); + sd.setOutputFormat(RCFileOutputFormat.class.getName()); + + Map tableParams = new HashMap(); + tbl.setParameters(tableParams); + + msc.createTable(tbl); + } + + protected List getPartitionKeys() { + List fields = new ArrayList(); + // Defining partition names in unsorted order + fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); + fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java index d9b25d9..b2c7443 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java @@ -45,310 +45,310 @@ */ public class TestHCatPartitioned extends HCatMapReduceTest { - private static List writeRecords; - private static List partitionColumns; + private static List writeRecords; + private static List partitionColumns; - @BeforeClass - public static void oneTimeSetUp() throws Exception { + @BeforeClass + public static void oneTimeSetUp() throws Exception { - tableName = "testHCatPartitionedTable"; - writeRecords = new ArrayList(); + tableName = "testHCatPartitionedTable"; + writeRecords = new ArrayList(); - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); - objList.add(i); - objList.add("strvalue" + i); - writeRecords.add(new DefaultHCatRecord(objList)); - } + objList.add(i); + objList.add("strvalue" + i); + writeRecords.add(new DefaultHCatRecord(objList)); + } + + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + } + + + @Override + protected List getPartitionKeys() { + List fields = new ArrayList(); + //Defining partition names in unsorted order + fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); + fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + @Override + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new 
FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + + @Test + public void testHCatPartitionedTable() throws Exception { + + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value1"); + partitionMap.put("part0", "p0value1"); + + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + partitionMap.clear(); + partitionMap.put("PART1", "p1value2"); + partitionMap.put("PART0", "p0value2"); + + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + + //Test for duplicate publish + IOException exc = null; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType()); + + //Test for publish with invalid partition key name + exc = null; + partitionMap.clear(); + partitionMap.put("px1", "p1value2"); + partitionMap.put("px0", "p0value2"); - @Override - protected List getPartitionKeys() { - List fields = new ArrayList(); - //Defining partition names in unsorted order - fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); - fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } - @Override - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType()); + + //Test for publish with missing partition key values + exc = null; + partitionMap.clear(); + partitionMap.put("px", "p1value2"); + + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); - @Test - public void testHCatPartitionedTable() throws Exception { - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value1"); - partitionMap.put("part0", "p0value1"); + //Test for null partition value map + exc = null; + try { + runMRCreate(null, partitionColumns, writeRecords, 20, false); + } catch (IOException e) { + exc = e; + } - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + assertTrue(exc == null); +// assertTrue(exc instanceof HCatException); +// assertEquals(ErrorType.ERROR_PUBLISHING_PARTITION, ((HCatException) exc).getErrorType()); + // With Dynamic partitioning, this isn't an error that the keyValues specified didn't values - partitionMap.clear(); - partitionMap.put("PART1", "p1value2"); - partitionMap.put("PART0", "p0value2"); + //Read should get 10 + 20 rows + runMRRead(30); - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + //Read with partition filter + runMRRead(10, "part1 = \"p1value1\""); + 
runMRRead(20, "part1 = \"p1value2\""); + runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\""); + runMRRead(10, "part0 = \"p0value1\""); + runMRRead(20, "part0 = \"p0value2\""); + runMRRead(30, "part0 = \"p0value1\" or part0 = \"p0value2\""); - //Test for duplicate publish - IOException exc = null; - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + tableSchemaTest(); + columnOrderChangeTest(); + hiveReadTest(); + } - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType()); - //Test for publish with invalid partition key name - exc = null; - partitionMap.clear(); - partitionMap.put("px1", "p1value2"); - partitionMap.put("px0", "p0value2"); + //test that new columns gets added to table schema + private void tableSchemaTest() throws Exception { - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + HCatSchema tableSchema = getTableSchema(); - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType()); + assertEquals(4, tableSchema.getFields().size()); - //Test for publish with missing partition key values - exc = null; - partitionMap.clear(); - partitionMap.put("px", "p1value2"); + //Update partition schema to have 3 fields + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + writeRecords = new ArrayList(); - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); + objList.add(i); + objList.add("strvalue" + i); + objList.add("str2value" + i); - //Test for null partition value map - exc = null; - try { - runMRCreate(null, partitionColumns, writeRecords, 20, false); - } catch (IOException e) { - exc = e; - } + writeRecords.add(new DefaultHCatRecord(objList)); + } - assertTrue(exc == null); -// assertTrue(exc instanceof HCatException); -// assertEquals(ErrorType.ERROR_PUBLISHING_PARTITION, ((HCatException) exc).getErrorType()); - // With Dynamic partitioning, this isn't an error that the keyValues specified didn't values - - //Read should get 10 + 20 rows - runMRRead(30); - - //Read with partition filter - runMRRead(10, "part1 = \"p1value1\""); - runMRRead(20, "part1 = \"p1value2\""); - runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\""); - runMRRead(10, "part0 = \"p0value1\""); - runMRRead(20, "part0 = \"p0value2\""); - runMRRead(30, "part0 = \"p0value1\" or part0 = \"p0value2\""); - - tableSchemaTest(); - columnOrderChangeTest(); - hiveReadTest(); + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value5"); + partitionMap.put("part0", "p0value5"); + + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + + tableSchema = getTableSchema(); + + //assert that c3 has got added to table schema + assertEquals(5, tableSchema.getFields().size()); + assertEquals("c1", tableSchema.getFields().get(0).getName()); + assertEquals("c2", tableSchema.getFields().get(1).getName()); + assertEquals("c3", tableSchema.getFields().get(2).getName()); + 
assertEquals("part1", tableSchema.getFields().get(3).getName()); + assertEquals("part0", tableSchema.getFields().get(4).getName()); + + //Test that changing column data type fails + partitionMap.clear(); + partitionMap.put("part1", "p1value6"); + partitionMap.put("part0", "p0value6"); + + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, ""))); + + IOException exc = null; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType()); + + //Test that partition key is not allowed in data + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, ""))); - //test that new columns gets added to table schema - private void tableSchemaTest() throws Exception { - - HCatSchema tableSchema = getTableSchema(); - - assertEquals(4, tableSchema.getFields().size()); - - //Update partition schema to have 3 fields - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - - writeRecords = new ArrayList(); - - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("strvalue" + i); - objList.add("str2value" + i); - - writeRecords.add(new DefaultHCatRecord(objList)); - } - - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value5"); - partitionMap.put("part0", "p0value5"); - - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - - tableSchema = getTableSchema(); - - //assert that c3 has got added to table schema - assertEquals(5, tableSchema.getFields().size()); - assertEquals("c1", tableSchema.getFields().get(0).getName()); - assertEquals("c2", tableSchema.getFields().get(1).getName()); - assertEquals("c3", tableSchema.getFields().get(2).getName()); - assertEquals("part1", tableSchema.getFields().get(3).getName()); - assertEquals("part0", tableSchema.getFields().get(4).getName()); - - //Test that changing column data type fails - partitionMap.clear(); - partitionMap.put("part1", "p1value6"); - partitionMap.put("part0", "p0value6"); - - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, ""))); - - IOException exc = null; - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType()); - - //Test that partition key is not allowed in data - partitionColumns = new ArrayList(); - 
partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, ""))); - - List recordsContainingPartitionCols = new ArrayList(20); - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("c2value" + i); - objList.add("c3value" + i); - objList.add("p1value6"); - - recordsContainingPartitionCols.add(new DefaultHCatRecord(objList)); - } - - exc = null; - try { - runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true); - } catch (IOException e) { - exc = e; - } - - List records = runMRRead(20, "part1 = \"p1value6\""); - assertEquals(20, records.size()); - records = runMRRead(20, "part0 = \"p0value6\""); - assertEquals(20, records.size()); - Integer i = 0; - for (HCatRecord rec : records) { - assertEquals(5, rec.size()); - assertTrue(rec.get(0).equals(i)); - assertTrue(rec.get(1).equals("c2value" + i)); - assertTrue(rec.get(2).equals("c3value" + i)); - assertTrue(rec.get(3).equals("p1value6")); - assertTrue(rec.get(4).equals("p0value6")); - i++; - } + List recordsContainingPartitionCols = new ArrayList(20); + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("c2value" + i); + objList.add("c3value" + i); + objList.add("p1value6"); + + recordsContainingPartitionCols.add(new DefaultHCatRecord(objList)); } - //check behavior while change the order of columns - private void columnOrderChangeTest() throws Exception { + exc = null; + try { + runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true); + } catch (IOException e) { + exc = e; + } - HCatSchema tableSchema = getTableSchema(); + List records = runMRRead(20, "part1 = \"p1value6\""); + assertEquals(20, records.size()); + records = runMRRead(20, "part0 = \"p0value6\""); + assertEquals(20, records.size()); + Integer i = 0; + for (HCatRecord rec : records) { + assertEquals(5, rec.size()); + assertTrue(rec.get(0).equals(i)); + assertTrue(rec.get(1).equals("c2value" + i)); + assertTrue(rec.get(2).equals("c3value" + i)); + assertTrue(rec.get(3).equals("p1value6")); + assertTrue(rec.get(4).equals("p0value6")); + i++; + } + } - assertEquals(5, tableSchema.getFields().size()); + //check behavior while change the order of columns + private void columnOrderChangeTest() throws Exception { - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + HCatSchema tableSchema = getTableSchema(); + assertEquals(5, tableSchema.getFields().size()); - writeRecords = new ArrayList(); + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); + 
partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - for (int i = 0; i < 10; i++) { - List objList = new ArrayList(); - objList.add(i); - objList.add("co strvalue" + i); - objList.add("co str2value" + i); + writeRecords = new ArrayList(); - writeRecords.add(new DefaultHCatRecord(objList)); - } + for (int i = 0; i < 10; i++) { + List objList = new ArrayList(); - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value8"); - partitionMap.put("part0", "p0value8"); + objList.add(i); + objList.add("co strvalue" + i); + objList.add("co str2value" + i); - Exception exc = null; - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - } catch (IOException e) { - exc = e; - } + writeRecords.add(new DefaultHCatRecord(objList)); + } - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType()); + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value8"); + partitionMap.put("part0", "p0value8"); + Exception exc = null; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + } catch (IOException e) { + exc = e; + } - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType()); - writeRecords = new ArrayList(); - for (int i = 0; i < 10; i++) { - List objList = new ArrayList(); + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - objList.add(i); - objList.add("co strvalue" + i); + writeRecords = new ArrayList(); - writeRecords.add(new DefaultHCatRecord(objList)); - } + for (int i = 0; i < 10; i++) { + List objList = new ArrayList(); - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + objList.add(i); + objList.add("co strvalue" + i); - //Read should get 10 + 20 + 10 + 10 + 20 rows - runMRRead(70); + writeRecords.add(new DefaultHCatRecord(objList)); } - //Test that data inserted through hcatoutputformat is readable from hive - private void hiveReadTest() throws Exception { + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - String query = "select * from " + tableName; - int retCode = driver.run(query).getResponseCode(); + //Read should get 10 + 20 + 10 + 10 + 20 rows + runMRRead(70); + } - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } + //Test that data inserted through hcatoutputformat is readable from hive + private void hiveReadTest() throws Exception { - ArrayList res = new ArrayList(); - driver.getResults(res); - assertEquals(70, res.size()); + String query = "select * from " + tableName; + int retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } + + ArrayList res = new ArrayList(); + driver.getResults(res); + assertEquals(70, res.size()); + } } diff --git 
a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestInputJobInfo.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestInputJobInfo.java index 2867280..fe0ab2a 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestInputJobInfo.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestInputJobInfo.java @@ -29,23 +29,23 @@ */ public class TestInputJobInfo extends HCatBaseTest { - @Test - public void test4ArgCreate() throws Exception { - Properties p = new Properties(); - p.setProperty("key", "value"); - InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter", p); - Assert.assertEquals("Db", jobInfo.getDatabaseName()); - Assert.assertEquals("Table", jobInfo.getTableName()); - Assert.assertEquals("Filter", jobInfo.getFilter()); - Assert.assertEquals("value", jobInfo.getProperties().getProperty("key")); - } + @Test + public void test4ArgCreate() throws Exception { + Properties p = new Properties(); + p.setProperty("key", "value"); + InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter", p); + Assert.assertEquals("Db", jobInfo.getDatabaseName()); + Assert.assertEquals("Table", jobInfo.getTableName()); + Assert.assertEquals("Filter", jobInfo.getFilter()); + Assert.assertEquals("value", jobInfo.getProperties().getProperty("key")); + } - @Test - public void test3ArgCreate() throws Exception { - InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter"); - Assert.assertEquals("Db", jobInfo.getDatabaseName()); - Assert.assertEquals("Table", jobInfo.getTableName()); - Assert.assertEquals("Filter", jobInfo.getFilter()); - Assert.assertEquals(0, jobInfo.getProperties().size()); - } + @Test + public void test3ArgCreate() throws Exception { + InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter"); + Assert.assertEquals("Db", jobInfo.getDatabaseName()); + Assert.assertEquals("Table", jobInfo.getTableName()); + Assert.assertEquals("Filter", jobInfo.getFilter()); + Assert.assertEquals(0, jobInfo.getProperties().size()); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestMultiOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestMultiOutputFormat.java index c05a7f0..8112f76 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestMultiOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestMultiOutputFormat.java @@ -65,273 +65,273 @@ */ public class TestMultiOutputFormat { - private static final Logger LOG = LoggerFactory.getLogger(TestMultiOutputFormat.class); - private static File workDir; - private static Configuration mrConf = null; - private static FileSystem fs = null; - private static MiniMRCluster mrCluster = null; - - @BeforeClass - public static void setup() throws IOException { - createWorkDir(); - Configuration conf = new Configuration(true); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - - fs = FileSystem.get(conf); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - // LocalJobRunner does not work with mapreduce OutputCommitter. So need - // to use MiniMRCluster. 
MAPREDUCE-2350 - mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, - new JobConf(conf)); - mrConf = mrCluster.createJobConf(); + private static final Logger LOG = LoggerFactory.getLogger(TestMultiOutputFormat.class); + private static File workDir; + private static Configuration mrConf = null; + private static FileSystem fs = null; + private static MiniMRCluster mrCluster = null; + + @BeforeClass + public static void setup() throws IOException { + createWorkDir(); + Configuration conf = new Configuration(true); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + + fs = FileSystem.get(conf); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + // LocalJobRunner does not work with mapreduce OutputCommitter. So need + // to use MiniMRCluster. MAPREDUCE-2350 + mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, + new JobConf(conf)); + mrConf = mrCluster.createJobConf(); + } + + private static void createWorkDir() throws IOException { + String testDir = System.getProperty("test.data.dir", "./"); + testDir = testDir + "/test_multiout_" + Math.abs(new Random().nextLong()) + "/"; + workDir = new File(new File(testDir).getCanonicalPath()); + FileUtil.fullyDelete(workDir); + workDir.mkdirs(); + } + + @AfterClass + public static void tearDown() throws IOException { + if (mrCluster != null) { + mrCluster.shutdown(); } - - private static void createWorkDir() throws IOException { - String testDir = System.getProperty("test.data.dir", "./"); - testDir = testDir + "/test_multiout_" + Math.abs(new Random().nextLong()) + "/"; - workDir = new File(new File(testDir).getCanonicalPath()); - FileUtil.fullyDelete(workDir); - workDir.mkdirs(); + FileUtil.fullyDelete(workDir); + } + + /** + * A test job that reads a input file and outputs each word and the index of + * the word encountered to a text file and sequence file with different key + * values. + */ + @Test + public void testMultiOutputFormatWithoutReduce() throws Throwable { + Job job = new Job(mrConf, "MultiOutNoReduce"); + job.setMapperClass(MultiOutWordIndexMapper.class); + job.setJarByClass(this.getClass()); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(MultiOutputFormat.class); + job.setNumReduceTasks(0); + + JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); + configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); + configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, + IntWritable.class); + Path outDir = new Path(workDir.getPath(), job.getJobName()); + FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); + FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); + + String fileContent = "Hello World"; + String inputFile = createInputFile(fileContent); + FileInputFormat.setInputPaths(job, new Path(inputFile)); + + //Test for merging of configs + DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs); + String dummyFile = createInputFile("dummy file"); + DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1") + .getConfiguration(), fs); + // duplicate of the value. 
Merging should remove duplicates + DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2") + .getConfiguration(), fs); + + configurer.configure(); + + // Verify if the configs are merged + Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration()); + List fileClassPathsList = Arrays.asList(fileClassPaths); + Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile))); + Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile))); + + URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration()); + List cacheFilesList = Arrays.asList(cacheFiles); + Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri())); + Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri())); + + Assert.assertTrue(job.waitForCompletion(true)); + + Path textOutPath = new Path(outDir, "out1/part-m-00000"); + String[] textOutput = readFully(textOutPath).split("\n"); + Path seqOutPath = new Path(outDir, "out2/part-m-00000"); + SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); + Text key = new Text(); + IntWritable value = new IntWritable(); + String[] words = fileContent.split(" "); + Assert.assertEquals(words.length, textOutput.length); + LOG.info("Verifying file contents"); + for (int i = 0; i < words.length; i++) { + Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]); + reader.next(key, value); + Assert.assertEquals(words[i], key.toString()); + Assert.assertEquals((i + 1), value.get()); } - - @AfterClass - public static void tearDown() throws IOException { - if (mrCluster != null) { - mrCluster.shutdown(); - } - FileUtil.fullyDelete(workDir); + Assert.assertFalse(reader.next(key, value)); + } + + /** + * A word count test job that reads a input file and outputs the count of + * words to a text file and sequence file with different key values. 
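// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the MultiOutputFormat setup these
// tests exercise, reduced to its essentials. The alias names ("text", "seq"),
// the job name and the output directory are hypothetical; the JobConfigurer
// calls (createConfigurer, addOutputFormat, getJob, configure) mirror the ones
// used above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hcatalog.mapreduce.MultiOutputFormat;
import org.apache.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer;

public class MultiOutputSketch {
  public static Job configure(Configuration conf) throws Exception {
    Job job = new Job(conf, "multi-output sketch");
    job.setOutputFormatClass(MultiOutputFormat.class);

    // Register one output format per alias, each with its own key/value classes.
    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("text", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("seq", SequenceFileOutputFormat.class, Text.class, IntWritable.class);

    // File-based formats get their paths set on the per-alias child job.
    Path outDir = new Path("/tmp/multi-output-sketch");
    FileOutputFormat.setOutputPath(configurer.getJob("text"), new Path(outDir, "text"));
    FileOutputFormat.setOutputPath(configurer.getJob("seq"), new Path(outDir, "seq"));

    // Merge the per-alias configurations back into the parent job.
    configurer.configure();
    return job;
  }
}
// ---------------------------------------------------------------------------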
+ */ + @Test + public void testMultiOutputFormatWithReduce() throws Throwable { + Job job = new Job(mrConf, "MultiOutWithReduce"); + + job.setMapperClass(WordCountMapper.class); + job.setReducerClass(MultiOutWordCountReducer.class); + job.setJarByClass(this.getClass()); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(MultiOutputFormat.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); + + configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); + configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, + IntWritable.class); + configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, + IntWritable.class); + Path outDir = new Path(workDir.getPath(), job.getJobName()); + FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); + FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); + + configurer.configure(); + + String fileContent = "Hello World Hello World World"; + String inputFile = createInputFile(fileContent); + FileInputFormat.setInputPaths(job, new Path(inputFile)); + + Assert.assertTrue(job.waitForCompletion(true)); + + Path textOutPath = new Path(outDir, "out1/part-r-00000"); + String[] textOutput = readFully(textOutPath).split("\n"); + Path seqOutPath = new Path(outDir, "out2/part-r-00000"); + SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); + Text key = new Text(); + IntWritable value = new IntWritable(); + String[] words = "Hello World".split(" "); + Assert.assertEquals(words.length, textOutput.length); + for (int i = 0; i < words.length; i++) { + Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]); + reader.next(key, value); + Assert.assertEquals(words[i], key.toString()); + Assert.assertEquals((i + 2), value.get()); } - - /** - * A test job that reads a input file and outputs each word and the index of - * the word encountered to a text file and sequence file with different key - * values. - */ - @Test - public void testMultiOutputFormatWithoutReduce() throws Throwable { - Job job = new Job(mrConf, "MultiOutNoReduce"); - job.setMapperClass(MultiOutWordIndexMapper.class); - job.setJarByClass(this.getClass()); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(MultiOutputFormat.class); - job.setNumReduceTasks(0); - - JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); - configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); - configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, - IntWritable.class); - Path outDir = new Path(workDir.getPath(), job.getJobName()); - FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); - FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); - - String fileContent = "Hello World"; - String inputFile = createInputFile(fileContent); - FileInputFormat.setInputPaths(job, new Path(inputFile)); - - //Test for merging of configs - DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs); - String dummyFile = createInputFile("dummy file"); - DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1") - .getConfiguration(), fs); - // duplicate of the value. 
Merging should remove duplicates - DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2") - .getConfiguration(), fs); - - configurer.configure(); - - // Verify if the configs are merged - Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration()); - List fileClassPathsList = Arrays.asList(fileClassPaths); - Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile))); - Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile))); - - URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration()); - List cacheFilesList = Arrays.asList(cacheFiles); - Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri())); - Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri())); - - Assert.assertTrue(job.waitForCompletion(true)); - - Path textOutPath = new Path(outDir, "out1/part-m-00000"); - String[] textOutput = readFully(textOutPath).split("\n"); - Path seqOutPath = new Path(outDir, "out2/part-m-00000"); - SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); - Text key = new Text(); - IntWritable value = new IntWritable(); - String[] words = fileContent.split(" "); - Assert.assertEquals(words.length, textOutput.length); - LOG.info("Verifying file contents"); - for (int i = 0; i < words.length; i++) { - Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]); - reader.next(key, value); - Assert.assertEquals(words[i], key.toString()); - Assert.assertEquals((i + 1), value.get()); - } - Assert.assertFalse(reader.next(key, value)); + Assert.assertFalse(reader.next(key, value)); + + } + + + /** + * Create a file for map input + * + * @return absolute path of the file. + * @throws IOException if any error encountered + */ + private String createInputFile(String content) throws IOException { + File f = File.createTempFile("input", "txt"); + FileWriter writer = new FileWriter(f); + writer.write(content); + writer.close(); + return f.getAbsolutePath(); + } + + private String readFully(Path file) throws IOException { + FSDataInputStream in = fs.open(file); + byte[] b = new byte[in.available()]; + in.readFully(b); + in.close(); + return new String(b); + } + + private static class MultiOutWordIndexMapper extends + Mapper { + + private IntWritable index = new IntWritable(1); + private Text word = new Text(); + + @Override + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + StringTokenizer itr = new StringTokenizer(value.toString()); + while (itr.hasMoreTokens()) { + word.set(itr.nextToken()); + MultiOutputFormat.write("out1", index, word, context); + MultiOutputFormat.write("out2", word, index, context); + index.set(index.get() + 1); + } } - - /** - * A word count test job that reads a input file and outputs the count of - * words to a text file and sequence file with different key values. 
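// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): how a task writes to the aliases
// registered through JobConfigurer. MultiOutputFormat.write(alias, key, value,
// context) routes each pair to the named output, so the alias strings must match
// the ones passed to addOutputFormat. The reducer name and the Writable output
// type parameters below are assumptions made for the sketch.
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hcatalog.mapreduce.MultiOutputFormat;

public class MultiOutputSketchReducer
    extends Reducer<Text, IntWritable, Writable, Writable> {

  private final IntWritable count = new IntWritable();

  @Override
  protected void reduce(Text word, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
      sum += val.get();
    }
    count.set(sum);
    // Same result, routed to two outputs with swapped key/value roles.
    MultiOutputFormat.write("text", count, word, context);
    MultiOutputFormat.write("seq", word, count, context);
  }
}
// ---------------------------------------------------------------------------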
- */ - @Test - public void testMultiOutputFormatWithReduce() throws Throwable { - Job job = new Job(mrConf, "MultiOutWithReduce"); - - job.setMapperClass(WordCountMapper.class); - job.setReducerClass(MultiOutWordCountReducer.class); - job.setJarByClass(this.getClass()); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(MultiOutputFormat.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(IntWritable.class); - - JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); - - configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); - configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, - IntWritable.class); - configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, - IntWritable.class); - Path outDir = new Path(workDir.getPath(), job.getJobName()); - FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); - FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); - - configurer.configure(); - - String fileContent = "Hello World Hello World World"; - String inputFile = createInputFile(fileContent); - FileInputFormat.setInputPaths(job, new Path(inputFile)); - - Assert.assertTrue(job.waitForCompletion(true)); - - Path textOutPath = new Path(outDir, "out1/part-r-00000"); - String[] textOutput = readFully(textOutPath).split("\n"); - Path seqOutPath = new Path(outDir, "out2/part-r-00000"); - SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); - Text key = new Text(); - IntWritable value = new IntWritable(); - String[] words = "Hello World".split(" "); - Assert.assertEquals(words.length, textOutput.length); - for (int i = 0; i < words.length; i++) { - Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]); - reader.next(key, value); - Assert.assertEquals(words[i], key.toString()); - Assert.assertEquals((i + 2), value.get()); - } - Assert.assertFalse(reader.next(key, value)); - + } + + private static class WordCountMapper extends + Mapper { + + private final static IntWritable one = new IntWritable(1); + private Text word = new Text(); + + @Override + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + StringTokenizer itr = new StringTokenizer(value.toString()); + while (itr.hasMoreTokens()) { + word.set(itr.nextToken()); + context.write(word, one); + } } - - - /** - * Create a file for map input - * - * @return absolute path of the file. 
- * @throws IOException if any error encountered - */ - private String createInputFile(String content) throws IOException { - File f = File.createTempFile("input", "txt"); - FileWriter writer = new FileWriter(f); - writer.write(content); - writer.close(); - return f.getAbsolutePath(); + } + + private static class MultiOutWordCountReducer extends + Reducer { + + private IntWritable count = new IntWritable(); + + @Override + protected void reduce(Text word, Iterable values, Context context) + throws IOException, InterruptedException { + int sum = 0; + for (IntWritable val : values) { + sum += val.get(); + } + count.set(sum); + MultiOutputFormat.write("out1", count, word, context); + MultiOutputFormat.write("out2", word, count, context); + MultiOutputFormat.write("out3", word, count, context); } + } - private String readFully(Path file) throws IOException { - FSDataInputStream in = fs.open(file); - byte[] b = new byte[in.available()]; - in.readFully(b); - in.close(); - return new String(b); - } + private static class NullOutputFormat extends + org.apache.hadoop.mapreduce.lib.output.NullOutputFormat { - private static class MultiOutWordIndexMapper extends - Mapper { - - private IntWritable index = new IntWritable(1); - private Text word = new Text(); - - @Override - protected void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - StringTokenizer itr = new StringTokenizer(value.toString()); - while (itr.hasMoreTokens()) { - word.set(itr.nextToken()); - MultiOutputFormat.write("out1", index, word, context); - MultiOutputFormat.write("out2", word, index, context); - index.set(index.get() + 1); - } + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) { + return new OutputCommitter() { + public void abortTask(TaskAttemptContext taskContext) { } - } - private static class WordCountMapper extends - Mapper { - - private final static IntWritable one = new IntWritable(1); - private Text word = new Text(); - - @Override - protected void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - StringTokenizer itr = new StringTokenizer(value.toString()); - while (itr.hasMoreTokens()) { - word.set(itr.nextToken()); - context.write(word, one); - } + public void cleanupJob(JobContext jobContext) { } - } - private static class MultiOutWordCountReducer extends - Reducer { - - private IntWritable count = new IntWritable(); - - @Override - protected void reduce(Text word, Iterable values, Context context) - throws IOException, InterruptedException { - int sum = 0; - for (IntWritable val : values) { - sum += val.get(); - } - count.set(sum); - MultiOutputFormat.write("out1", count, word, context); - MultiOutputFormat.write("out2", word, count, context); - MultiOutputFormat.write("out3", word, count, context); + public void commitJob(JobContext jobContext) { } - } - - private static class NullOutputFormat extends - org.apache.hadoop.mapreduce.lib.output.NullOutputFormat { - - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) { - return new OutputCommitter() { - public void abortTask(TaskAttemptContext taskContext) { - } - public void cleanupJob(JobContext jobContext) { - } - - public void commitJob(JobContext jobContext) { - } - - public void commitTask(TaskAttemptContext taskContext) { - Assert.fail("needsTaskCommit is false but commitTask was called"); - } + public void commitTask(TaskAttemptContext taskContext) { + Assert.fail("needsTaskCommit is false but 
commitTask was called"); + } - public boolean needsTaskCommit(TaskAttemptContext taskContext) { - return false; - } + public boolean needsTaskCommit(TaskAttemptContext taskContext) { + return false; + } - public void setupJob(JobContext jobContext) { - } + public void setupJob(JobContext jobContext) { + } - public void setupTask(TaskAttemptContext taskContext) { - } - }; + public void setupTask(TaskAttemptContext taskContext) { } + }; } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestPassProperties.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestPassProperties.java index 9b30576..1aad829 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestPassProperties.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestPassProperties.java @@ -50,97 +50,97 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.TestPassProperties} instead */ public class TestPassProperties { - private static final String TEST_DATA_DIR = System.getProperty("user.dir") + - "/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - private static Driver driver; - private static PigServer server; - private static String[] input; - private static HiveConf hiveConf; - - public void Initialize() throws Exception { - hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - int numRows = 3; - input = new String[numRows]; - for (int i = 0; i < numRows; i++) { - String col1 = "a" + i; - String col2 = "b" + i; - input[i] = i + "," + col1 + "," + col2; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - server = new PigServer(ExecType.LOCAL); + private static final String TEST_DATA_DIR = System.getProperty("user.dir") + + "/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + private static Driver driver; + private static PigServer server; + private static String[] input; + private static HiveConf hiveConf; + + public void Initialize() throws Exception { + hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + + new File(TEST_WAREHOUSE_DIR).mkdirs(); + + int numRows = 3; + input = new String[numRows]; + for (int i = 0; i < numRows; i++) { + String col1 = "a" + i; + String col2 = "b" + i; + input[i] = i + "," + col1 + "," + col2; } - - @Test - public void testSequenceTableWriteReadMR() throws Exception { - Initialize(); - String createTable = "CREATE TABLE bad_props_table(a0 int, a1 String, a2 String) STORED AS 
SEQUENCEFILE"; - driver.run("drop table bad_props_table"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); - - boolean caughtException = false; - try { - Configuration conf = new Configuration(); - conf.set("hive.metastore.uris", "thrift://no.such.machine:10888"); - conf.set("hive.metastore.local", "false"); - Job job = new Job(conf, "Write-hcat-seq-table"); - job.setJarByClass(TestSequenceFileReadWrite.class); - - job.setMapperClass(Map.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); - - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, "bad_props_table", null)); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, getSchema()); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - new FileOutputCommitterContainer(job, null).cleanupJob(job); - } catch (Exception e) { - caughtException = true; - assertTrue(e.getMessage().contains( - "Could not connect to meta store using any of the URIs provided")); - } - assertTrue(caughtException); + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + server = new PigServer(ExecType.LOCAL); + } + + @Test + public void testSequenceTableWriteReadMR() throws Exception { + Initialize(); + String createTable = "CREATE TABLE bad_props_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; + driver.run("drop table bad_props_table"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); + + boolean caughtException = false; + try { + Configuration conf = new Configuration(); + conf.set("hive.metastore.uris", "thrift://no.such.machine:10888"); + conf.set("hive.metastore.local", "false"); + Job job = new Job(conf, "Write-hcat-seq-table"); + job.setJarByClass(TestSequenceFileReadWrite.class); + + job.setMapperClass(Map.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); + + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, "bad_props_table", null)); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setSchema(job, getSchema()); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + new FileOutputCommitterContainer(job, null).cleanupJob(job); + } catch (Exception e) { + caughtException = true; + assertTrue(e.getMessage().contains( + "Could not connect to meta store using any of the URIs provided")); } - - public static class Map extends Mapper { - - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - DefaultHCatRecord record = new DefaultHCatRecord(3); - record.set(0, Integer.parseInt(cols[0])); - record.set(1, cols[1]); - record.set(2, cols[2]); - context.write(NullWritable.get(), record); - } - } - - private HCatSchema getSchema() throws HCatException { - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, - "")); - schema.append(new HCatFieldSchema("a1", - HCatFieldSchema.Type.STRING, "")); - schema.append(new HCatFieldSchema("a2", - HCatFieldSchema.Type.STRING, "")); - return schema; + assertTrue(caughtException); + } 
+ + public static class Map extends Mapper { + + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + String[] cols = value.toString().split(","); + DefaultHCatRecord record = new DefaultHCatRecord(3); + record.set(0, Integer.parseInt(cols[0])); + record.set(1, cols[1]); + record.set(2, cols[2]); + context.write(NullWritable.get(), record); } + } + + private HCatSchema getSchema() throws HCatException { + HCatSchema schema = new HCatSchema(new ArrayList()); + schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, + "")); + schema.append(new HCatFieldSchema("a1", + HCatFieldSchema.Type.STRING, "")); + schema.append(new HCatFieldSchema("a2", + HCatFieldSchema.Type.STRING, "")); + return schema; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestSequenceFileReadWrite.java b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestSequenceFileReadWrite.java index 2cf3bdc..d70a258 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestSequenceFileReadWrite.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestSequenceFileReadWrite.java @@ -57,212 +57,212 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.TestSequenceFileReadWrite} instead */ public class TestSequenceFileReadWrite extends TestCase { - private static final String TEST_DATA_DIR = System.getProperty("user.dir") + - "/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + private static final String TEST_DATA_DIR = System.getProperty("user.dir") + + "/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - private static Driver driver; - private static PigServer server; - private static String[] input; - private static HiveConf hiveConf; + private static Driver driver; + private static PigServer server; + private static String[] input; + private static HiveConf hiveConf; - public void Initialize() throws Exception { - hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); + public void Initialize() throws Exception { + hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); - new File(TEST_WAREHOUSE_DIR).mkdirs(); + new File(TEST_WAREHOUSE_DIR).mkdirs(); - int numRows = 3; - input = new String[numRows]; - for (int i = 0; i < numRows; i++) { - String col1 = "a" + i; - String col2 = "b" + i; - input[i] = i + "," + col1 + "," + col2; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - server = new PigServer(ExecType.LOCAL); 
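
Aside: the TestSequenceFileReadWrite hunks that follow all repeat one round trip: store generated rows into a Hive-managed table with HCatStorer, then read them back with HCatLoader and compare against the input. Below is a condensed sketch of that round trip, restricted to the PigServer calls visible in this patch; the input path and table name are placeholders, and the table is assumed to have been created beforehand, as the tests do through the Hive Driver.

// Illustrative sketch, not part of the patch: Pig round trip through HCatStorer/HCatLoader.
import java.util.Iterator;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;

public class HCatPigRoundTripSketch {                      // placeholder class name
  public static void main(String[] args) throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);

    // Write path: load a CSV file and store it into an existing HCatalog table.
    server.setBatchOn();
    server.registerQuery("A = load '/tmp/input.data' using PigStorage(',')"
        + " as (a0:int,a1:chararray,a2:chararray);");      // placeholder input path
    server.registerQuery("store A into 'demo_table' using org.apache.hcatalog.pig.HCatStorer();");
    server.executeBatch();

    // Read path: load the same table back through HCatLoader and walk the tuples.
    server.registerQuery("B = load 'demo_table' using org.apache.hcatalog.pig.HCatLoader();");
    Iterator<Tuple> it = server.openIterator("B");
    int rows = 0;
    while (it.hasNext()) {
      Tuple t = it.next();
      System.out.println(t.get(0) + "," + t.get(1) + "," + t.get(2));
      rows++;
    }
    System.out.println("rows read back: " + rows);
  }
}
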
+ int numRows = 3; + input = new String[numRows]; + for (int i = 0; i < numRows; i++) { + String col1 = "a" + i; + String col2 = "b" + i; + input[i] = i + "," + col1 + "," + col2; } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + server = new PigServer(ExecType.LOCAL); + } - @Test - public void testSequenceTableWriteRead() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; - driver.run("drop table demo_table"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); + @Test + public void testSequenceTableWriteRead() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; + driver.run("drop table demo_table"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); - server.setBatchOn(); - server.registerQuery("A = load '" - + INPUT_FILE_NAME - + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); - server.registerQuery("store A into 'demo_table' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); + server.setBatchOn(); + server.registerQuery("A = load '" + + INPUT_FILE_NAME + + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); + server.registerQuery("store A into 'demo_table' using org.apache.hcatalog.pig.HCatStorer();"); + server.executeBatch(); - server.registerQuery("B = load 'demo_table' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("B"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); + server.registerQuery("B = load 'demo_table' using org.apache.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("B"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; } + assertEquals(input.length, numTuplesRead); + } - @Test - public void testTextTableWriteRead() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table_1(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; - driver.run("drop table demo_table_1"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); + @Test + public void testTextTableWriteRead() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table_1(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; + driver.run("drop table demo_table_1"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); - server.setBatchOn(); - server.registerQuery("A = load '" - + INPUT_FILE_NAME - + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); - server.registerQuery("store A into 'demo_table_1' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); + server.setBatchOn(); + server.registerQuery("A = load '" + + INPUT_FILE_NAME + + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); + server.registerQuery("store A into 'demo_table_1' using 
org.apache.hcatalog.pig.HCatStorer();"); + server.executeBatch(); - server.registerQuery("B = load 'demo_table_1' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("B"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); + server.registerQuery("B = load 'demo_table_1' using org.apache.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("B"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; } + assertEquals(input.length, numTuplesRead); + } - @Test - public void testSequenceTableWriteReadMR() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table_2(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; - driver.run("drop table demo_table_2"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); + @Test + public void testSequenceTableWriteReadMR() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table_2(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; + driver.run("drop table demo_table_2"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); - Configuration conf = new Configuration(); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(hiveConf.getAllProperties())); - Job job = new Job(conf, "Write-hcat-seq-table"); - job.setJarByClass(TestSequenceFileReadWrite.class); + Configuration conf = new Configuration(); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(hiveConf.getAllProperties())); + Job job = new Job(conf, "Write-hcat-seq-table"); + job.setJarByClass(TestSequenceFileReadWrite.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); + job.setMapperClass(Map.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_2", null)); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, getSchema()); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - if (!HCatUtil.isHadoop23()) { - new FileOutputCommitterContainer(job, null).commitJob(job); - } - assertTrue(job.isSuccessful()); + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_2", null)); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setSchema(job, getSchema()); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + if (!HCatUtil.isHadoop23()) { + new FileOutputCommitterContainer(job, null).commitJob(job); + } + assertTrue(job.isSuccessful()); - server.setBatchOn(); - server.registerQuery("C = 
load 'default.demo_table_2' using org.apache.hcatalog.pig.HCatLoader();"); - server.executeBatch(); - Iterator XIter = server.openIterator("C"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); + server.setBatchOn(); + server.registerQuery("C = load 'default.demo_table_2' using org.apache.hcatalog.pig.HCatLoader();"); + server.executeBatch(); + Iterator XIter = server.openIterator("C"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; } + assertEquals(input.length, numTuplesRead); + } - @Test - public void testTextTableWriteReadMR() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table_3(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; - driver.run("drop table demo_table_3"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); + @Test + public void testTextTableWriteReadMR() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table_3(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; + driver.run("drop table demo_table_3"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); - Configuration conf = new Configuration(); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(hiveConf.getAllProperties())); - Job job = new Job(conf, "Write-hcat-text-table"); - job.setJarByClass(TestSequenceFileReadWrite.class); + Configuration conf = new Configuration(); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(hiveConf.getAllProperties())); + Job job = new Job(conf, "Write-hcat-text-table"); + job.setJarByClass(TestSequenceFileReadWrite.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setInputFormatClass(TextInputFormat.class); - job.setNumReduceTasks(0); - TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); + job.setMapperClass(Map.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setInputFormatClass(TextInputFormat.class); + job.setNumReduceTasks(0); + TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_3", null)); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, getSchema()); - assertTrue(job.waitForCompletion(true)); - if (!HCatUtil.isHadoop23()) { - new FileOutputCommitterContainer(job, null).commitJob(job); - } - assertTrue(job.isSuccessful()); + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_3", null)); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setSchema(job, getSchema()); + assertTrue(job.waitForCompletion(true)); + if (!HCatUtil.isHadoop23()) { + new FileOutputCommitterContainer(job, null).commitJob(job); + } + assertTrue(job.isSuccessful()); - server.setBatchOn(); - server.registerQuery("D = load 
'default.demo_table_3' using org.apache.hcatalog.pig.HCatLoader();"); - server.executeBatch(); - Iterator XIter = server.openIterator("D"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); + server.setBatchOn(); + server.registerQuery("D = load 'default.demo_table_3' using org.apache.hcatalog.pig.HCatLoader();"); + server.executeBatch(); + Iterator XIter = server.openIterator("D"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; } + assertEquals(input.length, numTuplesRead); + } - public static class Map extends Mapper { + public static class Map extends Mapper { - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - DefaultHCatRecord record = new DefaultHCatRecord(3); - record.set(0, Integer.parseInt(cols[0])); - record.set(1, cols[1]); - record.set(2, cols[2]); - context.write(NullWritable.get(), record); - } + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + String[] cols = value.toString().split(","); + DefaultHCatRecord record = new DefaultHCatRecord(3); + record.set(0, Integer.parseInt(cols[0])); + record.set(1, cols[1]); + record.set(2, cols[2]); + context.write(NullWritable.get(), record); } + } - private HCatSchema getSchema() throws HCatException { - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, - "")); - schema.append(new HCatFieldSchema("a1", - HCatFieldSchema.Type.STRING, "")); - schema.append(new HCatFieldSchema("a2", - HCatFieldSchema.Type.STRING, "")); - return schema; - } + private HCatSchema getSchema() throws HCatException { + HCatSchema schema = new HCatSchema(new ArrayList()); + schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, + "")); + schema.append(new HCatFieldSchema("a1", + HCatFieldSchema.Type.STRING, "")); + schema.append(new HCatFieldSchema("a2", + HCatFieldSchema.Type.STRING, "")); + return schema; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java b/hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java index d5c4a0a..f346782 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java @@ -53,195 +53,195 @@ */ public class TestRCFileMapReduceInputFormat extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestRCFileMapReduceInputFormat.class); + private static final Logger LOG = LoggerFactory.getLogger(TestRCFileMapReduceInputFormat.class); - private static Configuration conf = new Configuration(); + private static Configuration conf = new Configuration(); - private static ColumnarSerDe serDe; + private static ColumnarSerDe serDe; - private static Path file; + private static Path file; - private static FileSystem 
fs; + private static FileSystem fs; - private static Properties tbl; + private static Properties tbl; - static { - try { - fs = FileSystem.getLocal(conf); - Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); - file = new Path(dir, "test_rcfile"); - fs.delete(dir, true); - // the SerDe part is from TestLazySimpleSerDe - serDe = new ColumnarSerDe(); - // Create the SerDe - tbl = createProperties(); - serDe.initialize(conf, tbl); - } catch (Exception e) { - } + static { + try { + fs = FileSystem.getLocal(conf); + Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); + file = new Path(dir, "test_rcfile"); + fs.delete(dir, true); + // the SerDe part is from TestLazySimpleSerDe + serDe = new ColumnarSerDe(); + // Create the SerDe + tbl = createProperties(); + serDe.initialize(conf, tbl); + } catch (Exception e) { } - - private static BytesRefArrayWritable patialS = new BytesRefArrayWritable(); - - private static byte[][] bytesArray = null; - - private static BytesRefArrayWritable s = null; - - static { - try { - bytesArray = new byte[][]{"123".getBytes("UTF-8"), - "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), - "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), - "hive and hadoop".getBytes("UTF-8"), new byte[0], - "NULL".getBytes("UTF-8")}; - s = new BytesRefArrayWritable(bytesArray.length); - s.set(0, new BytesRefWritable("123".getBytes("UTF-8"))); - s.set(1, new BytesRefWritable("456".getBytes("UTF-8"))); - s.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); - s.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); - s.set(4, new BytesRefWritable("5.3".getBytes("UTF-8"))); - s.set(5, new BytesRefWritable("hive and hadoop".getBytes("UTF-8"))); - s.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); - s.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); - - // partial test init - patialS.set(0, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(1, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); - patialS.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); - patialS.set(4, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(5, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); - - } catch (UnsupportedEncodingException e) { - } - } - - - /** For debugging and testing. 
*/ - public static void main(String[] args) throws Exception { - int count = 10000; - boolean create = true; - - String usage = "Usage: RCFile " + "[-count N]" + " file"; - if (args.length == 0) { - LOG.error(usage); - System.exit(-1); - } - - try { - for (int i = 0; i < args.length; ++i) { // parse command line - if (args[i] == null) { - continue; - } else if (args[i].equals("-count")) { - count = Integer.parseInt(args[++i]); - } else { - // file is required parameter - file = new Path(args[i]); - } - } - - if (file == null) { - LOG.error(usage); - System.exit(-1); - } - - LOG.info("count = {}", count); - LOG.info("create = {}", create); - LOG.info("file = {}", file); - - // test.performanceTest(); - LOG.info("Finished."); - } finally { - fs.close(); - } + } + + private static BytesRefArrayWritable patialS = new BytesRefArrayWritable(); + + private static byte[][] bytesArray = null; + + private static BytesRefArrayWritable s = null; + + static { + try { + bytesArray = new byte[][]{"123".getBytes("UTF-8"), + "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), + "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), + "hive and hadoop".getBytes("UTF-8"), new byte[0], + "NULL".getBytes("UTF-8")}; + s = new BytesRefArrayWritable(bytesArray.length); + s.set(0, new BytesRefWritable("123".getBytes("UTF-8"))); + s.set(1, new BytesRefWritable("456".getBytes("UTF-8"))); + s.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); + s.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); + s.set(4, new BytesRefWritable("5.3".getBytes("UTF-8"))); + s.set(5, new BytesRefWritable("hive and hadoop".getBytes("UTF-8"))); + s.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); + s.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); + + // partial test init + patialS.set(0, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(1, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); + patialS.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); + patialS.set(4, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(5, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); + + } catch (UnsupportedEncodingException e) { } + } - private static Properties createProperties() { - Properties tbl = new Properties(); - - // Set the configuration parameters - tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); - tbl.setProperty("columns", - "abyte,ashort,aint,along,adouble,astring,anullint,anullstring"); - tbl.setProperty("columns.types", - "tinyint:smallint:int:bigint:double:string:int:string"); - tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); - return tbl; - } + /** For debugging and testing. 
*/ + public static void main(String[] args) throws Exception { + int count = 10000; + boolean create = true; - public void testSynAndSplit() throws IOException, InterruptedException { - splitBeforeSync(); - splitRightBeforeSync(); - splitInMiddleOfSync(); - splitRightAfterSync(); - splitAfterSync(); + String usage = "Usage: RCFile " + "[-count N]" + " file"; + if (args.length == 0) { + LOG.error(usage); + System.exit(-1); } - private void splitBeforeSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(600, 1000, 2, 17684, null); - } + try { + for (int i = 0; i < args.length; ++i) { // parse command line + if (args[i] == null) { + continue; + } else if (args[i].equals("-count")) { + count = Integer.parseInt(args[++i]); + } else { + // file is required parameter + file = new Path(args[i]); + } + } - private void splitRightBeforeSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 17750, null); - } + if (file == null) { + LOG.error(usage); + System.exit(-1); + } - private void splitInMiddleOfSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 17760, null); + LOG.info("count = {}", count); + LOG.info("create = {}", create); + LOG.info("file = {}", file); + // test.performanceTest(); + LOG.info("Finished."); + } finally { + fs.close(); } - - private void splitRightAfterSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 17770, null); + } + + private static Properties createProperties() { + Properties tbl = new Properties(); + + // Set the configuration parameters + tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); + tbl.setProperty("columns", + "abyte,ashort,aint,along,adouble,astring,anullint,anullstring"); + tbl.setProperty("columns.types", + "tinyint:smallint:int:bigint:double:string:int:string"); + tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + return tbl; + } + + + public void testSynAndSplit() throws IOException, InterruptedException { + splitBeforeSync(); + splitRightBeforeSync(); + splitInMiddleOfSync(); + splitRightAfterSync(); + splitAfterSync(); + } + + private void splitBeforeSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(600, 1000, 2, 17684, null); + } + + private void splitRightBeforeSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 17750, null); + } + + private void splitInMiddleOfSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 17760, null); + + } + + private void splitRightAfterSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 17770, null); + } + + private void splitAfterSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 19950, null); + } + + private void writeThenReadByRecordReader(int intervalRecordCount, + int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) + throws IOException, InterruptedException { + Path testDir = new Path(System.getProperty("test.data.dir", ".") + + "/mapred/testsmallfirstsplit"); + Path testFile = new Path(testDir, "test_rcfile"); + fs.delete(testFile, true); + Configuration cloneConf = new Configuration(conf); + RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length); + cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount); + + RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec); + + 
BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length); + for (int i = 0; i < bytesArray.length; i++) { + BytesRefWritable cu = null; + cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length); + bytes.set(i, cu); } - - private void splitAfterSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 19950, null); + for (int i = 0; i < writeCount; i++) { + writer.append(bytes); } - - private void writeThenReadByRecordReader(int intervalRecordCount, - int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) - throws IOException, InterruptedException { - Path testDir = new Path(System.getProperty("test.data.dir", ".") - + "/mapred/testsmallfirstsplit"); - Path testFile = new Path(testDir, "test_rcfile"); - fs.delete(testFile, true); - Configuration cloneConf = new Configuration(conf); - RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length); - cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount); - - RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec); - - BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length); - for (int i = 0; i < bytesArray.length; i++) { - BytesRefWritable cu = null; - cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length); - bytes.set(i, cu); - } - for (int i = 0; i < writeCount; i++) { - writer.append(bytes); - } - writer.close(); - - RCFileMapReduceInputFormat inputFormat = new RCFileMapReduceInputFormat(); - Configuration jonconf = new Configuration(cloneConf); - jonconf.set("mapred.input.dir", testDir.toString()); - JobContext context = new Job(jonconf); - context.getConfiguration().setLong("mapred.max.split.size", maxSplitSize); - List splits = inputFormat.getSplits(context); - assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber); - int readCount = 0; - for (int i = 0; i < splits.size(); i++) { - TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf, new TaskAttemptID()); - RecordReader rr = inputFormat.createRecordReader(splits.get(i), tac); - rr.initialize(splits.get(i), tac); - while (rr.nextKeyValue()) { - readCount++; - } - } - assertEquals("readCount should be equal to writeCount", readCount, writeCount); + writer.close(); + + RCFileMapReduceInputFormat inputFormat = new RCFileMapReduceInputFormat(); + Configuration jonconf = new Configuration(cloneConf); + jonconf.set("mapred.input.dir", testDir.toString()); + JobContext context = new Job(jonconf); + context.getConfiguration().setLong("mapred.max.split.size", maxSplitSize); + List splits = inputFormat.getSplits(context); + assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber); + int readCount = 0; + for (int i = 0; i < splits.size(); i++) { + TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf, new TaskAttemptID()); + RecordReader rr = inputFormat.createRecordReader(splits.get(i), tac); + rr.initialize(splits.get(i), tac); + while (rr.nextKeyValue()) { + readCount++; + } } + assertEquals("readCount should be equal to writeCount", readCount, writeCount); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hcatalog/security/TestHdfsAuthorizationProvider.java b/hcatalog/core/src/test/java/org/apache/hcatalog/security/TestHdfsAuthorizationProvider.java index be5c737..3cc75ad 100644 --- a/hcatalog/core/src/test/java/org/apache/hcatalog/security/TestHdfsAuthorizationProvider.java +++ 
b/hcatalog/core/src/test/java/org/apache/hcatalog/security/TestHdfsAuthorizationProvider.java @@ -57,530 +57,530 @@ */ public class TestHdfsAuthorizationProvider { - protected HCatDriver hcatDriver; - protected HiveMetaStoreClient msc; - protected HiveConf conf; - protected String whDir; - protected Path whPath; - protected FileSystem whFs; - protected Warehouse wh; - protected Hive hive; - - @Before - public void setUp() throws Exception { - - conf = new HiveConf(this.getClass()); - conf.set(ConfVars.PREEXECHOOKS.varname, ""); - conf.set(ConfVars.POSTEXECHOOKS.varname, ""); - conf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - conf.set("hive.metastore.local", "true"); - conf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - conf.setBoolVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED, true); - conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - StorageDelegationAuthorizationProvider.class.getCanonicalName()); - conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); - - whDir = System.getProperty("test.warehouse.dir", "/tmp/testhdfsauthorization_wh"); - conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, whDir); - - UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String username = ShimLoader.getHadoopShims().getShortUserName(ugi); - - whPath = new Path(whDir); - whFs = whPath.getFileSystem(conf); - - wh = new Warehouse(conf); - hive = Hive.get(conf); - - //clean up mess in HMS - HcatTestUtils.cleanupHMS(hive, wh, perm700); - - whFs.delete(whPath, true); - whFs.mkdirs(whPath, perm755); - - SessionState.start(new CliSessionState(conf)); - hcatDriver = new HCatDriver(); - } - - @After - public void tearDown() throws IOException { - whFs.close(); - hcatDriver.close(); - Hive.closeCurrent(); - } - - public Path getDbPath(String dbName) throws MetaException, HiveException { - return HcatTestUtils.getDbPath(hive, wh, dbName); - } - - public Path getTablePath(String dbName, String tableName) throws HiveException { - Table table = hive.getTable(dbName, tableName); - return table.getPath(); - } - - public Path getPartPath(String partName, String dbName, String tableName) throws HiveException { - return new Path(getTablePath(dbName, tableName), partName); - } - - /** Execute the query expecting success*/ - public void exec(String format, Object... args) throws Exception { - String command = String.format(format, args); - CommandProcessorResponse resp = hcatDriver.run(command); - Assert.assertEquals(resp.getErrorMessage(), 0, resp.getResponseCode()); - Assert.assertEquals(resp.getErrorMessage(), null, resp.getErrorMessage()); - } - - /** Execute the query expecting it to fail with AuthorizationException */ - public void execFail(String format, Object... 
args) throws Exception { - String command = String.format(format, args); - CommandProcessorResponse resp = hcatDriver.run(command); - Assert.assertNotSame(resp.getErrorMessage(), 0, resp.getResponseCode()); - Assert.assertTrue((resp.getResponseCode() == 40000) || (resp.getResponseCode() == 403)); - if (resp.getErrorMessage() != null) { - Assert.assertTrue(resp.getErrorMessage().contains("org.apache.hadoop.security.AccessControlException")); - } - } - - - /** - * Tests whether the warehouse directory is writable by the current user (as defined by Hadoop) - */ - @Test - public void testWarehouseIsWritable() throws Exception { - Path top = new Path(whPath, "_foobarbaz12_"); - try { - whFs.mkdirs(top); - } finally { - whFs.delete(top, true); - } - } - - @Test - public void testShowDatabases() throws Exception { - exec("CREATE DATABASE doo"); - exec("SHOW DATABASES"); - - whFs.setPermission(whPath, perm300); //revoke r - execFail("SHOW DATABASES"); - } - - @Test - public void testDatabaseOps() throws Exception { - exec("SHOW TABLES"); - exec("SHOW TABLE EXTENDED LIKE foo1"); - - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); - exec("DESCRIBE DATABASE doo"); - exec("USE doo"); - exec("SHOW TABLES"); - exec("SHOW TABLE EXTENDED LIKE foo1"); - exec("DROP DATABASE doo"); - - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - whFs.mkdirs(dbPath, perm700); - exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - exec("DESCRIBE DATABASE doo2", dbPath.toUri()); - exec("USE doo2"); - exec("SHOW TABLES"); - exec("SHOW TABLE EXTENDED LIKE foo1"); - exec("DROP DATABASE doo2", dbPath.toUri()); - - //custom non-existing location - exec("CREATE DATABASE doo3 LOCATION '%s/subpath'", dbPath.toUri()); - } - - @Test - public void testCreateDatabaseFail1() throws Exception { - whFs.setPermission(whPath, perm500); - execFail("CREATE DATABASE doo"); //in the default location - - whFs.setPermission(whPath, perm555); - execFail("CREATE DATABASE doo2"); - } - - @Test - public void testCreateDatabaseFail2() throws Exception { - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - - whFs.mkdirs(dbPath, perm700); - whFs.setPermission(dbPath, perm500); - execFail("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - } - - @Test - public void testDropDatabaseFail1() throws Exception { - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); //in the default location - - whFs.setPermission(getDbPath("doo"), perm500); //revoke write - execFail("DROP DATABASE doo"); - } - - @Test - public void testDropDatabaseFail2() throws Exception { - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - - whFs.mkdirs(dbPath, perm700); - exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - - whFs.setPermission(dbPath, perm500); - execFail("DROP DATABASE doo2"); - } - - @Test - public void testDescSwitchDatabaseFail() throws Exception { - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); - whFs.setPermission(getDbPath("doo"), perm300); //revoke read - execFail("DESCRIBE DATABASE doo"); - execFail("USE doo"); - - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - whFs.mkdirs(dbPath, perm700); - exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - whFs.mkdirs(dbPath, perm300); //revoke read - execFail("DESCRIBE DATABASE doo2", dbPath.toUri()); - execFail("USE doo2"); - } - - @Test - public void testShowTablesFail() throws 
Exception { - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); - exec("USE doo"); - whFs.setPermission(getDbPath("doo"), perm300); //revoke read - execFail("SHOW TABLES"); - execFail("SHOW TABLE EXTENDED LIKE foo1"); - } - - @Test - public void testTableOps() throws Exception { - //default db - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - exec("DESCRIBE foo1"); - exec("DROP TABLE foo1"); - - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - exec("DESCRIBE foo2"); - exec("DROP TABLE foo2"); - - //default db custom non existing location - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); - exec("DESCRIBE foo3"); - exec("DROP TABLE foo3"); - - //non default db - exec("CREATE DATABASE doo"); - exec("USE doo"); - - exec("CREATE TABLE foo4 (foo INT) STORED AS RCFILE"); - exec("DESCRIBE foo4"); - exec("DROP TABLE foo4"); - - //non-default db custom location - tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - exec("DESCRIBE foo5"); - exec("DROP TABLE foo5"); - - //non-default db custom non existing location - exec("CREATE EXTERNAL TABLE foo6 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); - exec("DESCRIBE foo6"); - exec("DROP TABLE foo6"); - - exec("DROP TABLE IF EXISTS foo_non_exists"); - - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - exec("DESCRIBE EXTENDED foo1"); - exec("DESCRIBE FORMATTED foo1"); - exec("DESCRIBE foo1.foo"); - - //deep non-existing path for the table - tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); - } - - @Test - public void testCreateTableFail1() throws Exception { - //default db - whFs.mkdirs(whPath, perm500); //revoke w - execFail("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - } - - @Test - public void testCreateTableFail2() throws Exception { - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm500); - execFail("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - - //default db custom non existing location - execFail("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); - } - - @Test - public void testCreateTableFail3() throws Exception { - //non default db - exec("CREATE DATABASE doo"); - whFs.setPermission(getDbPath("doo"), perm500); - - execFail("CREATE TABLE doo.foo4 (foo INT) STORED AS RCFILE"); - - //non-default db custom location, permission to write to tablePath, but not on db path - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("USE doo"); - execFail("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - } - - @Test - public void testCreateTableFail4() throws Exception { - //non default db - exec("CREATE DATABASE doo"); - - //non-default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm500); - execFail("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", 
tablePath); - - //non-default db custom non existing location - execFail("CREATE EXTERNAL TABLE doo.foo6 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); - } - - @Test - public void testDropTableFail1() throws Exception { - //default db - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke w - execFail("DROP TABLE foo1"); - } - - @Test - public void testDropTableFail2() throws Exception { - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm500); - execFail("DROP TABLE foo2"); - } - - @Test - public void testDropTableFail4() throws Exception { - //non default db - exec("CREATE DATABASE doo"); - - //non-default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - - exec("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm500); - exec("USE doo"); //There is no DROP TABLE doo.foo5 support in Hive - execFail("DROP TABLE foo5"); - } - - @Test - public void testDescTableFail() throws Exception { - //default db - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read - execFail("DESCRIBE foo1"); - - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm300); //revoke read - execFail("DESCRIBE foo2"); - } - - @Test - public void testAlterTableRename() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - exec("ALTER TABLE foo1 RENAME TO foo2"); - - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - exec("ALTER TABLE foo3 RENAME TO foo4"); - } - - @Test - public void testAlterTableRenameFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write - execFail("ALTER TABLE foo1 RENAME TO foo2"); - - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm500); //revoke write - execFail("ALTER TABLE foo3 RENAME TO foo4"); - } - - @Test - public void testAlterTableRelocate() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("ALTER TABLE foo1 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", - tablePath.makeQualified(whFs)); - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - exec("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - } - - @Test - public void testAlterTableRelocateFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm500); //revoke write - execFail("ALTER TABLE foo1 SET LOCATION '%s'", 
tablePath.makeQualified(whFs)); - - //dont have access to new table loc - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", - tablePath.makeQualified(whFs)); - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - whFs.mkdirs(tablePath, perm500); //revoke write - execFail("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - - //have access to new table loc, but not old table loc - tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); - exec("CREATE EXTERNAL TABLE foo4 (foo INT) STORED AS RCFILE LOCATION '%s'", - tablePath.makeQualified(whFs)); - whFs.mkdirs(tablePath, perm500); //revoke write - tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); - execFail("ALTER TABLE foo4 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - } - - @Test - public void testAlterTable() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - exec("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); - exec("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); - exec("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); - } - - @Test - public void testAddDropPartition() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); - exec("ALTER TABLE foo1 ADD IF NOT EXISTS PARTITION (b='2010-10-10')"); - String relPath = new Random().nextInt() + "/mypart"; - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-11') LOCATION '%s'", relPath); - - exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT RCFILE"); - - exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT INPUTFORMAT " - + "'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " - + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver " - + "'mydriver' outputdriver 'yourdriver'"); - - exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); - exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-11')"); - } - - @Test - public void testAddPartitionFail1() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); - execFail("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); - } - - @Test - public void testAddPartitionFail2() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - String relPath = new Random().nextInt() + "/mypart"; - Path partPath = new Path(getTablePath("default", "foo1"), relPath); - whFs.mkdirs(partPath, perm500); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); - } - - @Test - public void testDropPartitionFail1() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); - whFs.mkdirs(getPartPath("b=2010-10-10", "default", "foo1"), perm500); - execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); - } - - @Test - public void testDropPartitionFail2() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - String relPath = new Random().nextInt() + "/mypart"; - Path partPath = new Path(getTablePath("default", "foo1"), relPath); - whFs.mkdirs(partPath, perm700); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); - whFs.mkdirs(partPath, 
perm500); //revoke write - execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); - } - - @Test - public void testAlterTableFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write - execFail("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); - execFail("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); - execFail("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); - } - - @Test - public void testShowTables() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); - exec("SHOW PARTITIONS foo1"); - - whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read - execFail("SHOW PARTITIONS foo1"); - } - - @Test - public void testAlterTablePartRename() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); - exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); - } - - @Test - public void testAlterTablePartRenameFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); - whFs.setPermission(loc, perm500); //revoke w - execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); - } - - @Test - public void testAlterTablePartRelocate() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16')"); - Path partPath = new Path(whPath, new Random().nextInt() + "/mypart"); - exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", partPath.makeQualified(whFs)); - } - - @Test - public void testAlterTablePartRelocateFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - - Path oldLoc = new Path(whPath, new Random().nextInt() + "/mypart"); - Path newLoc = new Path(whPath, new Random().nextInt() + "/mypart2"); - - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", oldLoc); - whFs.mkdirs(oldLoc, perm500); - execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); - whFs.mkdirs(oldLoc, perm700); - whFs.mkdirs(newLoc, perm500); - execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); - } + protected HCatDriver hcatDriver; + protected HiveMetaStoreClient msc; + protected HiveConf conf; + protected String whDir; + protected Path whPath; + protected FileSystem whFs; + protected Warehouse wh; + protected Hive hive; + + @Before + public void setUp() throws Exception { + + conf = new HiveConf(this.getClass()); + conf.set(ConfVars.PREEXECHOOKS.varname, ""); + conf.set(ConfVars.POSTEXECHOOKS.varname, ""); + conf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + + conf.set("hive.metastore.local", "true"); + conf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + conf.setBoolVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED, true); + conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, + StorageDelegationAuthorizationProvider.class.getCanonicalName()); + 
conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); + + whDir = System.getProperty("test.warehouse.dir", "/tmp/testhdfsauthorization_wh"); + conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, whDir); + + UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); + String username = ShimLoader.getHadoopShims().getShortUserName(ugi); + + whPath = new Path(whDir); + whFs = whPath.getFileSystem(conf); + + wh = new Warehouse(conf); + hive = Hive.get(conf); + + //clean up mess in HMS + HcatTestUtils.cleanupHMS(hive, wh, perm700); + + whFs.delete(whPath, true); + whFs.mkdirs(whPath, perm755); + + SessionState.start(new CliSessionState(conf)); + hcatDriver = new HCatDriver(); + } + + @After + public void tearDown() throws IOException { + whFs.close(); + hcatDriver.close(); + Hive.closeCurrent(); + } + + public Path getDbPath(String dbName) throws MetaException, HiveException { + return HcatTestUtils.getDbPath(hive, wh, dbName); + } + + public Path getTablePath(String dbName, String tableName) throws HiveException { + Table table = hive.getTable(dbName, tableName); + return table.getPath(); + } + + public Path getPartPath(String partName, String dbName, String tableName) throws HiveException { + return new Path(getTablePath(dbName, tableName), partName); + } + + /** Execute the query expecting success*/ + public void exec(String format, Object... args) throws Exception { + String command = String.format(format, args); + CommandProcessorResponse resp = hcatDriver.run(command); + Assert.assertEquals(resp.getErrorMessage(), 0, resp.getResponseCode()); + Assert.assertEquals(resp.getErrorMessage(), null, resp.getErrorMessage()); + } + + /** Execute the query expecting it to fail with AuthorizationException */ + public void execFail(String format, Object... 
args) throws Exception { + String command = String.format(format, args); + CommandProcessorResponse resp = hcatDriver.run(command); + Assert.assertNotSame(resp.getErrorMessage(), 0, resp.getResponseCode()); + Assert.assertTrue((resp.getResponseCode() == 40000) || (resp.getResponseCode() == 403)); + if (resp.getErrorMessage() != null) { + Assert.assertTrue(resp.getErrorMessage().contains("org.apache.hadoop.security.AccessControlException")); + } + } + + + /** + * Tests whether the warehouse directory is writable by the current user (as defined by Hadoop) + */ + @Test + public void testWarehouseIsWritable() throws Exception { + Path top = new Path(whPath, "_foobarbaz12_"); + try { + whFs.mkdirs(top); + } finally { + whFs.delete(top, true); + } + } + + @Test + public void testShowDatabases() throws Exception { + exec("CREATE DATABASE doo"); + exec("SHOW DATABASES"); + + whFs.setPermission(whPath, perm300); //revoke r + execFail("SHOW DATABASES"); + } + + @Test + public void testDatabaseOps() throws Exception { + exec("SHOW TABLES"); + exec("SHOW TABLE EXTENDED LIKE foo1"); + + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); + exec("DESCRIBE DATABASE doo"); + exec("USE doo"); + exec("SHOW TABLES"); + exec("SHOW TABLE EXTENDED LIKE foo1"); + exec("DROP DATABASE doo"); + + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + whFs.mkdirs(dbPath, perm700); + exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + exec("DESCRIBE DATABASE doo2", dbPath.toUri()); + exec("USE doo2"); + exec("SHOW TABLES"); + exec("SHOW TABLE EXTENDED LIKE foo1"); + exec("DROP DATABASE doo2", dbPath.toUri()); + + //custom non-existing location + exec("CREATE DATABASE doo3 LOCATION '%s/subpath'", dbPath.toUri()); + } + + @Test + public void testCreateDatabaseFail1() throws Exception { + whFs.setPermission(whPath, perm500); + execFail("CREATE DATABASE doo"); //in the default location + + whFs.setPermission(whPath, perm555); + execFail("CREATE DATABASE doo2"); + } + + @Test + public void testCreateDatabaseFail2() throws Exception { + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + + whFs.mkdirs(dbPath, perm700); + whFs.setPermission(dbPath, perm500); + execFail("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + } + + @Test + public void testDropDatabaseFail1() throws Exception { + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); //in the default location + + whFs.setPermission(getDbPath("doo"), perm500); //revoke write + execFail("DROP DATABASE doo"); + } + + @Test + public void testDropDatabaseFail2() throws Exception { + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + + whFs.mkdirs(dbPath, perm700); + exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + + whFs.setPermission(dbPath, perm500); + execFail("DROP DATABASE doo2"); + } + + @Test + public void testDescSwitchDatabaseFail() throws Exception { + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); + whFs.setPermission(getDbPath("doo"), perm300); //revoke read + execFail("DESCRIBE DATABASE doo"); + execFail("USE doo"); + + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + whFs.mkdirs(dbPath, perm700); + exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + whFs.mkdirs(dbPath, perm300); //revoke read + execFail("DESCRIBE DATABASE doo2", dbPath.toUri()); + execFail("USE doo2"); + } + + @Test + public void testShowTablesFail() throws 
Exception { + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); + exec("USE doo"); + whFs.setPermission(getDbPath("doo"), perm300); //revoke read + execFail("SHOW TABLES"); + execFail("SHOW TABLE EXTENDED LIKE foo1"); + } + + @Test + public void testTableOps() throws Exception { + //default db + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + exec("DESCRIBE foo1"); + exec("DROP TABLE foo1"); + + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + exec("DESCRIBE foo2"); + exec("DROP TABLE foo2"); + + //default db custom non existing location + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); + exec("DESCRIBE foo3"); + exec("DROP TABLE foo3"); + + //non default db + exec("CREATE DATABASE doo"); + exec("USE doo"); + + exec("CREATE TABLE foo4 (foo INT) STORED AS RCFILE"); + exec("DESCRIBE foo4"); + exec("DROP TABLE foo4"); + + //non-default db custom location + tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + exec("DESCRIBE foo5"); + exec("DROP TABLE foo5"); + + //non-default db custom non existing location + exec("CREATE EXTERNAL TABLE foo6 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); + exec("DESCRIBE foo6"); + exec("DROP TABLE foo6"); + + exec("DROP TABLE IF EXISTS foo_non_exists"); + + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + exec("DESCRIBE EXTENDED foo1"); + exec("DESCRIBE FORMATTED foo1"); + exec("DESCRIBE foo1.foo"); + + //deep non-existing path for the table + tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); + } + + @Test + public void testCreateTableFail1() throws Exception { + //default db + whFs.mkdirs(whPath, perm500); //revoke w + execFail("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + } + + @Test + public void testCreateTableFail2() throws Exception { + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm500); + execFail("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + + //default db custom non existing location + execFail("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); + } + + @Test + public void testCreateTableFail3() throws Exception { + //non default db + exec("CREATE DATABASE doo"); + whFs.setPermission(getDbPath("doo"), perm500); + + execFail("CREATE TABLE doo.foo4 (foo INT) STORED AS RCFILE"); + + //non-default db custom location, permission to write to tablePath, but not on db path + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("USE doo"); + execFail("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + } + + @Test + public void testCreateTableFail4() throws Exception { + //non default db + exec("CREATE DATABASE doo"); + + //non-default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm500); + execFail("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", 
tablePath); + + //non-default db custom non existing location + execFail("CREATE EXTERNAL TABLE doo.foo6 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); + } + + @Test + public void testDropTableFail1() throws Exception { + //default db + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke w + execFail("DROP TABLE foo1"); + } + + @Test + public void testDropTableFail2() throws Exception { + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm500); + execFail("DROP TABLE foo2"); + } + + @Test + public void testDropTableFail4() throws Exception { + //non default db + exec("CREATE DATABASE doo"); + + //non-default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + + exec("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm500); + exec("USE doo"); //There is no DROP TABLE doo.foo5 support in Hive + execFail("DROP TABLE foo5"); + } + + @Test + public void testDescTableFail() throws Exception { + //default db + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read + execFail("DESCRIBE foo1"); + + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm300); //revoke read + execFail("DESCRIBE foo2"); + } + + @Test + public void testAlterTableRename() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + exec("ALTER TABLE foo1 RENAME TO foo2"); + + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + exec("ALTER TABLE foo3 RENAME TO foo4"); + } + + @Test + public void testAlterTableRenameFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write + execFail("ALTER TABLE foo1 RENAME TO foo2"); + + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm500); //revoke write + execFail("ALTER TABLE foo3 RENAME TO foo4"); + } + + @Test + public void testAlterTableRelocate() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("ALTER TABLE foo1 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", + tablePath.makeQualified(whFs)); + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + exec("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + } + + @Test + public void testAlterTableRelocateFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm500); //revoke write + execFail("ALTER TABLE foo1 SET LOCATION '%s'", 
tablePath.makeQualified(whFs)); + + //dont have access to new table loc + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", + tablePath.makeQualified(whFs)); + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + whFs.mkdirs(tablePath, perm500); //revoke write + execFail("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + + //have access to new table loc, but not old table loc + tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); + exec("CREATE EXTERNAL TABLE foo4 (foo INT) STORED AS RCFILE LOCATION '%s'", + tablePath.makeQualified(whFs)); + whFs.mkdirs(tablePath, perm500); //revoke write + tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); + execFail("ALTER TABLE foo4 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + } + + @Test + public void testAlterTable() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + exec("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); + exec("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); + exec("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); + } + + @Test + public void testAddDropPartition() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); + exec("ALTER TABLE foo1 ADD IF NOT EXISTS PARTITION (b='2010-10-10')"); + String relPath = new Random().nextInt() + "/mypart"; + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-11') LOCATION '%s'", relPath); + + exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT RCFILE"); + + exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT INPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver " + + "'mydriver' outputdriver 'yourdriver'"); + + exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); + exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-11')"); + } + + @Test + public void testAddPartitionFail1() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); + execFail("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); + } + + @Test + public void testAddPartitionFail2() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + String relPath = new Random().nextInt() + "/mypart"; + Path partPath = new Path(getTablePath("default", "foo1"), relPath); + whFs.mkdirs(partPath, perm500); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); + } + + @Test + public void testDropPartitionFail1() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); + whFs.mkdirs(getPartPath("b=2010-10-10", "default", "foo1"), perm500); + execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); + } + + @Test + public void testDropPartitionFail2() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + String relPath = new Random().nextInt() + "/mypart"; + Path partPath = new Path(getTablePath("default", "foo1"), relPath); + whFs.mkdirs(partPath, perm700); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); + whFs.mkdirs(partPath, 
perm500); //revoke write + execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); + } + + @Test + public void testAlterTableFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write + execFail("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); + execFail("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); + execFail("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); + } + + @Test + public void testShowTables() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); + exec("SHOW PARTITIONS foo1"); + + whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read + execFail("SHOW PARTITIONS foo1"); + } + + @Test + public void testAlterTablePartRename() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); + exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); + } + + @Test + public void testAlterTablePartRenameFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); + whFs.setPermission(loc, perm500); //revoke w + execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); + } + + @Test + public void testAlterTablePartRelocate() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16')"); + Path partPath = new Path(whPath, new Random().nextInt() + "/mypart"); + exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", partPath.makeQualified(whFs)); + } + + @Test + public void testAlterTablePartRelocateFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + + Path oldLoc = new Path(whPath, new Random().nextInt() + "/mypart"); + Path newLoc = new Path(whPath, new Random().nextInt() + "/mypart2"); + + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", oldLoc); + whFs.mkdirs(oldLoc, perm500); + execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); + whFs.mkdirs(oldLoc, perm700); + whFs.mkdirs(newLoc, perm500); + execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/ExitException.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/ExitException.java index 4e3226f..f394319 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/ExitException.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/ExitException.java @@ -20,19 +20,19 @@ package org.apache.hive.hcatalog; public class ExitException extends SecurityException { - private static final long serialVersionUID = -1982617086752946683L; - private final int status; + private static final long serialVersionUID = -1982617086752946683L; + private final int status; - /** - * @return the status - */ - public int getStatus() { - return status; - } + /** + * @return the status + */ + public int getStatus() { + return status; + } - public 
ExitException(int status) { + public ExitException(int status) { - super("Raising exception, instead of System.exit(). Return code was: " + status); - this.status = status; - } + super("Raising exception, instead of System.exit(). Return code was: " + status); + this.status = status; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/HcatTestUtils.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/HcatTestUtils.java index 1168936..c42587e 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/HcatTestUtils.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/HcatTestUtils.java @@ -38,63 +38,63 @@ * Utility methods for tests */ public class HcatTestUtils { - private static final Logger LOG = LoggerFactory.getLogger(HcatTestUtils.class); + private static final Logger LOG = LoggerFactory.getLogger(HcatTestUtils.class); - public static FsPermission perm007 = FsPermission.createImmutable((short) 0007); // -------rwx - public static FsPermission perm070 = FsPermission.createImmutable((short) 0070); // ----rwx--- - public static FsPermission perm700 = FsPermission.createImmutable((short) 0700); // -rwx------ - public static FsPermission perm755 = FsPermission.createImmutable((short) 0755); // -rwxr-xr-x - public static FsPermission perm777 = FsPermission.createImmutable((short) 0777); // -rwxrwxrwx - public static FsPermission perm300 = FsPermission.createImmutable((short) 0300); // --wx------ - public static FsPermission perm500 = FsPermission.createImmutable((short) 0500); // -r-x------ - public static FsPermission perm555 = FsPermission.createImmutable((short) 0555); // -r-xr-xr-x + public static FsPermission perm007 = FsPermission.createImmutable((short) 0007); // -------rwx + public static FsPermission perm070 = FsPermission.createImmutable((short) 0070); // ----rwx--- + public static FsPermission perm700 = FsPermission.createImmutable((short) 0700); // -rwx------ + public static FsPermission perm755 = FsPermission.createImmutable((short) 0755); // -rwxr-xr-x + public static FsPermission perm777 = FsPermission.createImmutable((short) 0777); // -rwxrwxrwx + public static FsPermission perm300 = FsPermission.createImmutable((short) 0300); // --wx------ + public static FsPermission perm500 = FsPermission.createImmutable((short) 0500); // -r-x------ + public static FsPermission perm555 = FsPermission.createImmutable((short) 0555); // -r-xr-xr-x - /** - * Returns the database path. - */ - public static Path getDbPath(Hive hive, Warehouse wh, String dbName) throws MetaException, HiveException { - return wh.getDatabasePath(hive.getDatabase(dbName)); - } - - /** - * Removes all databases and tables from the metastore - */ - public static void cleanupHMS(Hive hive, Warehouse wh, FsPermission defaultPerm) - throws HiveException, MetaException, NoSuchObjectException { - for (String dbName : hive.getAllDatabases()) { - if (dbName.equals("default")) { - continue; - } - try { - Path path = getDbPath(hive, wh, dbName); - FileSystem whFs = path.getFileSystem(hive.getConf()); - whFs.setPermission(path, defaultPerm); - } catch (IOException ex) { - //ignore - } - hive.dropDatabase(dbName, true, true, true); - } + /** + * Returns the database path. 
+ */ + public static Path getDbPath(Hive hive, Warehouse wh, String dbName) throws MetaException, HiveException { + return wh.getDatabasePath(hive.getDatabase(dbName)); + } - //clean tables in default db - for (String tablename : hive.getAllTables("default")) { - hive.dropTable("default", tablename, true, true); - } + /** + * Removes all databases and tables from the metastore + */ + public static void cleanupHMS(Hive hive, Warehouse wh, FsPermission defaultPerm) + throws HiveException, MetaException, NoSuchObjectException { + for (String dbName : hive.getAllDatabases()) { + if (dbName.equals("default")) { + continue; + } + try { + Path path = getDbPath(hive, wh, dbName); + FileSystem whFs = path.getFileSystem(hive.getConf()); + whFs.setPermission(path, defaultPerm); + } catch (IOException ex) { + //ignore + } + hive.dropDatabase(dbName, true, true, true); } - public static void createTestDataFile(String filename, String[] lines) throws IOException { - FileWriter writer = null; - try { - File file = new File(filename); - file.deleteOnExit(); - writer = new FileWriter(file); - for (String line : lines) { - writer.write(line + "\n"); - } - } finally { - if (writer != null) { - writer.close(); - } - } + //clean tables in default db + for (String tablename : hive.getAllTables("default")) { + hive.dropTable("default", tablename, true, true); + } + } + public static void createTestDataFile(String filename, String[] lines) throws IOException { + FileWriter writer = null; + try { + File file = new File(filename); + file.deleteOnExit(); + writer = new FileWriter(file); + for (String line : lines) { + writer.write(line + "\n"); + } + } finally { + if (writer != null) { + writer.close(); + } } + + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/MiniCluster.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/MiniCluster.java index 3d38c45..843240d 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/MiniCluster.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/MiniCluster.java @@ -43,159 +43,159 @@ * environment for Pig to run on top of the mini cluster. 
*/ public class MiniCluster { - private MiniDFSCluster m_dfs = null; - private MiniMRCluster m_mr = null; - private FileSystem m_fileSys = null; - private JobConf m_conf = null; - - private final static MiniCluster INSTANCE = new MiniCluster(); - private static boolean isSetup = true; - - private MiniCluster() { - setupMiniDfsAndMrClusters(); + private MiniDFSCluster m_dfs = null; + private MiniMRCluster m_mr = null; + private FileSystem m_fileSys = null; + private JobConf m_conf = null; + + private final static MiniCluster INSTANCE = new MiniCluster(); + private static boolean isSetup = true; + + private MiniCluster() { + setupMiniDfsAndMrClusters(); + } + + private void setupMiniDfsAndMrClusters() { + try { + final int dataNodes = 1; // There will be 4 data nodes + final int taskTrackers = 1; // There will be 4 task tracker nodes + Configuration config = new Configuration(); + + // Builds and starts the mini dfs and mapreduce clusters + System.setProperty("hadoop.log.dir", "."); + m_dfs = new MiniDFSCluster(config, dataNodes, true, null); + + m_fileSys = m_dfs.getFileSystem(); + m_mr = new MiniMRCluster(taskTrackers, m_fileSys.getUri().toString(), 1); + + // Create the configuration hadoop-site.xml file + File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/"); + conf_dir.mkdirs(); + File conf_file = new File(conf_dir, "hadoop-site.xml"); + + // Write the necessary config info to hadoop-site.xml + m_conf = m_mr.createJobConf(); + m_conf.setInt("mapred.submit.replication", 1); + m_conf.set("dfs.datanode.address", "0.0.0.0:0"); + m_conf.set("dfs.datanode.http.address", "0.0.0.0:0"); + m_conf.writeXml(new FileOutputStream(conf_file)); + + // Set the system properties needed by Pig + System.setProperty("cluster", m_conf.get("mapred.job.tracker")); + System.setProperty("namenode", m_conf.get("fs.default.name")); + System.setProperty("junit.hadoop.conf", conf_dir.getPath()); + } catch (IOException e) { + throw new RuntimeException(e); } - - private void setupMiniDfsAndMrClusters() { - try { - final int dataNodes = 1; // There will be 4 data nodes - final int taskTrackers = 1; // There will be 4 task tracker nodes - Configuration config = new Configuration(); - - // Builds and starts the mini dfs and mapreduce clusters - System.setProperty("hadoop.log.dir", "."); - m_dfs = new MiniDFSCluster(config, dataNodes, true, null); - - m_fileSys = m_dfs.getFileSystem(); - m_mr = new MiniMRCluster(taskTrackers, m_fileSys.getUri().toString(), 1); - - // Create the configuration hadoop-site.xml file - File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/"); - conf_dir.mkdirs(); - File conf_file = new File(conf_dir, "hadoop-site.xml"); - - // Write the necessary config info to hadoop-site.xml - m_conf = m_mr.createJobConf(); - m_conf.setInt("mapred.submit.replication", 1); - m_conf.set("dfs.datanode.address", "0.0.0.0:0"); - m_conf.set("dfs.datanode.http.address", "0.0.0.0:0"); - m_conf.writeXml(new FileOutputStream(conf_file)); - - // Set the system properties needed by Pig - System.setProperty("cluster", m_conf.get("mapred.job.tracker")); - System.setProperty("namenode", m_conf.get("fs.default.name")); - System.setProperty("junit.hadoop.conf", conf_dir.getPath()); - } catch (IOException e) { - throw new RuntimeException(e); - } + } + + /** + * Returns the single instance of class MiniClusterBuilder that + * represents the resouces for a mini dfs cluster and a mini + * mapreduce cluster. 
+ */ + public static MiniCluster buildCluster() { + if (!isSetup) { + INSTANCE.setupMiniDfsAndMrClusters(); + isSetup = true; } - - /** - * Returns the single instance of class MiniClusterBuilder that - * represents the resouces for a mini dfs cluster and a mini - * mapreduce cluster. - */ - public static MiniCluster buildCluster() { - if (!isSetup) { - INSTANCE.setupMiniDfsAndMrClusters(); - isSetup = true; - } - return INSTANCE; + return INSTANCE; + } + + public void shutDown() { + INSTANCE.shutdownMiniDfsAndMrClusters(); + } + + @Override + protected void finalize() { + shutdownMiniDfsAndMrClusters(); + } + + private void shutdownMiniDfsAndMrClusters() { + isSetup = false; + try { + if (m_fileSys != null) { + m_fileSys.close(); + } + } catch (IOException e) { + e.printStackTrace(); } - - public void shutDown() { - INSTANCE.shutdownMiniDfsAndMrClusters(); + if (m_dfs != null) { + m_dfs.shutdown(); } - - @Override - protected void finalize() { - shutdownMiniDfsAndMrClusters(); + if (m_mr != null) { + m_mr.shutdown(); } - - private void shutdownMiniDfsAndMrClusters() { - isSetup = false; - try { - if (m_fileSys != null) { - m_fileSys.close(); - } - } catch (IOException e) { - e.printStackTrace(); - } - if (m_dfs != null) { - m_dfs.shutdown(); - } - if (m_mr != null) { - m_mr.shutdown(); - } - m_fileSys = null; - m_dfs = null; - m_mr = null; + m_fileSys = null; + m_dfs = null; + m_mr = null; + } + + public Properties getProperties() { + errorIfNotSetup(); + Properties properties = new Properties(); + assert m_conf != null; + Iterator> iter = m_conf.iterator(); + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + properties.put(entry.getKey(), entry.getValue()); } - - public Properties getProperties() { - errorIfNotSetup(); - Properties properties = new Properties(); - assert m_conf != null; - Iterator> iter = m_conf.iterator(); - while (iter.hasNext()) { - Map.Entry entry = iter.next(); - properties.put(entry.getKey(), entry.getValue()); - } - return properties; + return properties; + } + + public void setProperty(String name, String value) { + errorIfNotSetup(); + m_conf.set(name, value); + } + + public FileSystem getFileSystem() { + errorIfNotSetup(); + return m_fileSys; + } + + /** + * Throw RunTimeException if isSetup is false + */ + private void errorIfNotSetup() { + if (isSetup) { + return; } - - public void setProperty(String name, String value) { - errorIfNotSetup(); - m_conf.set(name, value); + String msg = "function called on MiniCluster that has been shutdown"; + throw new RuntimeException(msg); + } + + static public void createInputFile(MiniCluster miniCluster, String fileName, + String[] inputData) + throws IOException { + FileSystem fs = miniCluster.getFileSystem(); + createInputFile(fs, fileName, inputData); + } + + static public void createInputFile(FileSystem fs, String fileName, + String[] inputData) throws IOException { + Path path = new Path(fileName); + if (fs.exists(path)) { + throw new IOException("File " + fileName + " already exists on the minicluster"); } - - public FileSystem getFileSystem() { - errorIfNotSetup(); - return m_fileSys; - } - - /** - * Throw RunTimeException if isSetup is false - */ - private void errorIfNotSetup() { - if (isSetup) { - return; - } - String msg = "function called on MiniCluster that has been shutdown"; - throw new RuntimeException(msg); - } - - static public void createInputFile(MiniCluster miniCluster, String fileName, - String[] inputData) - throws IOException { - FileSystem fs = miniCluster.getFileSystem(); - 
createInputFile(fs, fileName, inputData); - } - - static public void createInputFile(FileSystem fs, String fileName, - String[] inputData) throws IOException { - Path path = new Path(fileName); - if (fs.exists(path)) { - throw new IOException("File " + fileName + " already exists on the minicluster"); - } - FSDataOutputStream stream = fs.create(path); - PrintWriter pw = new PrintWriter(new OutputStreamWriter(stream, "UTF-8")); - for (int i = 0; i < inputData.length; i++) { - pw.println(inputData[i]); - } - pw.close(); - - } - - /** - * Helper to remove a dfs file from the minicluster DFS - * - * @param miniCluster reference to the Minicluster where the file should be deleted - * @param fileName pathname of the file to be deleted - * @throws IOException - */ - static public void deleteFile(MiniCluster miniCluster, String fileName) - throws IOException { - FileSystem fs = miniCluster.getFileSystem(); - fs.delete(new Path(fileName), true); + FSDataOutputStream stream = fs.create(path); + PrintWriter pw = new PrintWriter(new OutputStreamWriter(stream, "UTF-8")); + for (int i = 0; i < inputData.length; i++) { + pw.println(inputData[i]); } + pw.close(); + + } + + /** + * Helper to remove a dfs file from the minicluster DFS + * + * @param miniCluster reference to the Minicluster where the file should be deleted + * @param fileName pathname of the file to be deleted + * @throws IOException + */ + static public void deleteFile(MiniCluster miniCluster, String fileName) + throws IOException { + FileSystem fs = miniCluster.getFileSystem(); + fs.delete(new Path(fileName), true); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/NoExitSecurityManager.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/NoExitSecurityManager.java index 8fc674b..fc2e3c7 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/NoExitSecurityManager.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/NoExitSecurityManager.java @@ -23,20 +23,20 @@ public class NoExitSecurityManager extends SecurityManager { - @Override - public void checkPermission(Permission perm) { - // allow anything. - } + @Override + public void checkPermission(Permission perm) { + // allow anything. + } - @Override - public void checkPermission(Permission perm, Object context) { - // allow anything. - } + @Override + public void checkPermission(Permission perm, Object context) { + // allow anything. 
+ } - @Override - public void checkExit(int status) { + @Override + public void checkExit(int status) { - super.checkExit(status); - throw new ExitException(status); - } + super.checkExit(status); + throw new ExitException(status); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/DummyStorageHandler.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/DummyStorageHandler.java index 48f90a5..646acec 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/DummyStorageHandler.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/DummyStorageHandler.java @@ -53,237 +53,237 @@ class DummyStorageHandler extends HCatStorageHandler { + @Override + public Configuration getConf() { + return null; + } + + @Override + public void setConf(Configuration conf) { + } + + @Override + public Class getInputFormatClass() { + return DummyInputFormat.class; + } + + @Override + public Class getOutputFormatClass() { + return DummyOutputFormat.class; + } + + @Override + public Class getSerDeClass() { + return ColumnarSerDe.class; + } + + @Override + public HiveMetaHook getMetaHook() { + return null; + } + + @Override + public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { + } + + @Override + public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { + } + + @Override + public HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException { + return new DummyAuthProvider(); + } + + private class DummyAuthProvider implements HiveAuthorizationProvider { + @Override public Configuration getConf() { - return null; + return null; } + /* @param conf + * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.Configuration) + */ @Override public void setConf(Configuration conf) { } + /* @param conf + /* @throws HiveException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#init(org.apache.hadoop.conf.Configuration) + */ @Override - public Class getInputFormatClass() { - return DummyInputFormat.class; + public void init(Configuration conf) throws HiveException { } + /* @return HiveAuthenticationProvider + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#getAuthenticator() + */ @Override - public Class getOutputFormatClass() { - return DummyOutputFormat.class; + public HiveAuthenticationProvider getAuthenticator() { + return null; } + /* @param authenticator + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#setAuthenticator(org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider) + */ @Override - public Class getSerDeClass() { - return ColumnarSerDe.class; + public void setAuthenticator(HiveAuthenticationProvider authenticator) { } + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ @Override - public HiveMetaHook getMetaHook() { - return null; + public void authorize(Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { } + /* @param db + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see 
org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.metastore.api.Database, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ @Override - public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { + public void authorize(Database db, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { } + /* @param table + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } + + /* @param part + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Partition, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ @Override - public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { + public void authorize(Partition part, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { } + /* @param table + /* @param part + /* @param columns + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ @Override - public HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException { - return new DummyAuthProvider(); + public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { } - private class DummyAuthProvider implements HiveAuthorizationProvider { - - @Override - public Configuration getConf() { - return null; - } - - /* @param conf - * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.Configuration) - */ - @Override - public void setConf(Configuration conf) { - } - - /* @param conf - /* @throws HiveException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#init(org.apache.hadoop.conf.Configuration) - */ - @Override - public void init(Configuration conf) throws HiveException { - } - - /* @return HiveAuthenticationProvider - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#getAuthenticator() - */ - @Override - public HiveAuthenticationProvider getAuthenticator() { - return null; - } - - /* @param authenticator - * @see 
org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#setAuthenticator(org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider) - */ - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - } - - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param db - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.metastore.api.Database, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param table - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param part - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Partition, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param table - /* @param part - /* @param columns - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - } + } + /** + * The Class DummyInputFormat is a dummy implementation of the old hadoop + * mapred.InputFormat required by HiveStorageHandler. 
+ */ + class DummyInputFormat implements + InputFormat { + + /* + * @see + * org.apache.hadoop.mapred.InputFormat#getRecordReader(org.apache.hadoop + * .mapred.InputSplit, org.apache.hadoop.mapred.JobConf, + * org.apache.hadoop.mapred.Reporter) + */ + @Override + public RecordReader getRecordReader( + InputSplit split, JobConf jobconf, Reporter reporter) + throws IOException { + throw new IOException("This operation is not supported."); } - /** - * The Class DummyInputFormat is a dummy implementation of the old hadoop - * mapred.InputFormat required by HiveStorageHandler. + /* + * @see + * org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop. + * mapred .JobConf, int) */ - class DummyInputFormat implements - InputFormat { - - /* - * @see - * org.apache.hadoop.mapred.InputFormat#getRecordReader(org.apache.hadoop - * .mapred.InputSplit, org.apache.hadoop.mapred.JobConf, - * org.apache.hadoop.mapred.Reporter) - */ - @Override - public RecordReader getRecordReader( - InputSplit split, JobConf jobconf, Reporter reporter) - throws IOException { - throw new IOException("This operation is not supported."); - } - - /* - * @see - * org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop. - * mapred .JobConf, int) - */ - @Override - public InputSplit[] getSplits(JobConf jobconf, int number) - throws IOException { - throw new IOException("This operation is not supported."); - } + @Override + public InputSplit[] getSplits(JobConf jobconf, int number) + throws IOException { + throw new IOException("This operation is not supported."); } + } + + /** + * The Class DummyOutputFormat is a dummy implementation of the old hadoop + * mapred.OutputFormat and HiveOutputFormat required by HiveStorageHandler. + */ + class DummyOutputFormat implements + OutputFormat, HCatRecord>, + HiveOutputFormat, HCatRecord> { + + /* + * @see + * org.apache.hadoop.mapred.OutputFormat#checkOutputSpecs(org.apache + * .hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf) + */ + @Override + public void checkOutputSpecs(FileSystem fs, JobConf jobconf) + throws IOException { + throw new IOException("This operation is not supported."); - /** - * The Class DummyOutputFormat is a dummy implementation of the old hadoop - * mapred.OutputFormat and HiveOutputFormat required by HiveStorageHandler. + } + + /* + * @see + * org.apache.hadoop.mapred.OutputFormat#getRecordWriter(org.apache. + * hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf, + * java.lang.String, org.apache.hadoop.util.Progressable) */ - class DummyOutputFormat implements - OutputFormat, HCatRecord>, - HiveOutputFormat, HCatRecord> { - - /* - * @see - * org.apache.hadoop.mapred.OutputFormat#checkOutputSpecs(org.apache - * .hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf) - */ - @Override - public void checkOutputSpecs(FileSystem fs, JobConf jobconf) - throws IOException { - throw new IOException("This operation is not supported."); - - } - - /* - * @see - * org.apache.hadoop.mapred.OutputFormat#getRecordWriter(org.apache. 
- * hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf, - * java.lang.String, org.apache.hadoop.util.Progressable) - */ - @Override - public RecordWriter, HCatRecord> getRecordWriter( - FileSystem fs, JobConf jobconf, String str, - Progressable progress) throws IOException { - throw new IOException("This operation is not supported."); - } - - /* - * @see - * org.apache.hadoop.hive.ql.io.HiveOutputFormat#getHiveRecordWriter(org - * .apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path, - * java.lang.Class, boolean, java.util.Properties, - * org.apache.hadoop.util.Progressable) - */ - @Override - public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( - JobConf jc, Path finalOutPath, - Class valueClass, boolean isCompressed, - Properties tableProperties, Progressable progress) - throws IOException { - throw new IOException("This operation is not supported."); - } + @Override + public RecordWriter, HCatRecord> getRecordWriter( + FileSystem fs, JobConf jobconf, String str, + Progressable progress) throws IOException { + throw new IOException("This operation is not supported."); + } + /* + * @see + * org.apache.hadoop.hive.ql.io.HiveOutputFormat#getHiveRecordWriter(org + * .apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path, + * java.lang.Class, boolean, java.util.Properties, + * org.apache.hadoop.util.Progressable) + */ + @Override + public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( + JobConf jc, Path finalOutPath, + Class valueClass, boolean isCompressed, + Properties tableProperties, Progressable progress) + throws IOException { + throw new IOException("This operation is not supported."); } + } + } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestPermsGrp.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestPermsGrp.java index a1a8b93..f3f674e 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestPermsGrp.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestPermsGrp.java @@ -54,179 +54,179 @@ public class TestPermsGrp extends TestCase { - private boolean isServerRunning = false; - private static final int msPort = 20101; - private HiveConf hcatConf; - private Warehouse clientWH; - private HiveMetaStoreClient msc; - private static final Logger LOG = LoggerFactory.getLogger(TestPermsGrp.class); - - @Override - protected void tearDown() throws Exception { - System.setSecurityManager(securityManager); + private boolean isServerRunning = false; + private static final int msPort = 20101; + private HiveConf hcatConf; + private Warehouse clientWH; + private HiveMetaStoreClient msc; + private static final Logger LOG = LoggerFactory.getLogger(TestPermsGrp.class); + + @Override + protected void tearDown() throws Exception { + System.setSecurityManager(securityManager); + } + + @Override + protected void setUp() throws Exception { + + if (isServerRunning) { + return; } - @Override - protected void setUp() throws Exception { - - if (isServerRunning) { - return; - } - - MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge()); - - isServerRunning = true; - - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - - hcatConf = new HiveConf(this.getClass()); - hcatConf.set("hive.metastore.local", "false"); - hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://127.0.0.1:" + msPort); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - 
hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hcatConf.set(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT.varname, "60"); - clientWH = new Warehouse(hcatConf); - msc = new HiveMetaStoreClient(hcatConf, null); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge()); + + isServerRunning = true; + + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + + hcatConf = new HiveConf(this.getClass()); + hcatConf.set("hive.metastore.local", "false"); + hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://127.0.0.1:" + msPort); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hcatConf.set(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT.varname, "60"); + clientWH = new Warehouse(hcatConf); + msc = new HiveMetaStoreClient(hcatConf, null); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + + public void testCustomPerms() throws Exception { + + String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + String tblName = "simptbl"; + String typeName = "Person"; + + try { + + // Lets first test for default permissions, this is the case when user specified nothing. + Table tbl = getTable(dbName, tblName, typeName); + msc.createTable(tbl); + Database db = Hive.get(hcatConf).getDatabase(dbName); + Path dfsPath = clientWH.getTablePath(db, tblName); + cleanupTbl(dbName, tblName, typeName); + + // Next user did specify perms. + try { + HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx-wx---"}); + } catch (Exception e) { + assertTrue(e instanceof ExitException); + assertEquals(((ExitException) e).getStatus(), 0); + } + dfsPath = clientWH.getTablePath(db, tblName); + assertTrue(dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath).getPermission().equals(FsPermission.valueOf("drwx-wx---"))); + + cleanupTbl(dbName, tblName, typeName); + + // User specified perms in invalid format. + hcatConf.set(HCatConstants.HCAT_PERMS, "rwx"); + // make sure create table fails. + try { + HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx"}); + assert false; + } catch (Exception me) { + assertTrue(me instanceof ExitException); + } + // No physical dir gets created. + dfsPath = clientWH.getTablePath(db, tblName); + try { + dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); + assert false; + } catch (Exception fnfe) { + assertTrue(fnfe instanceof FileNotFoundException); + } + + // And no metadata gets created. 
+ try { + msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName); + assert false; + } catch (Exception e) { + assertTrue(e instanceof NoSuchObjectException); + assertEquals("default.simptbl table not found", e.getMessage()); + } + + // test for invalid group name + hcatConf.set(HCatConstants.HCAT_PERMS, "drw-rw-rw-"); + hcatConf.set(HCatConstants.HCAT_GROUP, "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"); + + try { + // create table must fail. + HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rw-rw-rw-", "-g", "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"}); + assert false; + } catch (Exception me) { + assertTrue(me instanceof SecurityException); + } + + try { + // no metadata should get created. + msc.getTable(dbName, tblName); + assert false; + } catch (Exception e) { + assertTrue(e instanceof NoSuchObjectException); + assertEquals("default.simptbl table not found", e.getMessage()); + } + try { + // neither dir should get created. + dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); + assert false; + } catch (Exception e) { + assertTrue(e instanceof FileNotFoundException); + } + + } catch (Exception e) { + LOG.error("testCustomPerms failed.", e); + throw e; } + } + private void silentDropDatabase(String dbName) throws MetaException, TException { + try { + for (String tableName : msc.getTables(dbName, "*")) { + msc.dropTable(dbName, tableName); + } - public void testCustomPerms() throws Exception { - - String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - String tblName = "simptbl"; - String typeName = "Person"; - - try { - - // Lets first test for default permissions, this is the case when user specified nothing. - Table tbl = getTable(dbName, tblName, typeName); - msc.createTable(tbl); - Database db = Hive.get(hcatConf).getDatabase(dbName); - Path dfsPath = clientWH.getTablePath(db, tblName); - cleanupTbl(dbName, tblName, typeName); - - // Next user did specify perms. - try { - HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx-wx---"}); - } catch (Exception e) { - assertTrue(e instanceof ExitException); - assertEquals(((ExitException) e).getStatus(), 0); - } - dfsPath = clientWH.getTablePath(db, tblName); - assertTrue(dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath).getPermission().equals(FsPermission.valueOf("drwx-wx---"))); - - cleanupTbl(dbName, tblName, typeName); - - // User specified perms in invalid format. - hcatConf.set(HCatConstants.HCAT_PERMS, "rwx"); - // make sure create table fails. - try { - HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx"}); - assert false; - } catch (Exception me) { - assertTrue(me instanceof ExitException); - } - // No physical dir gets created. - dfsPath = clientWH.getTablePath(db, tblName); - try { - dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); - assert false; - } catch (Exception fnfe) { - assertTrue(fnfe instanceof FileNotFoundException); - } - - // And no metadata gets created. - try { - msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName); - assert false; - } catch (Exception e) { - assertTrue(e instanceof NoSuchObjectException); - assertEquals("default.simptbl table not found", e.getMessage()); - } - - // test for invalid group name - hcatConf.set(HCatConstants.HCAT_PERMS, "drw-rw-rw-"); - hcatConf.set(HCatConstants.HCAT_GROUP, "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"); - - try { - // create table must fail. 
- HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rw-rw-rw-", "-g", "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"}); - assert false; - } catch (Exception me) { - assertTrue(me instanceof SecurityException); - } - - try { - // no metadata should get created. - msc.getTable(dbName, tblName); - assert false; - } catch (Exception e) { - assertTrue(e instanceof NoSuchObjectException); - assertEquals("default.simptbl table not found", e.getMessage()); - } - try { - // neither dir should get created. - dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); - assert false; - } catch (Exception e) { - assertTrue(e instanceof FileNotFoundException); - } - - } catch (Exception e) { - LOG.error("testCustomPerms failed.", e); - throw e; - } + } catch (NoSuchObjectException e) { } + } - private void silentDropDatabase(String dbName) throws MetaException, TException { - try { - for (String tableName : msc.getTables(dbName, "*")) { - msc.dropTable(dbName, tableName); - } + private void cleanupTbl(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, InvalidOperationException { - } catch (NoSuchObjectException e) { - } - } - - private void cleanupTbl(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, InvalidOperationException { + msc.dropTable(dbName, tblName); + msc.dropType(typeName); + } - msc.dropTable(dbName, tblName); - msc.dropType(typeName); - } + private Table getTable(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, AlreadyExistsException, InvalidObjectException { - private Table getTable(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, AlreadyExistsException, InvalidObjectException { + msc.dropTable(dbName, tblName); + silentDropDatabase(dbName); - msc.dropTable(dbName, tblName); - silentDropDatabase(dbName); + msc.dropType(typeName); + Type typ1 = new Type(); + typ1.setName(typeName); + typ1.setFields(new ArrayList(1)); + typ1.getFields().add(new FieldSchema("name", serdeConstants.STRING_TYPE_NAME, "")); + msc.createType(typ1); - msc.dropType(typeName); - Type typ1 = new Type(); - typ1.setName(typeName); - typ1.setFields(new ArrayList(1)); - typ1.getFields().add(new FieldSchema("name", serdeConstants.STRING_TYPE_NAME, "")); - msc.createType(typ1); + Table tbl = new Table(); + tbl.setDbName(dbName); + tbl.setTableName(tblName); + StorageDescriptor sd = new StorageDescriptor(); + tbl.setSd(sd); + sd.setCols(typ1.getFields()); - Table tbl = new Table(); - tbl.setDbName(dbName); - tbl.setTableName(tblName); - StorageDescriptor sd = new StorageDescriptor(); - tbl.setSd(sd); - sd.setCols(typ1.getFields()); - - sd.setSerdeInfo(new SerDeInfo()); - return tbl; - } + sd.setSerdeInfo(new SerDeInfo()); + return tbl; + } - private SecurityManager securityManager; + private SecurityManager securityManager; } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java index 4bfb7fc..3cc548e 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java @@ -53,341 +53,341 @@ public class TestSemanticAnalysis extends HCatBaseTest { - private static final Logger LOG = LoggerFactory.getLogger(TestSemanticAnalysis.class); - 
private static final String TBL_NAME = "junit_sem_analysis"; - - private Driver hcatDriver = null; - private String query; - - @Before - public void setUpHCatDriver() throws IOException { - if (hcatDriver == null) { - HiveConf hcatConf = new HiveConf(hiveConf); - hcatConf.set(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE.varname, - "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"); - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - } + private static final Logger LOG = LoggerFactory.getLogger(TestSemanticAnalysis.class); + private static final String TBL_NAME = "junit_sem_analysis"; + + private Driver hcatDriver = null; + private String query; + + @Before + public void setUpHCatDriver() throws IOException { + if (hcatDriver == null) { + HiveConf hcatConf = new HiveConf(hiveConf); + hcatConf.set(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE.varname, + "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"); + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hcatDriver = new Driver(hcatConf); + SessionState.start(new CliSessionState(hcatConf)); } - - @Test - public void testDescDB() throws CommandNeedRetryException, IOException { - hcatDriver.run("drop database mydb cascade"); - assertEquals(0, hcatDriver.run("create database mydb").getResponseCode()); - CommandProcessorResponse resp = hcatDriver.run("describe database mydb"); - assertEquals(0, resp.getResponseCode()); - ArrayList result = new ArrayList(); - hcatDriver.getResults(result); - assertTrue(result.get(0).contains("mydb.db")); - hcatDriver.run("drop database mydb cascade"); - } - - @Test - public void testCreateTblWithLowerCasePartNames() throws CommandNeedRetryException, MetaException, TException, NoSuchObjectException { - driver.run("drop table junit_sem_analysis"); - CommandProcessorResponse resp = driver.run("create table junit_sem_analysis (a int) partitioned by (B string) stored as TEXTFILE"); - assertEquals(resp.getResponseCode(), 0); - assertEquals(null, resp.getErrorMessage()); - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals("Partition key name case problem", "b", tbl.getPartitionKeys().get(0).getName()); - driver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTblFFpart() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - driver.run("drop table junit_sem_analysis"); - driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"); - driver.run("alter table junit_sem_analysis add partition (b='2010-10-10')"); - hcatDriver.run("alter table junit_sem_analysis partition (b='2010-10-10') set fileformat RCFILE"); - - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(TextInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - List partVals = new ArrayList(1); - partVals.add("2010-10-10"); - Partition part = client.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME, partVals); - - assertEquals(RCFileInputFormat.class.getName(), part.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), part.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void 
testUsNonExistentDB() throws CommandNeedRetryException { - CommandProcessorResponse resp = hcatDriver.run("use no_such_db"); - assertEquals(1, resp.getResponseCode()); - } - - @Test - public void testDatabaseOperations() throws MetaException, CommandNeedRetryException { - - List dbs = client.getAllDatabases(); - String testDb1 = "testdatabaseoperatons1"; - String testDb2 = "testdatabaseoperatons2"; - - if (dbs.contains(testDb1.toLowerCase())) { - assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); - } - - if (dbs.contains(testDb2.toLowerCase())) { - assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); - } - - assertEquals(0, hcatDriver.run("create database " + testDb1).getResponseCode()); - assertTrue(client.getAllDatabases().contains(testDb1)); - assertEquals(0, hcatDriver.run("create database if not exists " + testDb1).getResponseCode()); - assertTrue(client.getAllDatabases().contains(testDb1)); - assertEquals(0, hcatDriver.run("create database if not exists " + testDb2).getResponseCode()); - assertTrue(client.getAllDatabases().contains(testDb2)); - - assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); - assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); - assertFalse(client.getAllDatabases().contains(testDb1)); - assertFalse(client.getAllDatabases().contains(testDb2)); - } - - @Test - public void testCreateTableIfNotExists() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table " + TBL_NAME); - hcatDriver.run("create table junit_sem_analysis (a int) stored as RCFILE"); - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - List cols = tbl.getSd().getCols(); - assertEquals(1, cols.size()); - assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - CommandProcessorResponse resp = hcatDriver.run("create table if not exists junit_sem_analysis (a int) stored as RCFILE"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - cols = tbl.getSd().getCols(); - assertEquals(1, cols.size()); - assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTblTouch() throws CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis touch"); - assertEquals(0, response.getResponseCode()); - - hcatDriver.run("alter table junit_sem_analysis touch partition (b='12')"); - assertEquals(0, response.getResponseCode()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testChangeColumns() throws CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table 
junit_sem_analysis change a a1 int"); - assertEquals(0, response.getResponseCode()); - - response = hcatDriver.run("alter table junit_sem_analysis change a1 a string"); - assertEquals(0, response.getResponseCode()); - - response = hcatDriver.run("alter table junit_sem_analysis change a a int after c"); - assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAddReplaceCols() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis replace columns (a1 tinyint)"); - assertEquals(0, response.getResponseCode()); - - response = hcatDriver.run("alter table junit_sem_analysis add columns (d tinyint)"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); - - response = hcatDriver.run("describe extended junit_sem_analysis"); - assertEquals(0, response.getResponseCode()); - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - List cols = tbl.getSd().getCols(); - assertEquals(2, cols.size()); - assertTrue(cols.get(0).equals(new FieldSchema("a1", "tinyint", null))); - assertTrue(cols.get(1).equals(new FieldSchema("d", "tinyint", null))); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTblClusteredBy() throws CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis clustered by (a) into 7 buckets"); - assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testDescDB() throws CommandNeedRetryException, IOException { + hcatDriver.run("drop database mydb cascade"); + assertEquals(0, hcatDriver.run("create database mydb").getResponseCode()); + CommandProcessorResponse resp = hcatDriver.run("describe database mydb"); + assertEquals(0, resp.getResponseCode()); + ArrayList result = new ArrayList(); + hcatDriver.getResults(result); + assertTrue(result.get(0).contains("mydb.db")); + hcatDriver.run("drop database mydb cascade"); + } + + @Test + public void testCreateTblWithLowerCasePartNames() throws CommandNeedRetryException, MetaException, TException, NoSuchObjectException { + driver.run("drop table junit_sem_analysis"); + CommandProcessorResponse resp = driver.run("create table junit_sem_analysis (a int) partitioned by (B string) stored as TEXTFILE"); + assertEquals(resp.getResponseCode(), 0); + assertEquals(null, resp.getErrorMessage()); + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals("Partition key name case problem", "b", tbl.getPartitionKeys().get(0).getName()); + driver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAlterTblFFpart() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + driver.run("drop table junit_sem_analysis"); + driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"); + driver.run("alter table junit_sem_analysis add partition (b='2010-10-10')"); + hcatDriver.run("alter table junit_sem_analysis partition 
(b='2010-10-10') set fileformat RCFILE"); + + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals(TextInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + List partVals = new ArrayList(1); + partVals.add("2010-10-10"); + Partition part = client.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME, partVals); + + assertEquals(RCFileInputFormat.class.getName(), part.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), part.getSd().getOutputFormat()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testUsNonExistentDB() throws CommandNeedRetryException { + CommandProcessorResponse resp = hcatDriver.run("use no_such_db"); + assertEquals(1, resp.getResponseCode()); + } + + @Test + public void testDatabaseOperations() throws MetaException, CommandNeedRetryException { + + List dbs = client.getAllDatabases(); + String testDb1 = "testdatabaseoperatons1"; + String testDb2 = "testdatabaseoperatons2"; + + if (dbs.contains(testDb1.toLowerCase())) { + assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); } - @Test - public void testAlterTableSetFF() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("alter table junit_sem_analysis set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + - "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); - hcatDriver.run("desc extended junit_sem_analysis"); - - tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); + if (dbs.contains(testDb2.toLowerCase())) { + assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); } - @Test - public void testAddPartFail() throws CommandNeedRetryException { + assertEquals(0, hcatDriver.run("create database " + testDb1).getResponseCode()); + assertTrue(client.getAllDatabases().contains(testDb1)); + assertEquals(0, hcatDriver.run("create database if not exists " + testDb1).getResponseCode()); + assertTrue(client.getAllDatabases().contains(testDb1)); + assertEquals(0, hcatDriver.run("create database if not exists " + testDb2).getResponseCode()); + assertTrue(client.getAllDatabases().contains(testDb2)); + + assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); + assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); + assertFalse(client.getAllDatabases().contains(testDb1)); + assertFalse(client.getAllDatabases().contains(testDb2)); + } + + @Test + public void testCreateTableIfNotExists() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + hcatDriver.run("drop table " + TBL_NAME); + hcatDriver.run("create table 
junit_sem_analysis (a int) stored as RCFILE"); + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + List cols = tbl.getSd().getCols(); + assertEquals(1, cols.size()); + assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); + assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + CommandProcessorResponse resp = hcatDriver.run("create table if not exists junit_sem_analysis (a int) stored as RCFILE"); + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + cols = tbl.getSd().getCols(); + assertEquals(1, cols.size()); + assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); + assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAlterTblTouch() throws CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis touch"); + assertEquals(0, response.getResponseCode()); + + hcatDriver.run("alter table junit_sem_analysis touch partition (b='12')"); + assertEquals(0, response.getResponseCode()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testChangeColumns() throws CommandNeedRetryException { + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis change a a1 int"); + assertEquals(0, response.getResponseCode()); + + response = hcatDriver.run("alter table junit_sem_analysis change a1 a string"); + assertEquals(0, response.getResponseCode()); + + response = hcatDriver.run("alter table junit_sem_analysis change a a int after c"); + assertEquals(0, response.getResponseCode()); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAddReplaceCols() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis replace columns (a1 tinyint)"); + assertEquals(0, response.getResponseCode()); + + response = hcatDriver.run("alter table junit_sem_analysis add columns (d tinyint)"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); + + response = hcatDriver.run("describe extended junit_sem_analysis"); + assertEquals(0, response.getResponseCode()); + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + List cols = tbl.getSd().getCols(); + assertEquals(2, cols.size()); + assertTrue(cols.get(0).equals(new FieldSchema("a1", "tinyint", null))); + assertTrue(cols.get(1).equals(new FieldSchema("d", "tinyint", null))); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAlterTblClusteredBy() throws CommandNeedRetryException { + + 
hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis clustered by (a) into 7 buckets"); + assertEquals(0, response.getResponseCode()); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAlterTableSetFF() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + hcatDriver.run("alter table junit_sem_analysis set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); + hcatDriver.run("desc extended junit_sem_analysis"); + + tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAddPartFail() throws CommandNeedRetryException { + + driver.run("drop table junit_sem_analysis"); + driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location 'README.txt'"); + assertEquals(0, response.getResponseCode()); + driver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAddPartPass() throws IOException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); + CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location '" + TEST_DATA_DIR + "'"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testCTAS() throws CommandNeedRetryException { + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) as select * from tbl2"; + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(40000, response.getResponseCode()); + assertTrue(response.getErrorMessage().contains("FAILED: SemanticException Operation not supported. 
Create table as Select is not a valid operation.")); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testStoredAs() throws CommandNeedRetryException { + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int)"; + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testAddDriverInfo() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as " + + "INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver' "; + assertEquals(0, hcatDriver.run(query).getResponseCode()); + + Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); + assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); + assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + + hcatDriver.run("drop table junit_sem_analysis"); + } + + @Test + public void testInvalidateNonStringPartition() throws IOException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b int) stored as RCFILE"; + + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(40000, response.getResponseCode()); + assertEquals("FAILED: SemanticException Operation not supported. HCatalog only supports partition columns of type string. For column: b Found type: int", + response.getErrorMessage()); - driver.run("drop table junit_sem_analysis"); - driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location 'README.txt'"); - assertEquals(0, response.getResponseCode()); - driver.run("drop table junit_sem_analysis"); - } + } - @Test - public void testAddPartPass() throws IOException, CommandNeedRetryException { + @Test + public void testInvalidateSeqFileStoredAs() throws IOException, CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location '" + TEST_DATA_DIR + "'"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); - hcatDriver.run("drop table junit_sem_analysis"); - } + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as SEQUENCEFILE"; - @Test - public void testCTAS() throws CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) as select * from tbl2"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(40000, response.getResponseCode()); - assertTrue(response.getErrorMessage().contains("FAILED: SemanticException Operation not supported. 
Create table as Select is not a valid operation.")); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testStoredAs() throws CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int)"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); - } + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); - @Test - public void testAddDriverInfo() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + } - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as " + - "INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + - "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver' "; - assertEquals(0, hcatDriver.run(query).getResponseCode()); + @Test + public void testInvalidateTextFileStoredAs() throws IOException, CommandNeedRetryException { - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"; + + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); - } + } - @Test - public void testInvalidateNonStringPartition() throws IOException, CommandNeedRetryException { + @Test + public void testInvalidateClusteredBy() throws IOException, CommandNeedRetryException { + + hcatDriver.run("drop table junit_sem_analysis"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) clustered by (a) into 10 buckets stored as TEXTFILE"; - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b int) stored as RCFILE"; + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); + } + + @Test + public void testCTLFail() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_sem_analysis"); + driver.run("drop table like_table"); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(40000, response.getResponseCode()); - assertEquals("FAILED: SemanticException Operation not supported. HCatalog only supports partition columns of type string. 
For column: b Found type: int", - response.getErrorMessage()); + driver.run(query); + query = "create table like_table like junit_sem_analysis"; + CommandProcessorResponse response = hcatDriver.run(query); + assertEquals(0, response.getResponseCode()); + } + + @Test + public void testCTLPass() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { + try { + hcatDriver.run("drop table junit_sem_analysis"); + } catch (Exception e) { + LOG.error("Error in drop table.", e); } - - @Test - public void testInvalidateSeqFileStoredAs() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as SEQUENCEFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - - } - - @Test - public void testInvalidateTextFileStoredAs() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - - } - - @Test - public void testInvalidateClusteredBy() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) clustered by (a) into 10 buckets stored as TEXTFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - } - - @Test - public void testCTLFail() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_sem_analysis"); - driver.run("drop table like_table"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; - - driver.run(query); - query = "create table like_table like junit_sem_analysis"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - } - - @Test - public void testCTLPass() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - try { - hcatDriver.run("drop table junit_sem_analysis"); - } catch (Exception e) { - LOG.error("Error in drop table.", e); - } - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; - - hcatDriver.run(query); - String likeTbl = "like_table"; - hcatDriver.run("drop table " + likeTbl); - query = "create table like_table like junit_sem_analysis"; - CommandProcessorResponse resp = hcatDriver.run(query); - assertEquals(0, resp.getResponseCode()); + query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; + + hcatDriver.run(query); + String likeTbl = "like_table"; + hcatDriver.run("drop table " + likeTbl); + query = "create table like_table like junit_sem_analysis"; + CommandProcessorResponse resp = hcatDriver.run(query); + assertEquals(0, resp.getResponseCode()); // Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, likeTbl); // assertEquals(likeTbl,tbl.getTableName()); // List cols = tbl.getSd().getCols(); @@ -401,7 +401,7 @@ public void testCTLPass() throws IOException, MetaException, TException, NoSuchO // // hcatDriver.run("drop table junit_sem_analysis"); // hcatDriver.run("drop table "+likeTbl); - } + } // This test case currently fails, since add partitions don't 
inherit anything from tables. diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestUseDatabase.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestUseDatabase.java index 42442ae..f362b69 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestUseDatabase.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestUseDatabase.java @@ -34,44 +34,44 @@ /* Unit test for GitHub Howl issue #3 */ public class TestUseDatabase extends TestCase { - private Driver hcatDriver; + private Driver hcatDriver; - @Override - protected void setUp() throws Exception { + @Override + protected void setUp() throws Exception { - HiveConf hcatConf = new HiveConf(this.getClass()); - hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + HiveConf hcatConf = new HiveConf(this.getClass()); + hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - } + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + hcatDriver = new Driver(hcatConf); + SessionState.start(new CliSessionState(hcatConf)); + } - String query; - private final String dbName = "testUseDatabase_db"; - private final String tblName = "testUseDatabase_tbl"; + String query; + private final String dbName = "testUseDatabase_db"; + private final String tblName = "testUseDatabase_tbl"; - public void testAlterTablePass() throws IOException, CommandNeedRetryException { + public void testAlterTablePass() throws IOException, CommandNeedRetryException { - hcatDriver.run("create database " + dbName); - hcatDriver.run("use " + dbName); - hcatDriver.run("create table " + tblName + " (a int) partitioned by (b string) stored as RCFILE"); + hcatDriver.run("create database " + dbName); + hcatDriver.run("use " + dbName); + hcatDriver.run("create table " + tblName + " (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response; + CommandProcessorResponse response; - response = hcatDriver.run("alter table " + tblName + " add partition (b='2') location '/tmp'"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); + response = hcatDriver.run("alter table " + tblName + " add partition (b='2') location '/tmp'"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); - response = hcatDriver.run("alter table " + tblName + " set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + - "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); + response = hcatDriver.run("alter table " + tblName + " set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); - hcatDriver.run("drop table " + tblName); - hcatDriver.run("drop database " + dbName); - } + hcatDriver.run("drop table " + 
tblName); + hcatDriver.run("drop database " + dbName); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java index 8082d4a..102d6d2 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java @@ -42,142 +42,142 @@ public class TestHCatUtil { - @Test - public void testFsPermissionOperation() { - - HashMap permsCode = new HashMap(); - - for (int i = 0; i < 8; i++) { - for (int j = 0; j < 8; j++) { - for (int k = 0; k < 8; k++) { - StringBuilder sb = new StringBuilder(); - sb.append("0"); - sb.append(i); - sb.append(j); - sb.append(k); - Integer code = (((i * 8) + j) * 8) + k; - String perms = (new FsPermission(Short.decode(sb.toString()))).toString(); - if (permsCode.containsKey(perms)) { - Assert.assertEquals("permissions(" + perms + ") mapped to multiple codes", code, permsCode.get(perms)); - } - permsCode.put(perms, code); - assertFsPermissionTransformationIsGood(perms); - } - } + @Test + public void testFsPermissionOperation() { + + HashMap permsCode = new HashMap(); + + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + for (int k = 0; k < 8; k++) { + StringBuilder sb = new StringBuilder(); + sb.append("0"); + sb.append(i); + sb.append(j); + sb.append(k); + Integer code = (((i * 8) + j) * 8) + k; + String perms = (new FsPermission(Short.decode(sb.toString()))).toString(); + if (permsCode.containsKey(perms)) { + Assert.assertEquals("permissions(" + perms + ") mapped to multiple codes", code, permsCode.get(perms)); + } + permsCode.put(perms, code); + assertFsPermissionTransformationIsGood(perms); } + } } - - private void assertFsPermissionTransformationIsGood(String perms) { - Assert.assertEquals(perms, FsPermission.valueOf("-" + perms).toString()); - } - - @Test - public void testValidateMorePermissive() { - assertConsistentFsPermissionBehaviour(FsAction.ALL, true, true, true, true, true, true, true, true); - assertConsistentFsPermissionBehaviour(FsAction.READ, false, true, false, true, false, false, false, false); - assertConsistentFsPermissionBehaviour(FsAction.WRITE, false, true, false, false, true, false, false, false); - assertConsistentFsPermissionBehaviour(FsAction.EXECUTE, false, true, true, false, false, false, false, false); - assertConsistentFsPermissionBehaviour(FsAction.READ_EXECUTE, false, true, true, true, false, true, false, false); - assertConsistentFsPermissionBehaviour(FsAction.READ_WRITE, false, true, false, true, true, false, true, false); - assertConsistentFsPermissionBehaviour(FsAction.WRITE_EXECUTE, false, true, true, false, true, false, false, true); - assertConsistentFsPermissionBehaviour(FsAction.NONE, false, true, false, false, false, false, false, false); - } - - - private void assertConsistentFsPermissionBehaviour( - FsAction base, boolean versusAll, boolean versusNone, - boolean versusX, boolean versusR, boolean versusW, - boolean versusRX, boolean versusRW, boolean versusWX) { - - Assert.assertTrue(versusAll == HCatUtil.validateMorePermissive(base, FsAction.ALL)); - Assert.assertTrue(versusX == HCatUtil.validateMorePermissive(base, FsAction.EXECUTE)); - Assert.assertTrue(versusNone == HCatUtil.validateMorePermissive(base, FsAction.NONE)); - Assert.assertTrue(versusR == HCatUtil.validateMorePermissive(base, FsAction.READ)); - Assert.assertTrue(versusRX == HCatUtil.validateMorePermissive(base, 
FsAction.READ_EXECUTE)); - Assert.assertTrue(versusRW == HCatUtil.validateMorePermissive(base, FsAction.READ_WRITE)); - Assert.assertTrue(versusW == HCatUtil.validateMorePermissive(base, FsAction.WRITE)); - Assert.assertTrue(versusWX == HCatUtil.validateMorePermissive(base, FsAction.WRITE_EXECUTE)); - } - - @Test - public void testExecutePermissionsCheck() { - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.ALL)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.NONE)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.EXECUTE)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_EXECUTE)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE_EXECUTE)); - - Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ)); - Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE)); - Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_WRITE)); - - } - - @Test - public void testGetTableSchemaWithPtnColsApi() throws IOException { - // Check the schema of a table with one field & no partition keys. - StorageDescriptor sd = new StorageDescriptor( - Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)), - "location", "org.apache.hadoop.mapred.TextInputFormat", - "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(), - new ArrayList(), new ArrayList(), new HashMap()); - org.apache.hadoop.hive.metastore.api.Table apiTable = - new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", - 0, 0, 0, sd, new ArrayList(), new HashMap(), - "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); - Table table = new Table(apiTable); - - List expectedHCatSchema = - Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null)); - - Assert.assertEquals(new HCatSchema(expectedHCatSchema), - HCatUtil.getTableSchemaWithPtnCols(table)); - - // Add a partition key & ensure its reflected in the schema. - List partitionKeys = - Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null)); - table.getTTable().setPartitionKeys(partitionKeys); - expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null)); - Assert.assertEquals(new HCatSchema(expectedHCatSchema), - HCatUtil.getTableSchemaWithPtnCols(table)); - } - - /** - * Hive represents tables in two ways: - *
-     * <ul>
-     *   <li>org.apache.hadoop.hive.metastore.api.Table - exactly whats stored in the metastore</li>
-     *   <li>org.apache.hadoop.hive.ql.metadata.Table - adds business logic over api.Table</li>
-     * </ul>
- * Here we check SerDe-reported fields are included in the table schema. - */ - @Test - public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOException { - Map parameters = Maps.newHashMap(); - parameters.put(serdeConstants.SERIALIZATION_CLASS, - "org.apache.hadoop.hive.serde2.thrift.test.IntString"); - parameters.put(serdeConstants.SERIALIZATION_FORMAT, "org.apache.thrift.protocol.TBinaryProtocol"); - - SerDeInfo serDeInfo = new SerDeInfo(null, - "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", parameters); - - // StorageDescriptor has an empty list of fields - SerDe will report them. - StorageDescriptor sd = new StorageDescriptor(new ArrayList(), "location", - "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", - false, -1, serDeInfo, new ArrayList(), new ArrayList(), - new HashMap()); - - org.apache.hadoop.hive.metastore.api.Table apiTable = - new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", - 0, 0, 0, sd, new ArrayList(), new HashMap(), - "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); - Table table = new Table(apiTable); - - List expectedHCatSchema = Lists.newArrayList( - new HCatFieldSchema("myint", HCatFieldSchema.Type.INT, null), - new HCatFieldSchema("mystring", HCatFieldSchema.Type.STRING, null), - new HCatFieldSchema("underscore_int", HCatFieldSchema.Type.INT, null)); - - Assert.assertEquals(new HCatSchema(expectedHCatSchema), - HCatUtil.getTableSchemaWithPtnCols(table)); - } + } + + private void assertFsPermissionTransformationIsGood(String perms) { + Assert.assertEquals(perms, FsPermission.valueOf("-" + perms).toString()); + } + + @Test + public void testValidateMorePermissive() { + assertConsistentFsPermissionBehaviour(FsAction.ALL, true, true, true, true, true, true, true, true); + assertConsistentFsPermissionBehaviour(FsAction.READ, false, true, false, true, false, false, false, false); + assertConsistentFsPermissionBehaviour(FsAction.WRITE, false, true, false, false, true, false, false, false); + assertConsistentFsPermissionBehaviour(FsAction.EXECUTE, false, true, true, false, false, false, false, false); + assertConsistentFsPermissionBehaviour(FsAction.READ_EXECUTE, false, true, true, true, false, true, false, false); + assertConsistentFsPermissionBehaviour(FsAction.READ_WRITE, false, true, false, true, true, false, true, false); + assertConsistentFsPermissionBehaviour(FsAction.WRITE_EXECUTE, false, true, true, false, true, false, false, true); + assertConsistentFsPermissionBehaviour(FsAction.NONE, false, true, false, false, false, false, false, false); + } + + + private void assertConsistentFsPermissionBehaviour( + FsAction base, boolean versusAll, boolean versusNone, + boolean versusX, boolean versusR, boolean versusW, + boolean versusRX, boolean versusRW, boolean versusWX) { + + Assert.assertTrue(versusAll == HCatUtil.validateMorePermissive(base, FsAction.ALL)); + Assert.assertTrue(versusX == HCatUtil.validateMorePermissive(base, FsAction.EXECUTE)); + Assert.assertTrue(versusNone == HCatUtil.validateMorePermissive(base, FsAction.NONE)); + Assert.assertTrue(versusR == HCatUtil.validateMorePermissive(base, FsAction.READ)); + Assert.assertTrue(versusRX == HCatUtil.validateMorePermissive(base, FsAction.READ_EXECUTE)); + Assert.assertTrue(versusRW == HCatUtil.validateMorePermissive(base, FsAction.READ_WRITE)); + Assert.assertTrue(versusW == HCatUtil.validateMorePermissive(base, FsAction.WRITE)); + Assert.assertTrue(versusWX == 
HCatUtil.validateMorePermissive(base, FsAction.WRITE_EXECUTE)); + } + + @Test + public void testExecutePermissionsCheck() { + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.ALL)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.NONE)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.EXECUTE)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_EXECUTE)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE_EXECUTE)); + + Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ)); + Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE)); + Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_WRITE)); + + } + + @Test + public void testGetTableSchemaWithPtnColsApi() throws IOException { + // Check the schema of a table with one field & no partition keys. + StorageDescriptor sd = new StorageDescriptor( + Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)), + "location", "org.apache.hadoop.mapred.TextInputFormat", + "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(), + new ArrayList(), new ArrayList(), new HashMap()); + org.apache.hadoop.hive.metastore.api.Table apiTable = + new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", + 0, 0, 0, sd, new ArrayList(), new HashMap(), + "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); + Table table = new Table(apiTable); + + List expectedHCatSchema = + Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null)); + + Assert.assertEquals(new HCatSchema(expectedHCatSchema), + HCatUtil.getTableSchemaWithPtnCols(table)); + + // Add a partition key & ensure its reflected in the schema. + List partitionKeys = + Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null)); + table.getTTable().setPartitionKeys(partitionKeys); + expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null)); + Assert.assertEquals(new HCatSchema(expectedHCatSchema), + HCatUtil.getTableSchemaWithPtnCols(table)); + } + + /** + * Hive represents tables in two ways: + *
+   * <ul>
+   *   <li>org.apache.hadoop.hive.metastore.api.Table - exactly whats stored in the metastore</li>
+   *   <li>org.apache.hadoop.hive.ql.metadata.Table - adds business logic over api.Table</li>
+   * </ul>
+ * Here we check SerDe-reported fields are included in the table schema. + */ + @Test + public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOException { + Map parameters = Maps.newHashMap(); + parameters.put(serdeConstants.SERIALIZATION_CLASS, + "org.apache.hadoop.hive.serde2.thrift.test.IntString"); + parameters.put(serdeConstants.SERIALIZATION_FORMAT, "org.apache.thrift.protocol.TBinaryProtocol"); + + SerDeInfo serDeInfo = new SerDeInfo(null, + "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", parameters); + + // StorageDescriptor has an empty list of fields - SerDe will report them. + StorageDescriptor sd = new StorageDescriptor(new ArrayList(), "location", + "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", + false, -1, serDeInfo, new ArrayList(), new ArrayList(), + new HashMap()); + + org.apache.hadoop.hive.metastore.api.Table apiTable = + new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", + 0, 0, 0, sd, new ArrayList(), new HashMap(), + "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); + Table table = new Table(apiTable); + + List expectedHCatSchema = Lists.newArrayList( + new HCatFieldSchema("myint", HCatFieldSchema.Type.INT, null), + new HCatFieldSchema("mystring", HCatFieldSchema.Type.STRING, null), + new HCatFieldSchema("underscore_int", HCatFieldSchema.Type.INT, null)); + + Assert.assertEquals(new HCatSchema(expectedHCatSchema), + HCatUtil.getTableSchemaWithPtnCols(table)); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java index e701f61..1705419 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java @@ -63,205 +63,205 @@ public class TestHiveClientCache { - private static final Logger LOG = LoggerFactory.getLogger(TestHiveClientCache.class); - final HiveConf hiveConf = new HiveConf(); - - @BeforeClass - public static void setUp() throws Exception { + private static final Logger LOG = LoggerFactory.getLogger(TestHiveClientCache.class); + final HiveConf hiveConf = new HiveConf(); + + @BeforeClass + public static void setUp() throws Exception { + } + + @AfterClass + public static void tearDown() throws Exception { + } + + @Test + public void testCacheHit() throws IOException, MetaException, LoginException { + + HiveClientCache cache = new HiveClientCache(1000); + HiveMetaStoreClient client = cache.get(hiveConf); + assertNotNull(client); + client.close(); // close shouldn't matter + + // Setting a non important configuration should return the same client only + hiveConf.setIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS, 10); + HiveMetaStoreClient client2 = cache.get(hiveConf); + assertNotNull(client2); + assertEquals(client, client2); + client2.close(); + } + + @Test + public void testCacheMiss() throws IOException, MetaException, LoginException { + HiveClientCache cache = new HiveClientCache(1000); + HiveMetaStoreClient client = cache.get(hiveConf); + assertNotNull(client); + + // Set different uri as it is one of the criteria deciding whether to return the same client or not + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different + HiveMetaStoreClient client2 = cache.get(hiveConf); + 
assertNotNull(client2); + assertNotSame(client, client2); + } + + /** + * Check that a new client is returned for the same configuration after the expiry time. + * Also verify that the expiry time configuration is honoured + */ + @Test + public void testCacheExpiry() throws IOException, MetaException, LoginException, InterruptedException { + HiveClientCache cache = new HiveClientCache(1); + HiveClientCache.CacheableHiveMetaStoreClient client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); + assertNotNull(client); + + Thread.sleep(2500); + HiveMetaStoreClient client2 = cache.get(hiveConf); + client.close(); + assertTrue(client.isClosed()); // close() after *expiry time* and *a cache access* should have tore down the client + + assertNotNull(client2); + assertNotSame(client, client2); + } + + /** + * Check that a *new* client is created if asked from different threads even with + * the same hive configuration + * @throws ExecutionException + * @throws InterruptedException + */ + @Test + public void testMultipleThreadAccess() throws ExecutionException, InterruptedException { + final HiveClientCache cache = new HiveClientCache(1000); + + class GetHiveClient implements Callable { + @Override + public HiveMetaStoreClient call() throws IOException, MetaException, LoginException { + return cache.get(hiveConf); + } } - @AfterClass - public static void tearDown() throws Exception { + ExecutorService executor = Executors.newFixedThreadPool(2); + + Callable worker1 = new GetHiveClient(); + Callable worker2 = new GetHiveClient(); + Future clientFuture1 = executor.submit(worker1); + Future clientFuture2 = executor.submit(worker2); + HiveMetaStoreClient client1 = clientFuture1.get(); + HiveMetaStoreClient client2 = clientFuture2.get(); + assertNotNull(client1); + assertNotNull(client2); + assertNotSame(client1, client2); + } + + @Test + public void testCloseAllClients() throws IOException, MetaException, LoginException { + final HiveClientCache cache = new HiveClientCache(1000); + HiveClientCache.CacheableHiveMetaStoreClient client1 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different + HiveClientCache.CacheableHiveMetaStoreClient client2 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); + cache.closeAllClientsQuietly(); + assertTrue(client1.isClosed()); + assertTrue(client2.isClosed()); + } + + /** + * Test that a long table name actually breaks the HMSC. 
Subsequently check that isOpen() reflects + * and tells if the client is broken + */ + @Ignore("hangs indefinitely") + @Test + public void testHMSCBreakability() throws IOException, MetaException, LoginException, TException, AlreadyExistsException, + InvalidObjectException, NoSuchObjectException, InterruptedException { + // Setup + LocalMetaServer metaServer = new LocalMetaServer(); + metaServer.start(); + + final HiveClientCache cache = new HiveClientCache(1000); + HiveClientCache.CacheableHiveMetaStoreClient client = + (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(metaServer.getHiveConf()); + + assertTrue(client.isOpen()); + + final String DB_NAME = "test_db"; + final String LONG_TABLE_NAME = "long_table_name_" + new BigInteger(200, new Random()).toString(2); + + try { + client.dropTable(DB_NAME, LONG_TABLE_NAME); + } catch (Exception e) { } - - @Test - public void testCacheHit() throws IOException, MetaException, LoginException { - - HiveClientCache cache = new HiveClientCache(1000); - HiveMetaStoreClient client = cache.get(hiveConf); - assertNotNull(client); - client.close(); // close shouldn't matter - - // Setting a non important configuration should return the same client only - hiveConf.setIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS, 10); - HiveMetaStoreClient client2 = cache.get(hiveConf); - assertNotNull(client2); - assertEquals(client, client2); - client2.close(); + try { + client.dropDatabase(DB_NAME); + } catch (Exception e) { } - @Test - public void testCacheMiss() throws IOException, MetaException, LoginException { - HiveClientCache cache = new HiveClientCache(1000); - HiveMetaStoreClient client = cache.get(hiveConf); - assertNotNull(client); - - // Set different uri as it is one of the criteria deciding whether to return the same client or not - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different - HiveMetaStoreClient client2 = cache.get(hiveConf); - assertNotNull(client2); - assertNotSame(client, client2); + client.createDatabase(new Database(DB_NAME, "", null, null)); + + List fields = new ArrayList(); + fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); + Table tbl = new Table(); + tbl.setDbName(DB_NAME); + tbl.setTableName(LONG_TABLE_NAME); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(fields); + tbl.setSd(sd); + sd.setSerdeInfo(new SerDeInfo()); + + // Break the client + try { + client.createTable(tbl); + fail("Exception was expected while creating table with long name"); + } catch (Exception e) { } - /** - * Check that a new client is returned for the same configuration after the expiry time. 
- * Also verify that the expiry time configuration is honoured - */ - @Test - public void testCacheExpiry() throws IOException, MetaException, LoginException, InterruptedException { - HiveClientCache cache = new HiveClientCache(1); - HiveClientCache.CacheableHiveMetaStoreClient client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); - assertNotNull(client); - - Thread.sleep(2500); - HiveMetaStoreClient client2 = cache.get(hiveConf); - client.close(); - assertTrue(client.isClosed()); // close() after *expiry time* and *a cache access* should have tore down the client - - assertNotNull(client2); - assertNotSame(client, client2); + assertFalse(client.isOpen()); + metaServer.shutDown(); + } + + private static class LocalMetaServer implements Runnable { + public final int MS_PORT = 20101; + private final HiveConf hiveConf; + private final SecurityManager securityManager; + public final static int WAIT_TIME_FOR_BOOTUP = 30000; + + public LocalMetaServer() { + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + hiveConf = new HiveConf(TestHiveClientCache.class); + hiveConf.set("hive.metastore.local", "false"); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + + MS_PORT); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, + "false"); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); } - /** - * Check that a *new* client is created if asked from different threads even with - * the same hive configuration - * @throws ExecutionException - * @throws InterruptedException - */ - @Test - public void testMultipleThreadAccess() throws ExecutionException, InterruptedException { - final HiveClientCache cache = new HiveClientCache(1000); - - class GetHiveClient implements Callable { - @Override - public HiveMetaStoreClient call() throws IOException, MetaException, LoginException { - return cache.get(hiveConf); - } - } - - ExecutorService executor = Executors.newFixedThreadPool(2); - - Callable worker1 = new GetHiveClient(); - Callable worker2 = new GetHiveClient(); - Future clientFuture1 = executor.submit(worker1); - Future clientFuture2 = executor.submit(worker2); - HiveMetaStoreClient client1 = clientFuture1.get(); - HiveMetaStoreClient client2 = clientFuture2.get(); - assertNotNull(client1); - assertNotNull(client2); - assertNotSame(client1, client2); + public void start() throws InterruptedException { + Thread thread = new Thread(this); + thread.start(); + Thread.sleep(WAIT_TIME_FOR_BOOTUP); // Wait for the server to bootup } - @Test - public void testCloseAllClients() throws IOException, MetaException, LoginException { - final HiveClientCache cache = new HiveClientCache(1000); - HiveClientCache.CacheableHiveMetaStoreClient client1 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different - HiveClientCache.CacheableHiveMetaStoreClient client2 = (HiveClientCache.CacheableHiveMetaStoreClient) 
cache.get(hiveConf); - cache.closeAllClientsQuietly(); - assertTrue(client1.isClosed()); - assertTrue(client2.isClosed()); + @Override + public void run() { + try { + HiveMetaStore.main(new String[]{"-v", "-p", String.valueOf(MS_PORT)}); + } catch (Throwable t) { + LOG.error("Exiting. Got exception from metastore: ", t); + } } - /** - * Test that a long table name actually breaks the HMSC. Subsequently check that isOpen() reflects - * and tells if the client is broken - */ - @Ignore("hangs indefinitely") - @Test - public void testHMSCBreakability() throws IOException, MetaException, LoginException, TException, AlreadyExistsException, - InvalidObjectException, NoSuchObjectException, InterruptedException { - // Setup - LocalMetaServer metaServer = new LocalMetaServer(); - metaServer.start(); - - final HiveClientCache cache = new HiveClientCache(1000); - HiveClientCache.CacheableHiveMetaStoreClient client = - (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(metaServer.getHiveConf()); - - assertTrue(client.isOpen()); - - final String DB_NAME = "test_db"; - final String LONG_TABLE_NAME = "long_table_name_" + new BigInteger(200, new Random()).toString(2); - - try { - client.dropTable(DB_NAME, LONG_TABLE_NAME); - } catch (Exception e) { - } - try { - client.dropDatabase(DB_NAME); - } catch (Exception e) { - } - - client.createDatabase(new Database(DB_NAME, "", null, null)); - - List fields = new ArrayList(); - fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); - Table tbl = new Table(); - tbl.setDbName(DB_NAME); - tbl.setTableName(LONG_TABLE_NAME); - StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(fields); - tbl.setSd(sd); - sd.setSerdeInfo(new SerDeInfo()); - - // Break the client - try { - client.createTable(tbl); - fail("Exception was expected while creating table with long name"); - } catch (Exception e) { - } - - assertFalse(client.isOpen()); - metaServer.shutDown(); + public HiveConf getHiveConf() { + return hiveConf; } - private static class LocalMetaServer implements Runnable { - public final int MS_PORT = 20101; - private final HiveConf hiveConf; - private final SecurityManager securityManager; - public final static int WAIT_TIME_FOR_BOOTUP = 30000; - - public LocalMetaServer() { - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - hiveConf = new HiveConf(TestHiveClientCache.class); - hiveConf.set("hive.metastore.local", "false"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" - + MS_PORT); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, - "false"); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - } - - public void start() throws InterruptedException { - Thread thread = new Thread(this); - thread.start(); - Thread.sleep(WAIT_TIME_FOR_BOOTUP); // Wait for the server to bootup - } - - @Override - public void run() { - try { - HiveMetaStore.main(new String[]{"-v", "-p", String.valueOf(MS_PORT)}); - } catch (Throwable t) { - LOG.error("Exiting. 
Got exception from metastore: ", t); - } - } - - public HiveConf getHiveConf() { - return hiveConf; - } - - public void shutDown() { - System.setSecurityManager(securityManager); - } + public void shutDown() { + System.setSecurityManager(securityManager); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/HCatDataCheckUtil.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/HCatDataCheckUtil.java index fe6a014..cde718f 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/HCatDataCheckUtil.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/HCatDataCheckUtil.java @@ -37,78 +37,78 @@ */ public class HCatDataCheckUtil { - private static final Logger LOG = LoggerFactory.getLogger(HCatDataCheckUtil.class); + private static final Logger LOG = LoggerFactory.getLogger(HCatDataCheckUtil.class); - public static Driver instantiateDriver(MiniCluster cluster) { - HiveConf hiveConf = new HiveConf(HCatDataCheckUtil.class); - for (Entry e : cluster.getProperties().entrySet()) { - hiveConf.set(e.getKey().toString(), e.getValue().toString()); - } - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - LOG.debug("Hive conf : {}", hiveConf.getAllProperties()); - Driver driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - return driver; - } - - public static void generateDataFile(MiniCluster cluster, String fileName) throws IOException { - MiniCluster.deleteFile(cluster, fileName); - String[] input = new String[50]; - for (int i = 0; i < 50; i++) { - input[i] = (i % 5) + "\t" + i + "\t" + "_S" + i + "S_"; - } - MiniCluster.createInputFile(cluster, fileName, input); + public static Driver instantiateDriver(MiniCluster cluster) { + HiveConf hiveConf = new HiveConf(HCatDataCheckUtil.class); + for (Entry e : cluster.getProperties().entrySet()) { + hiveConf.set(e.getKey().toString(), e.getValue().toString()); } - - public static void createTable(Driver driver, String tableName, String createTableArgs) - throws CommandNeedRetryException, IOException { - String createTable = "create table " + tableName + createTableArgs; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + "]"); - } + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + + LOG.debug("Hive conf : {}", hiveConf.getAllProperties()); + Driver driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + return driver; + } + + public static void generateDataFile(MiniCluster cluster, String fileName) throws IOException { + MiniCluster.deleteFile(cluster, fileName); + String[] input = new String[50]; + for (int i = 0; i < 50; i++) { + input[i] = (i % 5) + "\t" + i + "\t" + "_S" + i + "S_"; } - - public static void dropTable(Driver driver, String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table if exists " + tablename); + MiniCluster.createInputFile(cluster, fileName, input); + } + + public static void createTable(Driver driver, String tableName, String createTableArgs) + throws CommandNeedRetryException, IOException { + String createTable = "create table " + tableName + createTableArgs; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); } - - public static ArrayList formattedRun(Driver driver, String name, String selectCmd) - throws CommandNeedRetryException, IOException { - driver.run(selectCmd); - ArrayList src_values = new ArrayList(); - driver.getResults(src_values); - LOG.info("{} : {}", name, src_values); - return src_values; - } - - - public static boolean recordsEqual(HCatRecord first, HCatRecord second) { - return (compareRecords(first, second) == 0); - } - - public static int compareRecords(HCatRecord first, HCatRecord second) { - return compareRecordContents(first.getAll(), second.getAll()); - } - - public static int compareRecordContents(List first, List second) { - int mySz = first.size(); - int urSz = second.size(); - if (mySz != urSz) { - return mySz - urSz; - } else { - for (int i = 0; i < first.size(); i++) { - int c = DataType.compare(first.get(i), second.get(i)); - if (c != 0) { - return c; - } - } - return 0; + } + + public static void dropTable(Driver driver, String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table if exists " + tablename); + } + + public static ArrayList formattedRun(Driver driver, String name, String selectCmd) + throws CommandNeedRetryException, IOException { + driver.run(selectCmd); + ArrayList src_values = new ArrayList(); + driver.getResults(src_values); + LOG.info("{} : {}", name, src_values); + return src_values; + } + + + public static boolean recordsEqual(HCatRecord first, HCatRecord second) { + return (compareRecords(first, second) == 0); + } + + public static int compareRecords(HCatRecord first, HCatRecord second) { + return compareRecordContents(first.getAll(), second.getAll()); + } + + public static int compareRecordContents(List first, List second) { + int mySz = first.size(); + int urSz = second.size(); + if (mySz != urSz) { + return mySz - urSz; + } else { + for (int i = 0; i < first.size(); i++) { + int c = DataType.compare(first.get(i), second.get(i)); + if (c != 0) { + return c; } + } + return 0; } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestDefaultHCatRecord.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestDefaultHCatRecord.java 
index efbbd0e..fdc4efb 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestDefaultHCatRecord.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestDefaultHCatRecord.java @@ -42,219 +42,219 @@ public class TestDefaultHCatRecord extends TestCase { - public void testRYW() throws IOException { + public void testRYW() throws IOException { - File f = new File("binary.dat"); - f.delete(); - f.createNewFile(); - f.deleteOnExit(); + File f = new File("binary.dat"); + f.delete(); + f.createNewFile(); + f.deleteOnExit(); - OutputStream fileOutStream = new FileOutputStream(f); - DataOutput outStream = new DataOutputStream(fileOutStream); - - HCatRecord[] recs = getHCatRecords(); - for (int i = 0; i < recs.length; i++) { - recs[i].write(outStream); - } - fileOutStream.flush(); - fileOutStream.close(); - - InputStream fInStream = new FileInputStream(f); - DataInput inpStream = new DataInputStream(fInStream); - - for (int i = 0; i < recs.length; i++) { - HCatRecord rec = new DefaultHCatRecord(); - rec.readFields(inpStream); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[i], rec)); - } - - Assert.assertEquals(fInStream.available(), 0); - fInStream.close(); + OutputStream fileOutStream = new FileOutputStream(f); + DataOutput outStream = new DataOutputStream(fileOutStream); + HCatRecord[] recs = getHCatRecords(); + for (int i = 0; i < recs.length; i++) { + recs[i].write(outStream); } + fileOutStream.flush(); + fileOutStream.close(); - public void testCompareTo() { - HCatRecord[] recs = getHCatRecords(); - Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[0], recs[1]) == 0); - Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[4], recs[5]) == 0); - } + InputStream fInStream = new FileInputStream(f); + DataInput inpStream = new DataInputStream(fInStream); - public void testEqualsObject() { - - HCatRecord[] recs = getHCatRecords(); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[0], recs[1])); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[4], recs[5])); + for (int i = 0; i < recs.length; i++) { + HCatRecord rec = new DefaultHCatRecord(); + rec.readFields(inpStream); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[i], rec)); } - /** - * Test get and set calls with type - * @throws HCatException - */ - public void testGetSetByType1() throws HCatException { - HCatRecord inpRec = getHCatRecords()[0]; - HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); - HCatSchema hsch = - HCatSchemaUtils.getHCatSchema( - "a:tinyint,b:smallint,c:int,d:bigint,e:float,f:double,g:boolean,h:string,i:binary,j:string"); - - - newRec.setByte("a", hsch, inpRec.getByte("a", hsch)); - newRec.setShort("b", hsch, inpRec.getShort("b", hsch)); - newRec.setInteger("c", hsch, inpRec.getInteger("c", hsch)); - newRec.setLong("d", hsch, inpRec.getLong("d", hsch)); - newRec.setFloat("e", hsch, inpRec.getFloat("e", hsch)); - newRec.setDouble("f", hsch, inpRec.getDouble("f", hsch)); - newRec.setBoolean("g", hsch, inpRec.getBoolean("g", hsch)); - newRec.setString("h", hsch, inpRec.getString("h", hsch)); - newRec.setByteArray("i", hsch, inpRec.getByteArray("i", hsch)); - newRec.setString("j", hsch, inpRec.getString("j", hsch)); - - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); - - - } - - /** - * Test get and set calls with type - * @throws HCatException - */ - public void testGetSetByType2() throws HCatException { - HCatRecord inpRec = getGetSet2InpRec(); - - HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); - HCatSchema hsch = - 
HCatSchemaUtils.getHCatSchema("a:binary,b:map,c:array,d:struct"); - - - newRec.setByteArray("a", hsch, inpRec.getByteArray("a", hsch)); - newRec.setMap("b", hsch, inpRec.getMap("b", hsch)); - newRec.setList("c", hsch, inpRec.getList("c", hsch)); - newRec.setStruct("d", hsch, inpRec.getStruct("d", hsch)); - - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); - } - - - private HCatRecord getGetSet2InpRec() { - List rlist = new ArrayList(); - - rlist.add(new byte[]{1, 2, 3}); - - Map mapcol = new HashMap(3); - mapcol.put(new Short("2"), "hcat is cool"); - mapcol.put(new Short("3"), "is it?"); - mapcol.put(new Short("4"), "or is it not?"); - rlist.add(mapcol); - - List listcol = new ArrayList(); - listcol.add(314); - listcol.add(007); - rlist.add(listcol);//list - rlist.add(listcol);//struct - return new DefaultHCatRecord(rlist); - } - - private HCatRecord[] getHCatRecords() { - - List rec_1 = new ArrayList(8); - rec_1.add(new Byte("123")); - rec_1.add(new Short("456")); - rec_1.add(new Integer(789)); - rec_1.add(new Long(1000L)); - rec_1.add(new Float(5.3F)); - rec_1.add(new Double(5.3D)); - rec_1.add(new Boolean(true)); - rec_1.add(new String("hcat and hadoop")); - rec_1.add(null); - rec_1.add("null"); - - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(8); - rec_2.add(new Byte("123")); - rec_2.add(new Short("456")); - rec_2.add(new Integer(789)); - rec_2.add(new Long(1000L)); - rec_2.add(new Float(5.3F)); - rec_2.add(new Double(5.3D)); - rec_2.add(new Boolean(true)); - rec_2.add(new String("hcat and hadoop")); - rec_2.add(null); - rec_2.add("null"); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - List rec_3 = new ArrayList(10); - rec_3.add(new Byte("123")); - rec_3.add(new Short("456")); - rec_3.add(new Integer(789)); - rec_3.add(new Long(1000L)); - rec_3.add(new Double(5.3D)); - rec_3.add(new String("hcat and hadoop")); - rec_3.add(null); - List innerList = new ArrayList(); - innerList.add(314); - innerList.add(007); - rec_3.add(innerList); - Map map = new HashMap(3); - map.put(new Short("2"), "hcat is cool"); - map.put(new Short("3"), "is it?"); - map.put(new Short("4"), "or is it not?"); - rec_3.add(map); - - HCatRecord tup_3 = new DefaultHCatRecord(rec_3); - - List rec_4 = new ArrayList(8); - rec_4.add(new Byte("123")); - rec_4.add(new Short("456")); - rec_4.add(new Integer(789)); - rec_4.add(new Long(1000L)); - rec_4.add(new Double(5.3D)); - rec_4.add(new String("hcat and hadoop")); - rec_4.add(null); - rec_4.add("null"); - - Map map2 = new HashMap(3); - map2.put(new Short("2"), "hcat is cool"); - map2.put(new Short("3"), "is it?"); - map2.put(new Short("4"), "or is it not?"); - rec_4.add(map2); - List innerList2 = new ArrayList(); - innerList2.add(314); - innerList2.add(007); - rec_4.add(innerList2); - HCatRecord tup_4 = new DefaultHCatRecord(rec_4); - - - List rec_5 = new ArrayList(3); - rec_5.add(getByteArray()); - rec_5.add(getStruct()); - rec_5.add(getList()); - HCatRecord tup_5 = new DefaultHCatRecord(rec_5); - - - List rec_6 = new ArrayList(3); - rec_6.add(getByteArray()); - rec_6.add(getStruct()); - rec_6.add(getList()); - HCatRecord tup_6 = new DefaultHCatRecord(rec_6); - - - return new HCatRecord[]{tup_1, tup_2, tup_3, tup_4, tup_5, tup_6}; - - } - - private Object getList() { - return getStruct(); - } - - private Object getByteArray() { - return new byte[]{1, 2, 3, 4}; - } - - private List getStruct() { - List struct = new ArrayList(); - struct.add(new Integer(1)); - struct.add(new String("x")); - return struct; - } + 
Assert.assertEquals(fInStream.available(), 0); + fInStream.close(); + + } + + public void testCompareTo() { + HCatRecord[] recs = getHCatRecords(); + Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[0], recs[1]) == 0); + Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[4], recs[5]) == 0); + } + + public void testEqualsObject() { + + HCatRecord[] recs = getHCatRecords(); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[0], recs[1])); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[4], recs[5])); + } + + /** + * Test get and set calls with type + * @throws HCatException + */ + public void testGetSetByType1() throws HCatException { + HCatRecord inpRec = getHCatRecords()[0]; + HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); + HCatSchema hsch = + HCatSchemaUtils.getHCatSchema( + "a:tinyint,b:smallint,c:int,d:bigint,e:float,f:double,g:boolean,h:string,i:binary,j:string"); + + + newRec.setByte("a", hsch, inpRec.getByte("a", hsch)); + newRec.setShort("b", hsch, inpRec.getShort("b", hsch)); + newRec.setInteger("c", hsch, inpRec.getInteger("c", hsch)); + newRec.setLong("d", hsch, inpRec.getLong("d", hsch)); + newRec.setFloat("e", hsch, inpRec.getFloat("e", hsch)); + newRec.setDouble("f", hsch, inpRec.getDouble("f", hsch)); + newRec.setBoolean("g", hsch, inpRec.getBoolean("g", hsch)); + newRec.setString("h", hsch, inpRec.getString("h", hsch)); + newRec.setByteArray("i", hsch, inpRec.getByteArray("i", hsch)); + newRec.setString("j", hsch, inpRec.getString("j", hsch)); + + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); + + + } + + /** + * Test get and set calls with type + * @throws HCatException + */ + public void testGetSetByType2() throws HCatException { + HCatRecord inpRec = getGetSet2InpRec(); + + HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); + HCatSchema hsch = + HCatSchemaUtils.getHCatSchema("a:binary,b:map,c:array,d:struct"); + + + newRec.setByteArray("a", hsch, inpRec.getByteArray("a", hsch)); + newRec.setMap("b", hsch, inpRec.getMap("b", hsch)); + newRec.setList("c", hsch, inpRec.getList("c", hsch)); + newRec.setStruct("d", hsch, inpRec.getStruct("d", hsch)); + + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); + } + + + private HCatRecord getGetSet2InpRec() { + List rlist = new ArrayList(); + + rlist.add(new byte[]{1, 2, 3}); + + Map mapcol = new HashMap(3); + mapcol.put(new Short("2"), "hcat is cool"); + mapcol.put(new Short("3"), "is it?"); + mapcol.put(new Short("4"), "or is it not?"); + rlist.add(mapcol); + + List listcol = new ArrayList(); + listcol.add(314); + listcol.add(007); + rlist.add(listcol);//list + rlist.add(listcol);//struct + return new DefaultHCatRecord(rlist); + } + + private HCatRecord[] getHCatRecords() { + + List rec_1 = new ArrayList(8); + rec_1.add(new Byte("123")); + rec_1.add(new Short("456")); + rec_1.add(new Integer(789)); + rec_1.add(new Long(1000L)); + rec_1.add(new Float(5.3F)); + rec_1.add(new Double(5.3D)); + rec_1.add(new Boolean(true)); + rec_1.add(new String("hcat and hadoop")); + rec_1.add(null); + rec_1.add("null"); + + HCatRecord tup_1 = new DefaultHCatRecord(rec_1); + + List rec_2 = new ArrayList(8); + rec_2.add(new Byte("123")); + rec_2.add(new Short("456")); + rec_2.add(new Integer(789)); + rec_2.add(new Long(1000L)); + rec_2.add(new Float(5.3F)); + rec_2.add(new Double(5.3D)); + rec_2.add(new Boolean(true)); + rec_2.add(new String("hcat and hadoop")); + rec_2.add(null); + rec_2.add("null"); + HCatRecord tup_2 = new DefaultHCatRecord(rec_2); + + List rec_3 = new 
ArrayList(10); + rec_3.add(new Byte("123")); + rec_3.add(new Short("456")); + rec_3.add(new Integer(789)); + rec_3.add(new Long(1000L)); + rec_3.add(new Double(5.3D)); + rec_3.add(new String("hcat and hadoop")); + rec_3.add(null); + List innerList = new ArrayList(); + innerList.add(314); + innerList.add(007); + rec_3.add(innerList); + Map map = new HashMap(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or is it not?"); + rec_3.add(map); + + HCatRecord tup_3 = new DefaultHCatRecord(rec_3); + + List rec_4 = new ArrayList(8); + rec_4.add(new Byte("123")); + rec_4.add(new Short("456")); + rec_4.add(new Integer(789)); + rec_4.add(new Long(1000L)); + rec_4.add(new Double(5.3D)); + rec_4.add(new String("hcat and hadoop")); + rec_4.add(null); + rec_4.add("null"); + + Map map2 = new HashMap(3); + map2.put(new Short("2"), "hcat is cool"); + map2.put(new Short("3"), "is it?"); + map2.put(new Short("4"), "or is it not?"); + rec_4.add(map2); + List innerList2 = new ArrayList(); + innerList2.add(314); + innerList2.add(007); + rec_4.add(innerList2); + HCatRecord tup_4 = new DefaultHCatRecord(rec_4); + + + List rec_5 = new ArrayList(3); + rec_5.add(getByteArray()); + rec_5.add(getStruct()); + rec_5.add(getList()); + HCatRecord tup_5 = new DefaultHCatRecord(rec_5); + + + List rec_6 = new ArrayList(3); + rec_6.add(getByteArray()); + rec_6.add(getStruct()); + rec_6.add(getList()); + HCatRecord tup_6 = new DefaultHCatRecord(rec_6); + + + return new HCatRecord[]{tup_1, tup_2, tup_3, tup_4, tup_5, tup_6}; + + } + + private Object getList() { + return getStruct(); + } + + private Object getByteArray() { + return new byte[]{1, 2, 3, 4}; + } + + private List getStruct() { + List struct = new ArrayList(); + struct.add(new Integer(1)); + struct.add(new String("x")); + return struct; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java index 3d14a1f..e84b789 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java @@ -37,133 +37,133 @@ public class TestHCatRecordSerDe extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatRecordSerDe.class); - - public Map getData() { - Map data = new HashMap(); - - List rlist = new ArrayList(11); - rlist.add(new Byte("123")); - rlist.add(new Short("456")); - rlist.add(new Integer(789)); - rlist.add(new Long(1000L)); - rlist.add(new Double(5.3D)); - rlist.add(new Float(2.39F)); - rlist.add(new String("hcat and hadoop")); - rlist.add(null); - - List innerStruct = new ArrayList(2); - innerStruct.add(new String("abc")); - innerStruct.add(new String("def")); - rlist.add(innerStruct); - - List innerList = new ArrayList(); - innerList.add(314); - innerList.add(007); - rlist.add(innerList); - - Map map = new HashMap(3); - map.put(new Short("2"), "hcat is cool"); - map.put(new Short("3"), "is it?"); - map.put(new Short("4"), "or is it not?"); - rlist.add(map); - - rlist.add(new Boolean(true)); - - List c1 = new ArrayList(); - List c1_1 = new ArrayList(); - c1_1.add(new Integer(12)); - List i2 = new ArrayList(); - List ii1 = new ArrayList(); - ii1.add(new Integer(13)); - ii1.add(new Integer(14)); - i2.add(ii1); - Map> ii2 = new HashMap>(); - List iii1 = new ArrayList(); - iii1.add(new Integer(15)); - ii2.put("phew", iii1); - i2.add(ii2); - 
c1_1.add(i2); - c1.add(c1_1); - rlist.add(c1); - List am = new ArrayList(); - Map am_1 = new HashMap(); - am_1.put("noo", "haha"); - am.add(am_1); - rlist.add(am); - List aa = new ArrayList(); - List aa_1 = new ArrayList(); - aa_1.add("bloo"); - aa_1.add("bwahaha"); - aa.add(aa_1); - rlist.add(aa); - - String typeString = - "tinyint,smallint,int,bigint,double,float,string,string," - + "struct,array,map,boolean," - + "array,ii2:map>>>>," - + "array>,array>"; - Properties props = new Properties(); - - props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1,am,aa"); - props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); + private static final Logger LOG = LoggerFactory.getLogger(TestHCatRecordSerDe.class); + + public Map getData() { + Map data = new HashMap(); + + List rlist = new ArrayList(11); + rlist.add(new Byte("123")); + rlist.add(new Short("456")); + rlist.add(new Integer(789)); + rlist.add(new Long(1000L)); + rlist.add(new Double(5.3D)); + rlist.add(new Float(2.39F)); + rlist.add(new String("hcat and hadoop")); + rlist.add(null); + + List innerStruct = new ArrayList(2); + innerStruct.add(new String("abc")); + innerStruct.add(new String("def")); + rlist.add(innerStruct); + + List innerList = new ArrayList(); + innerList.add(314); + innerList.add(007); + rlist.add(innerList); + + Map map = new HashMap(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or is it not?"); + rlist.add(map); + + rlist.add(new Boolean(true)); + + List c1 = new ArrayList(); + List c1_1 = new ArrayList(); + c1_1.add(new Integer(12)); + List i2 = new ArrayList(); + List ii1 = new ArrayList(); + ii1.add(new Integer(13)); + ii1.add(new Integer(14)); + i2.add(ii1); + Map> ii2 = new HashMap>(); + List iii1 = new ArrayList(); + iii1.add(new Integer(15)); + ii2.put("phew", iii1); + i2.add(ii2); + c1_1.add(i2); + c1.add(c1_1); + rlist.add(c1); + List am = new ArrayList(); + Map am_1 = new HashMap(); + am_1.put("noo", "haha"); + am.add(am_1); + rlist.add(am); + List aa = new ArrayList(); + List aa_1 = new ArrayList(); + aa_1.add("bloo"); + aa_1.add("bwahaha"); + aa.add(aa_1); + rlist.add(aa); + + String typeString = + "tinyint,smallint,int,bigint,double,float,string,string," + + "struct,array,map,boolean," + + "array,ii2:map>>>>," + + "array>,array>"; + Properties props = new Properties(); + + props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1,am,aa"); + props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); // props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); // props.put(Constants.SERIALIZATION_FORMAT, "1"); - data.put(props, new DefaultHCatRecord(rlist)); - return data; - } - - public void testRW() throws Exception { + data.put(props, new DefaultHCatRecord(rlist)); + return data; + } - Configuration conf = new Configuration(); + public void testRW() throws Exception { - for (Entry e : getData().entrySet()) { - Properties tblProps = e.getKey(); - HCatRecord r = e.getValue(); + Configuration conf = new Configuration(); - HCatRecordSerDe hrsd = new HCatRecordSerDe(); - hrsd.initialize(conf, tblProps); + for (Entry e : getData().entrySet()) { + Properties tblProps = e.getKey(); + HCatRecord r = e.getValue(); - LOG.info("ORIG: {}", r); + HCatRecordSerDe hrsd = new HCatRecordSerDe(); + hrsd.initialize(conf, tblProps); - Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); - LOG.info("ONE: {}", s); + LOG.info("ORIG: {}", r); - HCatRecord r2 = (HCatRecord) hrsd.deserialize(s); - 
Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, r2)); + Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); + LOG.info("ONE: {}", s); - // If it went through correctly, then s is also a HCatRecord, - // and also equal to the above, and a deepcopy, and this holds - // through for multiple levels more of serialization as well. + HCatRecord r2 = (HCatRecord) hrsd.deserialize(s); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, r2)); - Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector()); - LOG.info("TWO: {}", s2); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s)); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2)); + // If it went through correctly, then s is also a HCatRecord, + // and also equal to the above, and a deepcopy, and this holds + // through for multiple levels more of serialization as well. - // serialize using another serde, and read out that object repr. - LazySimpleSerDe testSD = new LazySimpleSerDe(); - testSD.initialize(conf, tblProps); + Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector()); + LOG.info("TWO: {}", s2); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s)); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2)); - Writable s3 = testSD.serialize(s, hrsd.getObjectInspector()); - LOG.info("THREE: {}", s3); - Object o3 = testSD.deserialize(s3); - Assert.assertFalse(r.getClass().equals(o3.getClass())); + // serialize using another serde, and read out that object repr. + LazySimpleSerDe testSD = new LazySimpleSerDe(); + testSD.initialize(conf, tblProps); - // then serialize again using hrsd, and compare results - HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector()); - LOG.info("FOUR: {}", s4); + Writable s3 = testSD.serialize(s, hrsd.getObjectInspector()); + LOG.info("THREE: {}", s3); + Object o3 = testSD.deserialize(s3); + Assert.assertFalse(r.getClass().equals(o3.getClass())); - // Test LazyHCatRecord init and read - LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector()); - LOG.info("FIVE: {}", s5); + // then serialize again using hrsd, and compare results + HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector()); + LOG.info("FOUR: {}", s4); - LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector()); - LOG.info("SIX: {}", s6); + // Test LazyHCatRecord init and read + LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector()); + LOG.info("FIVE: {}", s5); - } + LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector()); + LOG.info("SIX: {}", s6); } + } + } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java index cccd3f1..c8aff6f 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java @@ -35,180 +35,180 @@ public class TestJsonSerDe extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestJsonSerDe.class); - - public List> getData() { - List> data = new ArrayList>(); - - List rlist = new ArrayList(13); - rlist.add(new Byte("123")); - rlist.add(new Short("456")); - rlist.add(new Integer(789)); - rlist.add(new Long(1000L)); - rlist.add(new Double(5.3D)); - rlist.add(new Float(2.39F)); - rlist.add(new String("hcat and hadoop")); - rlist.add(null); - - List innerStruct = new ArrayList(2); - innerStruct.add(new 
String("abc")); - innerStruct.add(new String("def")); - rlist.add(innerStruct); - - List innerList = new ArrayList(); - innerList.add(314); - innerList.add(007); - rlist.add(innerList); - - Map map = new HashMap(3); - map.put(new Short("2"), "hcat is cool"); - map.put(new Short("3"), "is it?"); - map.put(new Short("4"), "or is it not?"); - rlist.add(map); - - rlist.add(new Boolean(true)); - - List c1 = new ArrayList(); - List c1_1 = new ArrayList(); - c1_1.add(new Integer(12)); - List i2 = new ArrayList(); - List ii1 = new ArrayList(); - ii1.add(new Integer(13)); - ii1.add(new Integer(14)); - i2.add(ii1); - Map> ii2 = new HashMap>(); - List iii1 = new ArrayList(); - iii1.add(new Integer(15)); - ii2.put("phew", iii1); - i2.add(ii2); - c1_1.add(i2); - c1.add(c1_1); - rlist.add(c1); - - List nlist = new ArrayList(13); - nlist.add(null); // tinyint - nlist.add(null); // smallint - nlist.add(null); // int - nlist.add(null); // bigint - nlist.add(null); // double - nlist.add(null); // float - nlist.add(null); // string - nlist.add(null); // string - nlist.add(null); // struct - nlist.add(null); // array - nlist.add(null); // map - nlist.add(null); // bool - nlist.add(null); // complex - - String typeString = - "tinyint,smallint,int,bigint,double,float,string,string," - + "struct,array,map,boolean," - + "array,ii2:map>>>>"; - Properties props = new Properties(); - - props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1"); - props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); + private static final Logger LOG = LoggerFactory.getLogger(TestJsonSerDe.class); + + public List> getData() { + List> data = new ArrayList>(); + + List rlist = new ArrayList(13); + rlist.add(new Byte("123")); + rlist.add(new Short("456")); + rlist.add(new Integer(789)); + rlist.add(new Long(1000L)); + rlist.add(new Double(5.3D)); + rlist.add(new Float(2.39F)); + rlist.add(new String("hcat and hadoop")); + rlist.add(null); + + List innerStruct = new ArrayList(2); + innerStruct.add(new String("abc")); + innerStruct.add(new String("def")); + rlist.add(innerStruct); + + List innerList = new ArrayList(); + innerList.add(314); + innerList.add(007); + rlist.add(innerList); + + Map map = new HashMap(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or is it not?"); + rlist.add(map); + + rlist.add(new Boolean(true)); + + List c1 = new ArrayList(); + List c1_1 = new ArrayList(); + c1_1.add(new Integer(12)); + List i2 = new ArrayList(); + List ii1 = new ArrayList(); + ii1.add(new Integer(13)); + ii1.add(new Integer(14)); + i2.add(ii1); + Map> ii2 = new HashMap>(); + List iii1 = new ArrayList(); + iii1.add(new Integer(15)); + ii2.put("phew", iii1); + i2.add(ii2); + c1_1.add(i2); + c1.add(c1_1); + rlist.add(c1); + + List nlist = new ArrayList(13); + nlist.add(null); // tinyint + nlist.add(null); // smallint + nlist.add(null); // int + nlist.add(null); // bigint + nlist.add(null); // double + nlist.add(null); // float + nlist.add(null); // string + nlist.add(null); // string + nlist.add(null); // struct + nlist.add(null); // array + nlist.add(null); // map + nlist.add(null); // bool + nlist.add(null); // complex + + String typeString = + "tinyint,smallint,int,bigint,double,float,string,string," + + "struct,array,map,boolean," + + "array,ii2:map>>>>"; + Properties props = new Properties(); + + props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1"); + props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); // 
props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); // props.put(Constants.SERIALIZATION_FORMAT, "1"); - data.add(new Pair(props, new DefaultHCatRecord(rlist))); - data.add(new Pair(props, new DefaultHCatRecord(nlist))); - return data; - } - - public void testRW() throws Exception { + data.add(new Pair(props, new DefaultHCatRecord(rlist))); + data.add(new Pair(props, new DefaultHCatRecord(nlist))); + return data; + } - Configuration conf = new Configuration(); + public void testRW() throws Exception { - for (Pair e : getData()) { - Properties tblProps = e.first; - HCatRecord r = e.second; + Configuration conf = new Configuration(); - HCatRecordSerDe hrsd = new HCatRecordSerDe(); - hrsd.initialize(conf, tblProps); + for (Pair e : getData()) { + Properties tblProps = e.first; + HCatRecord r = e.second; - JsonSerDe jsde = new JsonSerDe(); - jsde.initialize(conf, tblProps); + HCatRecordSerDe hrsd = new HCatRecordSerDe(); + hrsd.initialize(conf, tblProps); - LOG.info("ORIG:{}", r); + JsonSerDe jsde = new JsonSerDe(); + jsde.initialize(conf, tblProps); - Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); - LOG.info("ONE:{}", s); + LOG.info("ORIG:{}", r); - Object o1 = hrsd.deserialize(s); - assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1)); + Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); + LOG.info("ONE:{}", s); - Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector()); - LOG.info("TWO:{}", s2); - Object o2 = jsde.deserialize(s2); - LOG.info("deserialized TWO : {} ", o2); + Object o1 = hrsd.deserialize(s); + assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1)); - assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); - } + Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector()); + LOG.info("TWO:{}", s2); + Object o2 = jsde.deserialize(s2); + LOG.info("deserialized TWO : {} ", o2); + assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); } - public void testRobustRead() throws Exception { - /** - * This test has been added to account for HCATALOG-436 - * We write out columns with "internal column names" such - * as "_col0", but try to read with retular column names. - */ + } - Configuration conf = new Configuration(); + public void testRobustRead() throws Exception { + /** + * This test has been added to account for HCATALOG-436 + * We write out columns with "internal column names" such + * as "_col0", but try to read with retular column names. 
+ */ - for (Pair e : getData()) { - Properties tblProps = e.first; - HCatRecord r = e.second; + Configuration conf = new Configuration(); - Properties internalTblProps = new Properties(); - for (Map.Entry pe : tblProps.entrySet()) { - if (!pe.getKey().equals(serdeConstants.LIST_COLUMNS)) { - internalTblProps.put(pe.getKey(), pe.getValue()); - } else { - internalTblProps.put(pe.getKey(), getInternalNames((String) pe.getValue())); - } - } + for (Pair e : getData()) { + Properties tblProps = e.first; + HCatRecord r = e.second; - LOG.info("orig tbl props:{}", tblProps); - LOG.info("modif tbl props:{}", internalTblProps); + Properties internalTblProps = new Properties(); + for (Map.Entry pe : tblProps.entrySet()) { + if (!pe.getKey().equals(serdeConstants.LIST_COLUMNS)) { + internalTblProps.put(pe.getKey(), pe.getValue()); + } else { + internalTblProps.put(pe.getKey(), getInternalNames((String) pe.getValue())); + } + } - JsonSerDe wjsd = new JsonSerDe(); - wjsd.initialize(conf, internalTblProps); + LOG.info("orig tbl props:{}", tblProps); + LOG.info("modif tbl props:{}", internalTblProps); - JsonSerDe rjsd = new JsonSerDe(); - rjsd.initialize(conf, tblProps); + JsonSerDe wjsd = new JsonSerDe(); + wjsd.initialize(conf, internalTblProps); - LOG.info("ORIG:{}", r); + JsonSerDe rjsd = new JsonSerDe(); + rjsd.initialize(conf, tblProps); - Writable s = wjsd.serialize(r, wjsd.getObjectInspector()); - LOG.info("ONE:{}", s); + LOG.info("ORIG:{}", r); - Object o1 = wjsd.deserialize(s); - LOG.info("deserialized ONE : {} ", o1); + Writable s = wjsd.serialize(r, wjsd.getObjectInspector()); + LOG.info("ONE:{}", s); - Object o2 = rjsd.deserialize(s); - LOG.info("deserialized TWO : {} ", o2); - assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); - } + Object o1 = wjsd.deserialize(s); + LOG.info("deserialized ONE : {} ", o1); + Object o2 = rjsd.deserialize(s); + LOG.info("deserialized TWO : {} ", o2); + assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); } - String getInternalNames(String columnNames) { - if (columnNames == null) { - return null; - } - if (columnNames.isEmpty()) { - return ""; - } + } - StringBuffer sb = new StringBuffer(); - int numStrings = columnNames.split(",").length; - sb.append("_col0"); - for (int i = 1; i < numStrings; i++) { - sb.append(","); - sb.append(HiveConf.getColumnInternalName(i)); - } - return sb.toString(); + String getInternalNames(String columnNames) { + if (columnNames == null) { + return null; + } + if (columnNames.isEmpty()) { + return ""; + } + + StringBuffer sb = new StringBuffer(); + int numStrings = columnNames.split(",").length; + sb.append("_col0"); + for (int i = 1; i < numStrings; i++) { + sb.append(","); + sb.append(HiveConf.getColumnInternalName(i)); } + return sb.toString(); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestLazyHCatRecord.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestLazyHCatRecord.java index 6d6f35a..9b36ddf 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestLazyHCatRecord.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestLazyHCatRecord.java @@ -32,162 +32,162 @@ public class TestLazyHCatRecord { - private final int INT_CONST = 789; - private final long LONG_CONST = 5000000000L; - private final double DOUBLE_CONST = 3.141592654; - private final String STRING_CONST = "hello world"; - - @Test - public void testGet() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - 
Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) r.get(3)); + private final int INT_CONST = 789; + private final long LONG_CONST = 5000000000L; + private final double DOUBLE_CONST = 3.141592654; + private final String STRING_CONST = "hello world"; + + @Test + public void testGet() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) r.get(3)); + } + + @Test + public void testGetWithName() throws Exception { + TypeInfo ti = getTypeInfo(); + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti)); + HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti) + .get(0).getStructSubSchema(); + Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema)); + } + + @Test + public void testGetAll() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + List list = r.getAll(); + Assert.assertEquals(INT_CONST, ((Integer) list.get(0)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) list.get(1)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) list.get(2)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) list.get(3)); + } + + @Test + public void testSet() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.set(3, "Mary had a little lamb"); + } catch (UnsupportedOperationException uoe) { + sawException = true; } - - @Test - public void testGetWithName() throws Exception { - TypeInfo ti = getTypeInfo(); - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti)); - HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti) - .get(0).getStructSubSchema(); - Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema)); - } - - @Test - public void testGetAll() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - List list = r.getAll(); - Assert.assertEquals(INT_CONST, ((Integer) list.get(0)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) list.get(1)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) list.get(2)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) list.get(3)); - } - - @Test - public void testSet() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.set(3, "Mary had a little lamb"); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testSize() throws 
Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - Assert.assertEquals(4, r.size()); - } - - @Test - public void testReadFields() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.readFields(null); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testWrite() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.write(null); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testSetWithName() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.set("fred", null, "bob"); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); + Assert.assertTrue(sawException); + } + + @Test + public void testSize() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + Assert.assertEquals(4, r.size()); + } + + @Test + public void testReadFields() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.readFields(null); + } catch (UnsupportedOperationException uoe) { + sawException = true; } - - @Test - public void testRemove() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.remove(0); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); + Assert.assertTrue(sawException); + } + + @Test + public void testWrite() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.write(null); + } catch (UnsupportedOperationException uoe) { + sawException = true; } - - @Test - public void testCopy() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.copy(null); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); + Assert.assertTrue(sawException); + } + + @Test + public void testSetWithName() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.set("fred", null, "bob"); + } catch (UnsupportedOperationException uoe) { + sawException = true; } - - @Test - public void testGetWritable() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()).getWritable(); - Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) r.get(3)); - Assert.assertEquals("org.apache.hive.hcatalog.data.DefaultHCatRecord", r.getClass().getName()); + Assert.assertTrue(sawException); + } + + @Test + public void testRemove() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.remove(0); + } catch (UnsupportedOperationException uoe) { + sawException = 
true; } - - private HCatRecord getHCatRecord() throws Exception { - List rec_1 = new ArrayList(4); - rec_1.add( new Integer(INT_CONST)); - rec_1.add( new Long(LONG_CONST)); - rec_1.add( new Double(DOUBLE_CONST)); - rec_1.add( new String(STRING_CONST)); - - return new DefaultHCatRecord(rec_1); - } - - private TypeInfo getTypeInfo() throws Exception { - List names = new ArrayList(4); - names.add("an_int"); - names.add("a_long"); - names.add("a_double"); - names.add("a_string"); - - List tis = new ArrayList(4); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("int")); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("double")); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("string")); - - return TypeInfoFactory.getStructTypeInfo(names, tis); - } - - private ObjectInspector getObjectInspector(TypeInfo ti) throws Exception { - return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector((StructTypeInfo)ti); - } - - private ObjectInspector getObjectInspector() throws Exception { - return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector( - (StructTypeInfo)getTypeInfo()); + Assert.assertTrue(sawException); + } + + @Test + public void testCopy() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.copy(null); + } catch (UnsupportedOperationException uoe) { + sawException = true; } + Assert.assertTrue(sawException); + } + + @Test + public void testGetWritable() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()).getWritable(); + Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) r.get(3)); + Assert.assertEquals("org.apache.hive.hcatalog.data.DefaultHCatRecord", r.getClass().getName()); + } + + private HCatRecord getHCatRecord() throws Exception { + List rec_1 = new ArrayList(4); + rec_1.add( new Integer(INT_CONST)); + rec_1.add( new Long(LONG_CONST)); + rec_1.add( new Double(DOUBLE_CONST)); + rec_1.add( new String(STRING_CONST)); + + return new DefaultHCatRecord(rec_1); + } + + private TypeInfo getTypeInfo() throws Exception { + List names = new ArrayList(4); + names.add("an_int"); + names.add("a_long"); + names.add("a_double"); + names.add("a_string"); + + List tis = new ArrayList(4); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("int")); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("double")); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("string")); + + return TypeInfoFactory.getStructTypeInfo(names, tis); + } + + private ObjectInspector getObjectInspector(TypeInfo ti) throws Exception { + return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector((StructTypeInfo)ti); + } + + private ObjectInspector getObjectInspector() throws Exception { + return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector( + (StructTypeInfo)getTypeInfo()); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestReaderWriter.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestReaderWriter.java index c1017fc..f1e893e 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestReaderWriter.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestReaderWriter.java @@ 
-50,134 +50,134 @@ public class TestReaderWriter extends HCatBaseTest { - @Test - public void test() throws MetaException, CommandNeedRetryException, - IOException, ClassNotFoundException { - - driver.run("drop table mytbl"); - driver.run("create table mytbl (a string, b int)"); - Iterator> itr = hiveConf.iterator(); - Map map = new HashMap(); - while (itr.hasNext()) { - Entry kv = itr.next(); - map.put(kv.getKey(), kv.getValue()); - } - - WriterContext cntxt = runsInMaster(map); - - File writeCntxtFile = File.createTempFile("hcat-write", "temp"); - writeCntxtFile.deleteOnExit(); - - // Serialize context. - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(writeCntxtFile)); - oos.writeObject(cntxt); - oos.flush(); - oos.close(); - - // Now, deserialize it. - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(writeCntxtFile)); - cntxt = (WriterContext) ois.readObject(); - ois.close(); - - runsInSlave(cntxt); - commit(map, true, cntxt); - - ReaderContext readCntxt = runsInMaster(map, false); - - File readCntxtFile = File.createTempFile("hcat-read", "temp"); - readCntxtFile.deleteOnExit(); - oos = new ObjectOutputStream(new FileOutputStream(readCntxtFile)); - oos.writeObject(readCntxt); - oos.flush(); - oos.close(); - - ois = new ObjectInputStream(new FileInputStream(readCntxtFile)); - readCntxt = (ReaderContext) ois.readObject(); - ois.close(); - - for (InputSplit split : readCntxt.getSplits()) { - runsInSlave(split, readCntxt.getConf()); - } + @Test + public void test() throws MetaException, CommandNeedRetryException, + IOException, ClassNotFoundException { + + driver.run("drop table mytbl"); + driver.run("create table mytbl (a string, b int)"); + Iterator> itr = hiveConf.iterator(); + Map map = new HashMap(); + while (itr.hasNext()) { + Entry kv = itr.next(); + map.put(kv.getKey(), kv.getValue()); } - private WriterContext runsInMaster(Map config) throws HCatException { + WriterContext cntxt = runsInMaster(map); - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable("mytbl").build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - WriterContext info = writer.prepareWrite(); - return info; - } + File writeCntxtFile = File.createTempFile("hcat-write", "temp"); + writeCntxtFile.deleteOnExit(); - private ReaderContext runsInMaster(Map config, boolean bogus) - throws HCatException { - ReadEntity entity = new ReadEntity.Builder().withTable("mytbl").build(); - HCatReader reader = DataTransferFactory.getHCatReader(entity, config); - ReaderContext cntxt = reader.prepareRead(); - return cntxt; - } + // Serialize context. + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(writeCntxtFile)); + oos.writeObject(cntxt); + oos.flush(); + oos.close(); - private void runsInSlave(InputSplit split, Configuration config) throws HCatException { - - HCatReader reader = DataTransferFactory.getHCatReader(split, config); - Iterator itr = reader.read(); - int i = 1; - while (itr.hasNext()) { - HCatRecord read = itr.next(); - HCatRecord written = getRecord(i++); - // Argh, HCatRecord doesnt implement equals() - Assert.assertTrue("Read: " + read.get(0) + "Written: " + written.get(0), - written.get(0).equals(read.get(0))); - Assert.assertTrue("Read: " + read.get(1) + "Written: " + written.get(1), - written.get(1).equals(read.get(1))); - Assert.assertEquals(2, read.size()); - } - //Assert.assertFalse(itr.hasNext()); - } + // Now, deserialize it. 
+ ObjectInputStream ois = new ObjectInputStream(new FileInputStream(writeCntxtFile)); + cntxt = (WriterContext) ois.readObject(); + ois.close(); - private void runsInSlave(WriterContext context) throws HCatException { + runsInSlave(cntxt); + commit(map, true, cntxt); - HCatWriter writer = DataTransferFactory.getHCatWriter(context); - writer.write(new HCatRecordItr()); - } + ReaderContext readCntxt = runsInMaster(map, false); - private void commit(Map config, boolean status, - WriterContext context) throws IOException { - - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable("mytbl").build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - if (status) { - writer.commit(context); - } else { - writer.abort(context); - } + File readCntxtFile = File.createTempFile("hcat-read", "temp"); + readCntxtFile.deleteOnExit(); + oos = new ObjectOutputStream(new FileOutputStream(readCntxtFile)); + oos.writeObject(readCntxt); + oos.flush(); + oos.close(); + + ois = new ObjectInputStream(new FileInputStream(readCntxtFile)); + readCntxt = (ReaderContext) ois.readObject(); + ois.close(); + + for (InputSplit split : readCntxt.getSplits()) { + runsInSlave(split, readCntxt.getConf()); + } + } + + private WriterContext runsInMaster(Map config) throws HCatException { + + WriteEntity.Builder builder = new WriteEntity.Builder(); + WriteEntity entity = builder.withTable("mytbl").build(); + HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); + WriterContext info = writer.prepareWrite(); + return info; + } + + private ReaderContext runsInMaster(Map config, boolean bogus) + throws HCatException { + ReadEntity entity = new ReadEntity.Builder().withTable("mytbl").build(); + HCatReader reader = DataTransferFactory.getHCatReader(entity, config); + ReaderContext cntxt = reader.prepareRead(); + return cntxt; + } + + private void runsInSlave(InputSplit split, Configuration config) throws HCatException { + + HCatReader reader = DataTransferFactory.getHCatReader(split, config); + Iterator itr = reader.read(); + int i = 1; + while (itr.hasNext()) { + HCatRecord read = itr.next(); + HCatRecord written = getRecord(i++); + // Argh, HCatRecord doesnt implement equals() + Assert.assertTrue("Read: " + read.get(0) + "Written: " + written.get(0), + written.get(0).equals(read.get(0))); + Assert.assertTrue("Read: " + read.get(1) + "Written: " + written.get(1), + written.get(1).equals(read.get(1))); + Assert.assertEquals(2, read.size()); } + //Assert.assertFalse(itr.hasNext()); + } - private static HCatRecord getRecord(int i) { - List list = new ArrayList(2); - list.add("Row #: " + i); - list.add(i); - return new DefaultHCatRecord(list); + private void runsInSlave(WriterContext context) throws HCatException { + + HCatWriter writer = DataTransferFactory.getHCatWriter(context); + writer.write(new HCatRecordItr()); + } + + private void commit(Map config, boolean status, + WriterContext context) throws IOException { + + WriteEntity.Builder builder = new WriteEntity.Builder(); + WriteEntity entity = builder.withTable("mytbl").build(); + HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); + if (status) { + writer.commit(context); + } else { + writer.abort(context); } + } - private static class HCatRecordItr implements Iterator { + private static HCatRecord getRecord(int i) { + List list = new ArrayList(2); + list.add("Row #: " + i); + list.add(i); + return new DefaultHCatRecord(list); + } - int i = 0; + private static class HCatRecordItr 
implements Iterator { - @Override - public boolean hasNext() { - return i++ < 100 ? true : false; - } + int i = 0; - @Override - public HCatRecord next() { - return getRecord(i); - } + @Override + public boolean hasNext() { + return i++ < 100 ? true : false; + } + + @Override + public HCatRecord next() { + return getRecord(i); + } - @Override - public void remove() { - throw new RuntimeException(); - } + @Override + public void remove() { + throw new RuntimeException(); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchema.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchema.java index 5d888cd..3b5053c 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchema.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchema.java @@ -25,79 +25,79 @@ import java.util.List; public class TestHCatSchema extends TestCase { - public void testCannotAddFieldMoreThanOnce() throws HCatException { - List fieldSchemaList = new ArrayList(); - fieldSchemaList.add(new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, "What's your handle?")); - fieldSchemaList.add(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); - - HCatSchema schema = new HCatSchema(fieldSchemaList); - - assertTrue(schema.getFieldNames().contains("age")); - assertEquals(2, schema.getFields().size()); - - try { - schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); - fail("Was able to append field schema with same name"); - } catch (HCatException he) { - assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); - } - - assertTrue(schema.getFieldNames().contains("age")); - assertEquals(2, schema.getFields().size()); - - // Should also not be able to add fields of different types with same name - try { - schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.STRING, "Maybe spelled out?")); - fail("Was able to append field schema with same name"); - } catch (HCatException he) { - assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); - } - - assertTrue(schema.getFieldNames().contains("age")); - assertEquals(2, schema.getFields().size()); + public void testCannotAddFieldMoreThanOnce() throws HCatException { + List fieldSchemaList = new ArrayList(); + fieldSchemaList.add(new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, "What's your handle?")); + fieldSchemaList.add(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); + + HCatSchema schema = new HCatSchema(fieldSchemaList); + + assertTrue(schema.getFieldNames().contains("age")); + assertEquals(2, schema.getFields().size()); + + try { + schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); + fail("Was able to append field schema with same name"); + } catch (HCatException he) { + assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); } - public void testHashCodeEquals() throws HCatException { - HCatFieldSchema memberID1 = new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number"); - HCatFieldSchema memberID2 = new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number"); - assertTrue("Expected objects to be equal", memberID1.equals(memberID2)); - assertTrue("Expected hash codes to be equal", memberID1.hashCode() == memberID2.hashCode()); + 
assertTrue(schema.getFieldNames().contains("age")); + assertEquals(2, schema.getFields().size()); + + // Should also not be able to add fields of different types with same name + try { + schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.STRING, "Maybe spelled out?")); + fail("Was able to append field schema with same name"); + } catch (HCatException he) { + assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); } - public void testCannotInstantiateSchemaWithRepeatedFieldNames() throws HCatException { - List fieldSchemaList = new ArrayList(); + assertTrue(schema.getFieldNames().contains("age")); + assertEquals(2, schema.getFields().size()); + } - fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); - fieldSchemaList.add(new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo")); + public void testHashCodeEquals() throws HCatException { + HCatFieldSchema memberID1 = new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number"); + HCatFieldSchema memberID2 = new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number"); + assertTrue("Expected objects to be equal", memberID1.equals(memberID2)); + assertTrue("Expected hash codes to be equal", memberID1.hashCode() == memberID2.hashCode()); + } - // No duplicate names. This should be ok - HCatSchema schema = new HCatSchema(fieldSchemaList); + public void testCannotInstantiateSchemaWithRepeatedFieldNames() throws HCatException { + List fieldSchemaList = new ArrayList(); - fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.STRING, "as a String")); + fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); + fieldSchemaList.add(new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo")); - // Now a duplicated field name. Should fail - try { - HCatSchema schema2 = new HCatSchema(fieldSchemaList); - fail("Able to add duplicate field name"); - } catch (IllegalArgumentException iae) { - assertTrue(iae.getMessage().contains("Field named memberID already exists")); - } + // No duplicate names. This should be ok + HCatSchema schema = new HCatSchema(fieldSchemaList); + + fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.STRING, "as a String")); + + // Now a duplicated field name. 
Should fail + try { + HCatSchema schema2 = new HCatSchema(fieldSchemaList); + fail("Able to add duplicate field name"); + } catch (IllegalArgumentException iae) { + assertTrue(iae.getMessage().contains("Field named memberID already exists")); + } + } + public void testRemoveAddField() throws HCatException { + List fieldSchemaList = new ArrayList(); + + fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); + HCatFieldSchema locationField = new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo"); + fieldSchemaList.add(locationField); + HCatSchema schema = new HCatSchema(fieldSchemaList); + schema.remove(locationField); + Integer position = schema.getPosition(locationField.getName()); + assertTrue("position is not null after remove" , position == null); + try { + schema.append(locationField); } - public void testRemoveAddField() throws HCatException { - List fieldSchemaList = new ArrayList(); - - fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); - HCatFieldSchema locationField = new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo"); - fieldSchemaList.add(locationField); - HCatSchema schema = new HCatSchema(fieldSchemaList); - schema.remove(locationField); - Integer position = schema.getPosition(locationField.getName()); - assertTrue("position is not null after remove" , position == null); - try { - schema.append(locationField); - } - catch (HCatException ex) { - assertFalse(ex.getMessage(), true); - } + catch (HCatException ex) { + assertFalse(ex.getMessage(), true); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchemaUtils.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchemaUtils.java index bc19f89..56c1844 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchemaUtils.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchemaUtils.java @@ -31,52 +31,52 @@ public class TestHCatSchemaUtils extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatSchemaUtils.class); + private static final Logger LOG = LoggerFactory.getLogger(TestHCatSchemaUtils.class); - public void testSimpleOperation() throws Exception { - String typeString = "struct," - + "currently_registered_courses:array," - + "current_grades:map," - + "phnos:array>,blah:array>"; + public void testSimpleOperation() throws Exception { + String typeString = "struct," + + "currently_registered_courses:array," + + "current_grades:map," + + "phnos:array>,blah:array>"; - TypeInfo ti = TypeInfoUtils.getTypeInfoFromTypeString(typeString); + TypeInfo ti = TypeInfoUtils.getTypeInfoFromTypeString(typeString); - HCatSchema hsch = HCatSchemaUtils.getHCatSchemaFromTypeString(typeString); - LOG.info("Type name : {}", ti.getTypeName()); - LOG.info("HCatSchema : {}", hsch); - assertEquals(hsch.size(), 1); - assertEquals(ti.getTypeName(), hsch.get(0).getTypeString()); - assertEquals(hsch.get(0).getTypeString(), typeString); - } + HCatSchema hsch = HCatSchemaUtils.getHCatSchemaFromTypeString(typeString); + LOG.info("Type name : {}", ti.getTypeName()); + LOG.info("HCatSchema : {}", hsch); + assertEquals(hsch.size(), 1); + assertEquals(ti.getTypeName(), hsch.get(0).getTypeString()); + assertEquals(hsch.get(0).getTypeString(), typeString); + } - @SuppressWarnings("unused") - private void pretty_print(PrintStream pout, HCatSchema hsch) throws 
HCatException { - pretty_print(pout, hsch, ""); - } + @SuppressWarnings("unused") + private void pretty_print(PrintStream pout, HCatSchema hsch) throws HCatException { + pretty_print(pout, hsch, ""); + } - private void pretty_print(PrintStream pout, HCatSchema hsch, String prefix) throws HCatException { - int i = 0; - for (HCatFieldSchema field : hsch.getFields()) { - pretty_print(pout, field, prefix + "." + (field.getName() == null ? i : field.getName())); - i++; - } + private void pretty_print(PrintStream pout, HCatSchema hsch, String prefix) throws HCatException { + int i = 0; + for (HCatFieldSchema field : hsch.getFields()) { + pretty_print(pout, field, prefix + "." + (field.getName() == null ? i : field.getName())); + i++; } + } - private void pretty_print(PrintStream pout, HCatFieldSchema hfsch, String prefix) throws HCatException { + private void pretty_print(PrintStream pout, HCatFieldSchema hfsch, String prefix) throws HCatException { - Category tcat = hfsch.getCategory(); - if (Category.STRUCT == tcat) { - pretty_print(pout, hfsch.getStructSubSchema(), prefix); - } else if (Category.ARRAY == tcat) { - pretty_print(pout, hfsch.getArrayElementSchema(), prefix); - } else if (Category.MAP == tcat) { - pout.println(prefix + ".mapkey:\t" + hfsch.getMapKeyType().toString()); - pretty_print(pout, hfsch.getMapValueSchema(), prefix + ".mapvalue:"); - } else { - pout.println(prefix + "\t" + hfsch.getType().toString()); - } + Category tcat = hfsch.getCategory(); + if (Category.STRUCT == tcat) { + pretty_print(pout, hfsch.getStructSubSchema(), prefix); + } else if (Category.ARRAY == tcat) { + pretty_print(pout, hfsch.getArrayElementSchema(), prefix); + } else if (Category.MAP == tcat) { + pout.println(prefix + ".mapkey:\t" + hfsch.getMapKeyType().toString()); + pretty_print(pout, hfsch.getMapValueSchema(), prefix + ".mapvalue:"); + } else { + pout.println(prefix + "\t" + hfsch.getType().toString()); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java index 551b471..f68dbb8 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java @@ -27,26 +27,26 @@ public class TestOrcDynamicPartitioned extends TestHCatDynamicPartitioned { - @BeforeClass - public static void generateInputData() throws Exception { - tableName = "testOrcDynamicPartitionedTable"; - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - generateDataColumns(); - } - - @Override - protected String inputFormat() { - return OrcInputFormat.class.getName(); - } - - @Override - protected String outputFormat() { - return OrcOutputFormat.class.getName(); - } - - @Override - protected String serdeClass() { - return OrcSerde.class.getName(); - } + @BeforeClass + public static void generateInputData() throws Exception { + tableName = "testOrcDynamicPartitionedTable"; + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + generateDataColumns(); + } + + @Override + protected String inputFormat() { + return OrcInputFormat.class.getName(); + } + + @Override + protected String outputFormat() { + return OrcOutputFormat.class.getName(); + } + + @Override + protected String serdeClass() { + return OrcSerde.class.getName(); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java 
b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java index 8caa916..461bd99 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java @@ -39,48 +39,48 @@ * Simplify writing HCatalog tests that require a HiveMetaStore. */ public class HCatBaseTest { - protected static final Logger LOG = LoggerFactory.getLogger(HCatBaseTest.class); - protected static final String TEST_DATA_DIR = - "/tmp/build/test/data/" + HCatBaseTest.class.getCanonicalName(); - protected static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + protected static final Logger LOG = LoggerFactory.getLogger(HCatBaseTest.class); + protected static final String TEST_DATA_DIR = + "/tmp/build/test/data/" + HCatBaseTest.class.getCanonicalName(); + protected static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - protected HiveConf hiveConf = null; - protected Driver driver = null; - protected HiveMetaStoreClient client = null; + protected HiveConf hiveConf = null; + protected Driver driver = null; + protected HiveMetaStoreClient client = null; - @BeforeClass - public static void setUpTestDataDir() throws Exception { - LOG.info("Using warehouse directory " + TEST_WAREHOUSE_DIR); - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - Assert.assertTrue(new File(TEST_WAREHOUSE_DIR).mkdirs()); + @BeforeClass + public static void setUpTestDataDir() throws Exception { + LOG.info("Using warehouse directory " + TEST_WAREHOUSE_DIR); + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); } + Assert.assertTrue(new File(TEST_WAREHOUSE_DIR).mkdirs()); + } - @Before - public void setUp() throws Exception { - if (driver == null) { - setUpHiveConf(); - driver = new Driver(hiveConf); - client = new HiveMetaStoreClient(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - } + @Before + public void setUp() throws Exception { + if (driver == null) { + setUpHiveConf(); + driver = new Driver(hiveConf); + client = new HiveMetaStoreClient(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); } + } - /** - * Create a new HiveConf and set properties necessary for unit tests. - */ - protected void setUpHiveConf() { - hiveConf = new HiveConf(this.getClass()); - hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, ""); - hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, ""); - hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); - hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR); - } + /** + * Create a new HiveConf and set properties necessary for unit tests. 
+ */ + protected void setUpHiveConf() { + hiveConf = new HiveConf(this.getClass()); + hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, ""); + hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, ""); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); + hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR); + } - protected void logAndRegister(PigServer server, String query) throws IOException { - LOG.info("Registering pig query: " + query); - server.registerQuery(query); - } + protected void logAndRegister(PigServer server, String query) throws IOException { + LOG.info("Registering pig query: " + query); + server.registerQuery(query); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java index e71b672..dd63559 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java @@ -72,310 +72,310 @@ */ public abstract class HCatMapReduceTest extends HCatBaseTest { - private static final Logger LOG = LoggerFactory.getLogger(HCatMapReduceTest.class); - protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - protected static String tableName = "testHCatMapReduceTable"; + private static final Logger LOG = LoggerFactory.getLogger(HCatMapReduceTest.class); + protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + protected static String tableName = "testHCatMapReduceTable"; - private static List writeRecords = new ArrayList(); - private static List readRecords = new ArrayList(); + private static List writeRecords = new ArrayList(); + private static List readRecords = new ArrayList(); - protected abstract List getPartitionKeys(); + protected abstract List getPartitionKeys(); - protected abstract List getTableColumns(); + protected abstract List getTableColumns(); - private static FileSystem fs; + private static FileSystem fs; - protected Boolean isTableExternal() { - return false; - } + protected Boolean isTableExternal() { + return false; + } - protected String inputFormat() { - return RCFileInputFormat.class.getName(); - } - - protected String outputFormat() { - return RCFileOutputFormat.class.getName(); - } - - protected String serdeClass() { - return ColumnarSerDe.class.getName(); - } - - @BeforeClass - public static void setUpOneTime() throws Exception { - fs = new LocalFileSystem(); - fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - - HiveConf hiveConf = new HiveConf(); - hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0); - // Hack to initialize cache with 0 expiry time causing it to return a new hive client every time - // Otherwise the cache doesn't play well with the second test method with the client gets closed() in the - // tearDown() of the previous test - HCatUtil.getHiveClient(hiveConf); - - MapCreate.writeCount = 0; - MapRead.readCount = 0; - } + protected String inputFormat() { + return RCFileInputFormat.class.getName(); + } - @After - public void deleteTable() throws Exception { - try { - String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; + protected String outputFormat() { + return RCFileOutputFormat.class.getName(); + } - client.dropTable(databaseName, tableName); - } catch (Exception e) { - e.printStackTrace(); - throw e; - } - } + protected String serdeClass() { + return ColumnarSerDe.class.getName(); + } - @Before - public void createTable() throws Exception { - String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; + @BeforeClass + public static void setUpOneTime() throws Exception { + fs = new LocalFileSystem(); + fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - try { - client.dropTable(databaseName, tableName); - } catch (Exception e) { - } //can fail with NoSuchObjectException + HiveConf hiveConf = new HiveConf(); + hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0); + // Hack to initialize cache with 0 expiry time causing it to return a new hive client every time + // Otherwise the cache doesn't play well with the second test method with the client gets closed() in the + // tearDown() of the previous test + HCatUtil.getHiveClient(hiveConf); + MapCreate.writeCount = 0; + MapRead.readCount = 0; + } - Table tbl = new Table(); - tbl.setDbName(databaseName); - tbl.setTableName(tableName); - if (isTableExternal()){ - tbl.setTableType(TableType.EXTERNAL_TABLE.toString()); - } else { - tbl.setTableType(TableType.MANAGED_TABLE.toString()); - } - StorageDescriptor sd = new StorageDescriptor(); + @After + public void deleteTable() throws Exception { + try { + String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; - sd.setCols(getTableColumns()); - tbl.setPartitionKeys(getPartitionKeys()); + client.dropTable(databaseName, tableName); + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + } - tbl.setSd(sd); + @Before + public void createTable() throws Exception { + String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; - sd.setBucketCols(new ArrayList(2)); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - if (isTableExternal()){ - sd.getSerdeInfo().getParameters().put("EXTERNAL", "TRUE"); - } - sd.getSerdeInfo().setSerializationLib(serdeClass()); - sd.setInputFormat(inputFormat()); - sd.setOutputFormat(outputFormat()); + try { + client.dropTable(databaseName, tableName); + } catch (Exception e) { + } //can fail with NoSuchObjectException - Map tableParams = new HashMap(); - tbl.setParameters(tableParams); - client.createTable(tbl); + Table tbl = new Table(); + tbl.setDbName(databaseName); + tbl.setTableName(tableName); + if (isTableExternal()){ + tbl.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else { + tbl.setTableType(TableType.MANAGED_TABLE.toString()); } + StorageDescriptor sd = new StorageDescriptor(); - //Create test input file with specified number of rows - private void createInputFile(Path path, int rowCount) throws IOException { + sd.setCols(getTableColumns()); + tbl.setPartitionKeys(getPartitionKeys()); - if (fs.exists(path)) { - fs.delete(path, true); - } + tbl.setSd(sd); - FSDataOutputStream os = fs.create(path); + sd.setBucketCols(new ArrayList(2)); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + if (isTableExternal()){ + sd.getSerdeInfo().getParameters().put("EXTERNAL", "TRUE"); + } + sd.getSerdeInfo().setSerializationLib(serdeClass()); + sd.setInputFormat(inputFormat()); + sd.setOutputFormat(outputFormat()); - for (int i = 0; i < rowCount; i++) { - os.writeChars(i + "\n"); - } + Map tableParams = new HashMap(); + tbl.setParameters(tableParams); - os.close(); + client.createTable(tbl); + } + + //Create test input file with specified number of rows + private void createInputFile(Path path, int rowCount) throws IOException { + + if (fs.exists(path)) { + fs.delete(path, true); } - public static class MapCreate extends - Mapper { + FSDataOutputStream os = fs.create(path); - static int writeCount = 0; //test will be in local mode + for (int i = 0; i < rowCount; i++) { + os.writeChars(i + "\n"); + } - @Override - public void map(LongWritable key, Text value, Context context - ) throws IOException, InterruptedException { - { - try { - HCatRecord rec = writeRecords.get(writeCount); - context.write(null, rec); - writeCount++; + os.close(); + } - } catch (Exception e) { + public static class MapCreate extends + Mapper { - e.printStackTrace(System.err); //print since otherwise exception is lost - throw new IOException(e); - } - } - } - } + static int writeCount = 0; //test will be in local mode - public static class MapRead extends - Mapper { - - static int readCount = 0; //test will be in local mode - - @Override - public void map(WritableComparable key, HCatRecord value, Context context - ) throws IOException, InterruptedException { - { - try { - readRecords.add(value); - readCount++; - } catch (Exception e) { - e.printStackTrace(); //print since otherwise exception is lost - throw new IOException(e); - } - } - } - } + @Override + public void map(LongWritable key, Text value, Context context + ) throws IOException, InterruptedException { + { + try { + HCatRecord rec = writeRecords.get(writeCount); + 
context.write(null, rec); + writeCount++; - Job runMRCreate(Map partitionValues, - List partitionColumns, List records, - int writeCount, boolean assertWrite) throws Exception { - return runMRCreate(partitionValues, partitionColumns, records, writeCount, assertWrite, true); - } + } catch (Exception e) { - /** - * Run a local map reduce job to load data from in memory records to an HCatalog Table - * @param partitionValues - * @param partitionColumns - * @param records data to be written to HCatalog table - * @param writeCount - * @param assertWrite - * @param asSingleMapTask - * @return - * @throws Exception - */ - Job runMRCreate(Map partitionValues, - List partitionColumns, List records, - int writeCount, boolean assertWrite, boolean asSingleMapTask) throws Exception { - - writeRecords = records; - MapCreate.writeCount = 0; - - Configuration conf = new Configuration(); - Job job = new Job(conf, "hcat mapreduce write test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(HCatMapReduceTest.MapCreate.class); - - // input/output settings - job.setInputFormatClass(TextInputFormat.class); - - if (asSingleMapTask) { - // One input path would mean only one map task - Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); - createInputFile(path, writeCount); - TextInputFormat.setInputPaths(job, path); - } else { - // Create two input paths so that two map tasks get triggered. There could be other ways - // to trigger two map tasks. - Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); - createInputFile(path, writeCount / 2); - - Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2"); - createInputFile(path2, (writeCount - writeCount / 2)); - - TextInputFormat.setInputPaths(job, path, path2); + e.printStackTrace(System.err); //print since otherwise exception is lost + throw new IOException(e); } + } + } + } - job.setOutputFormatClass(HCatOutputFormat.class); + public static class MapRead extends + Mapper { - OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues); - HCatOutputFormat.setOutput(job, outputJobInfo); + static int readCount = 0; //test will be in local mode + + @Override + public void map(WritableComparable key, HCatRecord value, Context context + ) throws IOException, InterruptedException { + { + try { + readRecords.add(value); + readCount++; + } catch (Exception e) { + e.printStackTrace(); //print since otherwise exception is lost + throw new IOException(e); + } + } + } + } + + Job runMRCreate(Map partitionValues, + List partitionColumns, List records, + int writeCount, boolean assertWrite) throws Exception { + return runMRCreate(partitionValues, partitionColumns, records, writeCount, assertWrite, true); + } + + /** + * Run a local map reduce job to load data from in memory records to an HCatalog Table + * @param partitionValues + * @param partitionColumns + * @param records data to be written to HCatalog table + * @param writeCount + * @param assertWrite + * @param asSingleMapTask + * @return + * @throws Exception + */ + Job runMRCreate(Map partitionValues, + List partitionColumns, List records, + int writeCount, boolean assertWrite, boolean asSingleMapTask) throws Exception { + + writeRecords = records; + MapCreate.writeCount = 0; + + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce write test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(HCatMapReduceTest.MapCreate.class); + + // input/output settings + 
job.setInputFormatClass(TextInputFormat.class); + + if (asSingleMapTask) { + // One input path would mean only one map task + Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); + createInputFile(path, writeCount); + TextInputFormat.setInputPaths(job, path); + } else { + // Create two input paths so that two map tasks get triggered. There could be other ways + // to trigger two map tasks. + Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); + createInputFile(path, writeCount / 2); + + Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2"); + createInputFile(path2, (writeCount - writeCount / 2)); + + TextInputFormat.setInputPaths(job, path, path2); + } - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(DefaultHCatRecord.class); + job.setOutputFormatClass(HCatOutputFormat.class); - job.setNumReduceTasks(0); + OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues); + HCatOutputFormat.setOutput(job, outputJobInfo); - HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns)); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(DefaultHCatRecord.class); - boolean success = job.waitForCompletion(true); + job.setNumReduceTasks(0); - // Ensure counters are set when data has actually been read. - if (partitionValues != null) { - assertTrue(job.getCounters().getGroup("FileSystemCounters") - .findCounter("FILE_BYTES_READ").getValue() > 0); - } + HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns)); - if (!HCatUtil.isHadoop23()) { - // Local mode outputcommitter hook is not invoked in Hadoop 1.x - if (success) { - new FileOutputCommitterContainer(job, null).commitJob(job); - } else { - new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED); - } - } - if (assertWrite) { - // we assert only if we expected to assert with this call. - Assert.assertEquals(writeCount, MapCreate.writeCount); - } + boolean success = job.waitForCompletion(true); - return job; + // Ensure counters are set when data has actually been read. + if (partitionValues != null) { + assertTrue(job.getCounters().getGroup("FileSystemCounters") + .findCounter("FILE_BYTES_READ").getValue() > 0); } - List runMRRead(int readCount) throws Exception { - return runMRRead(readCount, null); + if (!HCatUtil.isHadoop23()) { + // Local mode outputcommitter hook is not invoked in Hadoop 1.x + if (success) { + new FileOutputCommitterContainer(job, null).commitJob(job); + } else { + new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED); + } + } + if (assertWrite) { + // we assert only if we expected to assert with this call. 
+ Assert.assertEquals(writeCount, MapCreate.writeCount); } - /** - * Run a local map reduce job to read records from HCatalog table and verify if the count is as expected - * @param readCount - * @param filter - * @return - * @throws Exception - */ - List runMRRead(int readCount, String filter) throws Exception { + return job; + } - MapRead.readCount = 0; - readRecords.clear(); + List runMRRead(int readCount) throws Exception { + return runMRRead(readCount, null); + } - Configuration conf = new Configuration(); - Job job = new Job(conf, "hcat mapreduce read test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(HCatMapReduceTest.MapRead.class); + /** + * Run a local map reduce job to read records from HCatalog table and verify if the count is as expected + * @param readCount + * @param filter + * @return + * @throws Exception + */ + List runMRRead(int readCount, String filter) throws Exception { - // input/output settings - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); + MapRead.readCount = 0; + readRecords.clear(); - HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter); + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce read test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(HCatMapReduceTest.MapRead.class); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); + // input/output settings + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); - job.setNumReduceTasks(0); + HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter); - Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput"); - if (fs.exists(path)) { - fs.delete(path, true); - } + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); - TextOutputFormat.setOutputPath(job, path); + job.setNumReduceTasks(0); - job.waitForCompletion(true); - Assert.assertEquals(readCount, MapRead.readCount); - - return readRecords; + Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput"); + if (fs.exists(path)) { + fs.delete(path, true); } + TextOutputFormat.setOutputPath(job, path); - protected HCatSchema getTableSchema() throws Exception { + job.waitForCompletion(true); + Assert.assertEquals(readCount, MapRead.readCount); - Configuration conf = new Configuration(); - Job job = new Job(conf, "hcat mapreduce read schema test"); - job.setJarByClass(this.getClass()); + return readRecords; + } - // input/output settings - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - HCatInputFormat.setInput(job, dbName, tableName); + protected HCatSchema getTableSchema() throws Exception { - return HCatInputFormat.getTableSchema(job); - } + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce read schema test"); + job.setJarByClass(this.getClass()); + + // input/output settings + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + + HCatInputFormat.setInput(job, dbName, tableName); + + return HCatInputFormat.getTableSchema(job); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java index ab6ff14..d8b69c2 100644 --- 
a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java @@ -47,162 +47,162 @@ public class TestHCatDynamicPartitioned extends HCatMapReduceTest { - private static List writeRecords; - private static List dataColumns; - private static final Logger LOG = LoggerFactory.getLogger(TestHCatDynamicPartitioned.class); - protected static final int NUM_RECORDS = 20; - protected static final int NUM_PARTITIONS = 5; - - @BeforeClass - public static void generateInputData() throws Exception { - tableName = "testHCatDynamicPartitionedTable"; - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - generateDataColumns(); + private static List writeRecords; + private static List dataColumns; + private static final Logger LOG = LoggerFactory.getLogger(TestHCatDynamicPartitioned.class); + protected static final int NUM_RECORDS = 20; + protected static final int NUM_PARTITIONS = 5; + + @BeforeClass + public static void generateInputData() throws Exception { + tableName = "testHCatDynamicPartitionedTable"; + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + generateDataColumns(); + } + + protected static void generateDataColumns() throws HCatException { + dataColumns = new ArrayList(); + dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, ""))); + } + + protected static void generateWriteRecords(int max, int mod, int offset) { + writeRecords = new ArrayList(); + + for (int i = 0; i < max; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("strvalue" + i); + objList.add(String.valueOf((i % mod) + offset)); + writeRecords.add(new DefaultHCatRecord(objList)); } - - protected static void generateDataColumns() throws HCatException { - dataColumns = new ArrayList(); - dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, ""))); + } + + @Override + protected List getPartitionKeys() { + List fields = new ArrayList(); + fields.add(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + @Override + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + /** + * Run the dynamic partitioning test but with single map task + * @throws Exception + */ + @Test + public void testHCatDynamicPartitionedTable() throws Exception { + runHCatDynamicPartitionedTable(true); + } + + /** + * Run the dynamic partitioning test but with multiple map task. 
See HCATALOG-490 + * @throws Exception + */ + @Test + public void testHCatDynamicPartitionedTableMultipleTask() throws Exception { + runHCatDynamicPartitionedTable(false); + } + + protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask) throws Exception { + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask); + + runMRRead(NUM_RECORDS); + + //Read with partition filter + runMRRead(4, "p1 = \"0\""); + runMRRead(8, "p1 = \"1\" or p1 = \"3\""); + runMRRead(4, "p1 = \"4\""); + + // read from hive to test + + String query = "select * from " + tableName; + int retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } - protected static void generateWriteRecords(int max, int mod, int offset) { - writeRecords = new ArrayList(); + ArrayList res = new ArrayList(); + driver.getResults(res); + assertEquals(NUM_RECORDS, res.size()); - for (int i = 0; i < max; i++) { - List objList = new ArrayList(); - objList.add(i); - objList.add("strvalue" + i); - objList.add(String.valueOf((i % mod) + offset)); - writeRecords.add(new DefaultHCatRecord(objList)); - } - } + //Test for duplicate publish + IOException exc = null; + try { + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false); - @Override - protected List getPartitionKeys() { - List fields = new ArrayList(); - fields.add(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + if (HCatUtil.isHadoop23()) { + Assert.assertTrue(job.isSuccessful()==false); + } + } catch (IOException e) { + exc = e; } - @Override - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + if (!HCatUtil.isHadoop23()) { + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertTrue("Got exception of type [" + ((HCatException) exc).getErrorType().toString() + + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED", + (ErrorType.ERROR_PUBLISHING_PARTITION == ((HCatException) exc).getErrorType()) + || (ErrorType.ERROR_MOVE_FAILED == ((HCatException) exc).getErrorType()) + ); } - /** - * Run the dynamic partitioning test but with single map task - * @throws Exception - */ - @Test - public void testHCatDynamicPartitionedTable() throws Exception { - runHCatDynamicPartitionedTable(true); + query = "show partitions " + tableName; + retCode = driver.run(query).getResponseCode(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } - - /** - * Run the dynamic partitioning test but with multiple map task. 
See HCATALOG-490 - * @throws Exception - */ - @Test - public void testHCatDynamicPartitionedTableMultipleTask() throws Exception { - runHCatDynamicPartitionedTable(false); + res = new ArrayList(); + driver.getResults(res); + assertEquals(NUM_PARTITIONS, res.size()); + + query = "select * from " + tableName; + retCode = driver.run(query).getResponseCode(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } + res = new ArrayList(); + driver.getResults(res); + assertEquals(NUM_RECORDS, res.size()); + } - protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask) throws Exception { - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask); - - runMRRead(NUM_RECORDS); - - //Read with partition filter - runMRRead(4, "p1 = \"0\""); - runMRRead(8, "p1 = \"1\" or p1 = \"3\""); - runMRRead(4, "p1 = \"4\""); - - // read from hive to test - - String query = "select * from " + tableName; - int retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - assertEquals(NUM_RECORDS, res.size()); - - - //Test for duplicate publish - IOException exc = null; - try { - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false); - - if (HCatUtil.isHadoop23()) { - Assert.assertTrue(job.isSuccessful()==false); - } - } catch (IOException e) { - exc = e; - } - - if (!HCatUtil.isHadoop23()) { - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertTrue("Got exception of type [" + ((HCatException) exc).getErrorType().toString() - + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED", - (ErrorType.ERROR_PUBLISHING_PARTITION == ((HCatException) exc).getErrorType()) - || (ErrorType.ERROR_MOVE_FAILED == ((HCatException) exc).getErrorType()) - ); - } - - query = "show partitions " + tableName; - retCode = driver.run(query).getResponseCode(); - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - res = new ArrayList(); - driver.getResults(res); - assertEquals(NUM_PARTITIONS, res.size()); - - query = "select * from " + tableName; - retCode = driver.run(query).getResponseCode(); - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - res = new ArrayList(); - driver.getResults(res); - assertEquals(NUM_RECORDS, res.size()); + //TODO 1.0 miniCluster is slow this test times out, make it work +// renaming test to make test framework skip it + public void _testHCatDynamicPartitionMaxPartitions() throws Exception { + HiveConf hc = new HiveConf(this.getClass()); + + int maxParts = hiveConf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); + LOG.info("Max partitions allowed = {}", maxParts); + + IOException exc = null; + try { + generateWriteRecords(maxParts + 5, maxParts + 2, 10); + runMRCreate(null, dataColumns, writeRecords, maxParts + 5, false); + } catch (IOException e) { + exc = e; } - //TODO 1.0 miniCluster is slow this test times out, make it work -// renaming test to make test framework skip it - public void _testHCatDynamicPartitionMaxPartitions() throws Exception { - HiveConf hc = new HiveConf(this.getClass()); - - int maxParts = hiveConf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); - LOG.info("Max partitions allowed = {}", maxParts); - - 
IOException exc = null; - try { - generateWriteRecords(maxParts + 5, maxParts + 2, 10); - runMRCreate(null, dataColumns, writeRecords, maxParts + 5, false); - } catch (IOException e) { - exc = e; - } - - if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, ((HCatException) exc).getErrorType()); - } else { - assertTrue(exc == null); - runMRRead(maxParts + 5); - } + if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, ((HCatException) exc).getErrorType()); + } else { + assertTrue(exc == null); + runMRRead(maxParts + 5); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java index 673f2aa..36c7945 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java @@ -21,9 +21,9 @@ public class TestHCatExternalDynamicPartitioned extends TestHCatDynamicPartitioned { - @Override - protected Boolean isTableExternal() { - return true; - } + @Override + protected Boolean isTableExternal() { + return true; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java index 4dc2b94..d259914 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java @@ -21,9 +21,9 @@ public class TestHCatExternalHCatNonPartitioned extends TestHCatNonPartitioned { - @Override - protected Boolean isTableExternal() { - return true; - } + @Override + protected Boolean isTableExternal() { + return true; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java index 474e825..e5f8d1e 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java @@ -21,9 +21,9 @@ public class TestHCatExternalPartitioned extends TestHCatPartitioned { - @Override - protected Boolean isTableExternal() { - return true; - } + @Override + protected Boolean isTableExternal() { + return true; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveCompatibility.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveCompatibility.java index 4cf3b08..c5dfa43 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveCompatibility.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveCompatibility.java @@ -35,95 +35,95 @@ import org.junit.Test; public class TestHCatHiveCompatibility extends HCatBaseTest { - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - @BeforeClass - public static void 
createInputData() throws Exception { - int LOOP_SIZE = 11; - File file = new File(INPUT_FILE_NAME); - file.deleteOnExit(); - FileWriter writer = new FileWriter(file); - for (int i = 0; i < LOOP_SIZE; i++) { - writer.write(i + "\t1\n"); - } - writer.close(); + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + @BeforeClass + public static void createInputData() throws Exception { + int LOOP_SIZE = 11; + File file = new File(INPUT_FILE_NAME); + file.deleteOnExit(); + FileWriter writer = new FileWriter(file); + for (int i = 0; i < LOOP_SIZE; i++) { + writer.write(i + "\t1\n"); } + writer.close(); + } - @Test - public void testUnpartedReadWrite() throws Exception { + @Test + public void testUnpartedReadWrite() throws Exception { - driver.run("drop table if exists junit_unparted_noisd"); - String createTable = "create table junit_unparted_noisd(a int) stored as RCFILE"; - Assert.assertEquals(0, driver.run(createTable).getResponseCode()); + driver.run("drop table if exists junit_unparted_noisd"); + String createTable = "create table junit_unparted_noisd(a int) stored as RCFILE"; + Assert.assertEquals(0, driver.run(createTable).getResponseCode()); - // assert that the table created has no hcat instrumentation, and that we're still able to read it. - Table table = client.getTable("default", "junit_unparted_noisd"); - Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + // assert that the table created has no hcat instrumentation, and that we're still able to read it. + Table table = client.getTable("default", "junit_unparted_noisd"); + Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - PigServer server = new PigServer(ExecType.LOCAL); - logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - logAndRegister(server, "store A into 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer();"); - logAndRegister(server, "B = load 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();"); - Iterator itr = server.openIterator("B"); + PigServer server = new PigServer(ExecType.LOCAL); + logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); + logAndRegister(server, "store A into 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer();"); + logAndRegister(server, "B = load 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator itr = server.openIterator("B"); - int i = 0; + int i = 0; - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(1, t.size()); - Assert.assertEquals(t.get(0), i); - i++; - } + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(1, t.size()); + Assert.assertEquals(t.get(0), i); + i++; + } - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); - // assert that the table created still has no hcat instrumentation - Table table2 = client.getTable("default", "junit_unparted_noisd"); - Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + // assert that the table created still has no hcat instrumentation + Table table2 = client.getTable("default", "junit_unparted_noisd"); + Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - driver.run("drop table junit_unparted_noisd"); - } + driver.run("drop table junit_unparted_noisd"); + } - @Test - 
public void testPartedRead() throws Exception { + @Test + public void testPartedRead() throws Exception { - driver.run("drop table if exists junit_parted_noisd"); - String createTable = "create table junit_parted_noisd(a int) partitioned by (b string) stored as RCFILE"; - Assert.assertEquals(0, driver.run(createTable).getResponseCode()); + driver.run("drop table if exists junit_parted_noisd"); + String createTable = "create table junit_parted_noisd(a int) partitioned by (b string) stored as RCFILE"; + Assert.assertEquals(0, driver.run(createTable).getResponseCode()); - // assert that the table created has no hcat instrumentation, and that we're still able to read it. - Table table = client.getTable("default", "junit_parted_noisd"); - Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + // assert that the table created has no hcat instrumentation, and that we're still able to read it. + Table table = client.getTable("default", "junit_parted_noisd"); + Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - PigServer server = new PigServer(ExecType.LOCAL); - logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - logAndRegister(server, "store A into 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer('b=42');"); - logAndRegister(server, "B = load 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();"); - Iterator itr = server.openIterator("B"); + PigServer server = new PigServer(ExecType.LOCAL); + logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); + logAndRegister(server, "store A into 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer('b=42');"); + logAndRegister(server, "B = load 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator itr = server.openIterator("B"); - int i = 0; + int i = 0; - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(2, t.size()); // Contains explicit field "a" and partition "b". - Assert.assertEquals(t.get(0), i); - Assert.assertEquals(t.get(1), "42"); - i++; - } + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(2, t.size()); // Contains explicit field "a" and partition "b". + Assert.assertEquals(t.get(0), i); + Assert.assertEquals(t.get(1), "42"); + i++; + } - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); - // assert that the table created still has no hcat instrumentation - Table table2 = client.getTable("default", "junit_parted_noisd"); - Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + // assert that the table created still has no hcat instrumentation + Table table2 = client.getTable("default", "junit_parted_noisd"); + Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - // assert that there is one partition present, and it had hcat instrumentation inserted when it was created. - Partition ptn = client.getPartition("default", "junit_parted_noisd", Arrays.asList("42")); + // assert that there is one partition present, and it had hcat instrumentation inserted when it was created. 
+ Partition ptn = client.getPartition("default", "junit_parted_noisd", Arrays.asList("42")); - Assert.assertNotNull(ptn); - Assert.assertTrue(ptn.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - driver.run("drop table junit_unparted_noisd"); - } + Assert.assertNotNull(ptn); + Assert.assertTrue(ptn.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + driver.run("drop table junit_unparted_noisd"); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java index 4dd0506..470ff58 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java @@ -40,77 +40,77 @@ public class TestHCatHiveThriftCompatibility extends HCatBaseTest { - private boolean setUpComplete = false; - private Path intStringSeq; - - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - if (setUpComplete) { - return; - } - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - TIOStreamTransport transport = new TIOStreamTransport(out); - TBinaryProtocol protocol = new TBinaryProtocol(transport); - - IntString intString = new IntString(1, "one", 1); - intString.write(protocol); - BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); - - intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); - LOG.info("Creating data file: " + intStringSeq); - - SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( - intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, - NullWritable.class, BytesWritable.class); - seqFileWriter.append(NullWritable.get(), bytesWritable); - seqFileWriter.close(); - - setUpComplete = true; + private boolean setUpComplete = false; + private Path intStringSeq; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + if (setUpComplete) { + return; } - /** - * Create a table with no explicit schema and ensure its correctly - * discovered from the thrift struct. 
- */ - @Test - public void testDynamicCols() throws Exception { - Assert.assertEquals(0, driver.run("drop table if exists test_thrift").getResponseCode()); - Assert.assertEquals(0, driver.run( - "create external table test_thrift " + - "partitioned by (year string) " + - "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + - "with serdeproperties ( " + - " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + - " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + - "stored as" + - " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + - " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") - .getResponseCode()); - Assert.assertEquals(0, - driver.run("alter table test_thrift add partition (year = '2012') location '" + - intStringSeq.getParent() + "'").getResponseCode()); - - PigServer pigServer = new PigServer(ExecType.LOCAL); - pigServer.registerQuery("A = load 'test_thrift' using org.apache.hive.hcatalog.pig.HCatLoader();"); - - Schema expectedSchema = new Schema(); - expectedSchema.add(new Schema.FieldSchema("myint", DataType.INTEGER)); - expectedSchema.add(new Schema.FieldSchema("mystring", DataType.CHARARRAY)); - expectedSchema.add(new Schema.FieldSchema("underscore_int", DataType.INTEGER)); - expectedSchema.add(new Schema.FieldSchema("year", DataType.CHARARRAY)); - - Assert.assertEquals(expectedSchema, pigServer.dumpSchema("A")); - - Iterator iterator = pigServer.openIterator("A"); - Tuple t = iterator.next(); - Assert.assertEquals(1, t.get(0)); - Assert.assertEquals("one", t.get(1)); - Assert.assertEquals(1, t.get(2)); - Assert.assertEquals("2012", t.get(3)); - - Assert.assertFalse(iterator.hasNext()); - } + ByteArrayOutputStream out = new ByteArrayOutputStream(); + TIOStreamTransport transport = new TIOStreamTransport(out); + TBinaryProtocol protocol = new TBinaryProtocol(transport); + + IntString intString = new IntString(1, "one", 1); + intString.write(protocol); + BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); + + intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); + LOG.info("Creating data file: " + intStringSeq); + + SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( + intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, + NullWritable.class, BytesWritable.class); + seqFileWriter.append(NullWritable.get(), bytesWritable); + seqFileWriter.close(); + + setUpComplete = true; + } + + /** + * Create a table with no explicit schema and ensure its correctly + * discovered from the thrift struct. 
+ */ + @Test + public void testDynamicCols() throws Exception { + Assert.assertEquals(0, driver.run("drop table if exists test_thrift").getResponseCode()); + Assert.assertEquals(0, driver.run( + "create external table test_thrift " + + "partitioned by (year string) " + + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + + "with serdeproperties ( " + + " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + + " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + + "stored as" + + " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + + " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") + .getResponseCode()); + Assert.assertEquals(0, + driver.run("alter table test_thrift add partition (year = '2012') location '" + + intStringSeq.getParent() + "'").getResponseCode()); + + PigServer pigServer = new PigServer(ExecType.LOCAL); + pigServer.registerQuery("A = load 'test_thrift' using org.apache.hive.hcatalog.pig.HCatLoader();"); + + Schema expectedSchema = new Schema(); + expectedSchema.add(new Schema.FieldSchema("myint", DataType.INTEGER)); + expectedSchema.add(new Schema.FieldSchema("mystring", DataType.CHARARRAY)); + expectedSchema.add(new Schema.FieldSchema("underscore_int", DataType.INTEGER)); + expectedSchema.add(new Schema.FieldSchema("year", DataType.CHARARRAY)); + + Assert.assertEquals(expectedSchema, pigServer.dumpSchema("A")); + + Iterator iterator = pigServer.openIterator("A"); + Tuple t = iterator.next(); + Assert.assertEquals(1, t.get(0)); + Assert.assertEquals("one", t.get(1)); + Assert.assertEquals(1, t.get(2)); + Assert.assertEquals("2012", t.get(3)); + + Assert.assertFalse(iterator.hasNext()); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatInputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatInputFormat.java index 765ea14..fe02674 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatInputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatInputFormat.java @@ -42,107 +42,107 @@ public class TestHCatInputFormat extends HCatBaseTest { - private boolean setUpComplete = false; - - /** - * Create an input sequence file with 100 records; every 10th record is bad. - * Load this table into Hive. - */ - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - if (setUpComplete) { - return; - } - - Path intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); - LOG.info("Creating data file: " + intStringSeq); - SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( - intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, - NullWritable.class, BytesWritable.class); - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - TIOStreamTransport transport = new TIOStreamTransport(out); - TBinaryProtocol protocol = new TBinaryProtocol(transport); - - for (int i = 1; i <= 100; i++) { - if (i % 10 == 0) { - seqFileWriter.append(NullWritable.get(), new BytesWritable("bad record".getBytes())); - } else { - out.reset(); - IntString intString = new IntString(i, Integer.toString(i), i); - intString.write(protocol); - BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); - seqFileWriter.append(NullWritable.get(), bytesWritable); - } - } - - seqFileWriter.close(); - - // Now let's load this file into a new Hive table. 
- Assert.assertEquals(0, driver.run("drop table if exists test_bad_records").getResponseCode()); - Assert.assertEquals(0, driver.run( - "create table test_bad_records " + - "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + - "with serdeproperties ( " + - " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + - " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + - "stored as" + - " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + - " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") - .getResponseCode()); - Assert.assertEquals(0, driver.run("load data local inpath '" + intStringSeq.getParent() + - "' into table test_bad_records").getResponseCode()); - - setUpComplete = true; + private boolean setUpComplete = false; + + /** + * Create an input sequence file with 100 records; every 10th record is bad. + * Load this table into Hive. + */ + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + if (setUpComplete) { + return; } - @Test - public void testBadRecordHandlingPasses() throws Exception { - Assert.assertTrue(runJob(0.1f)); + Path intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); + LOG.info("Creating data file: " + intStringSeq); + SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( + intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, + NullWritable.class, BytesWritable.class); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + TIOStreamTransport transport = new TIOStreamTransport(out); + TBinaryProtocol protocol = new TBinaryProtocol(transport); + + for (int i = 1; i <= 100; i++) { + if (i % 10 == 0) { + seqFileWriter.append(NullWritable.get(), new BytesWritable("bad record".getBytes())); + } else { + out.reset(); + IntString intString = new IntString(i, Integer.toString(i), i); + intString.write(protocol); + BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); + seqFileWriter.append(NullWritable.get(), bytesWritable); + } } - @Test - public void testBadRecordHandlingFails() throws Exception { - Assert.assertFalse(runJob(0.01f)); - } + seqFileWriter.close(); + + // Now let's load this file into a new Hive table. 
+ Assert.assertEquals(0, driver.run("drop table if exists test_bad_records").getResponseCode()); + Assert.assertEquals(0, driver.run( + "create table test_bad_records " + + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + + "with serdeproperties ( " + + " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + + " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + + "stored as" + + " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + + " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") + .getResponseCode()); + Assert.assertEquals(0, driver.run("load data local inpath '" + intStringSeq.getParent() + + "' into table test_bad_records").getResponseCode()); - private boolean runJob(float badRecordThreshold) throws Exception { - Configuration conf = new Configuration(); + setUpComplete = true; + } - conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold); + @Test + public void testBadRecordHandlingPasses() throws Exception { + Assert.assertTrue(runJob(0.1f)); + } - Job job = new Job(conf); - job.setJarByClass(this.getClass()); - job.setMapperClass(MyMapper.class); + @Test + public void testBadRecordHandlingFails() throws Exception { + Assert.assertFalse(runJob(0.01f)); + } - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); + private boolean runJob(float badRecordThreshold) throws Exception { + Configuration conf = new Configuration(); - HCatInputFormat.setInput(job, "default", "test_bad_records"); + conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold); - job.setMapOutputKeyClass(HCatRecord.class); - job.setMapOutputValueClass(HCatRecord.class); + Job job = new Job(conf); + job.setJarByClass(this.getClass()); + job.setMapperClass(MyMapper.class); - job.setNumReduceTasks(0); + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); - Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output"); - if (path.getFileSystem(conf).exists(path)) { - path.getFileSystem(conf).delete(path, true); - } + HCatInputFormat.setInput(job, "default", "test_bad_records"); - TextOutputFormat.setOutputPath(job, path); + job.setMapOutputKeyClass(HCatRecord.class); + job.setMapOutputValueClass(HCatRecord.class); - return job.waitForCompletion(true); + job.setNumReduceTasks(0); + + Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output"); + if (path.getFileSystem(conf).exists(path)) { + path.getFileSystem(conf).delete(path, true); } - public static class MyMapper extends Mapper { - @Override - public void map(NullWritable key, HCatRecord value, Context context) - throws IOException, InterruptedException { - LOG.info("HCatRecord: " + value); - context.write(NullWritable.get(), new Text(value.toString())); - } + TextOutputFormat.setOutputPath(job, path); + + return job.waitForCompletion(true); + } + + public static class MyMapper extends Mapper { + @Override + public void map(NullWritable key, HCatRecord value, Context context) + throws IOException, InterruptedException { + LOG.info("HCatRecord: " + value); + context.write(NullWritable.get(), new Text(value.toString())); } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java index 2b435a7..2ea5b9e 100644 --- 
a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java @@ -73,358 +73,358 @@ public class TestHCatMultiOutputFormat { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatMultiOutputFormat.class); - - private static final String DATABASE = "default"; - private static final String[] tableNames = {"test1", "test2", "test3"}; - private static final String[] tablePerms = {"755", "750", "700"}; - private static Path warehousedir = null; - private static HashMap schemaMap = new HashMap(); - private static HiveMetaStoreClient hmsc; - private static MiniMRCluster mrCluster; - private static Configuration mrConf; - private static HiveConf hiveConf; - private static File workDir; - - private static final String msPort = "20199"; - private static Thread t; - - static { - schemaMap.put(tableNames[0], new HCatSchema(ColumnHolder.hCattest1Cols)); - schemaMap.put(tableNames[1], new HCatSchema(ColumnHolder.hCattest2Cols)); - schemaMap.put(tableNames[2], new HCatSchema(ColumnHolder.hCattest3Cols)); + private static final Logger LOG = LoggerFactory.getLogger(TestHCatMultiOutputFormat.class); + + private static final String DATABASE = "default"; + private static final String[] tableNames = {"test1", "test2", "test3"}; + private static final String[] tablePerms = {"755", "750", "700"}; + private static Path warehousedir = null; + private static HashMap schemaMap = new HashMap(); + private static HiveMetaStoreClient hmsc; + private static MiniMRCluster mrCluster; + private static Configuration mrConf; + private static HiveConf hiveConf; + private static File workDir; + + private static final String msPort = "20199"; + private static Thread t; + + static { + schemaMap.put(tableNames[0], new HCatSchema(ColumnHolder.hCattest1Cols)); + schemaMap.put(tableNames[1], new HCatSchema(ColumnHolder.hCattest2Cols)); + schemaMap.put(tableNames[2], new HCatSchema(ColumnHolder.hCattest3Cols)); + } + + private static class RunMS implements Runnable { + + @Override + public void run() { + try { + String warehouseConf = HiveConf.ConfVars.METASTOREWAREHOUSE.varname + "=" + + warehousedir.toString(); + HiveMetaStore.main(new String[]{"-v", "-p", msPort, "--hiveconf", warehouseConf}); + } catch (Throwable t) { + System.err.println("Exiting. Got exception from metastore: " + t.getMessage()); + } } - private static class RunMS implements Runnable { - - @Override - public void run() { - try { - String warehouseConf = HiveConf.ConfVars.METASTOREWAREHOUSE.varname + "=" - + warehousedir.toString(); - HiveMetaStore.main(new String[]{"-v", "-p", msPort, "--hiveconf", warehouseConf}); - } catch (Throwable t) { - System.err.println("Exiting. 
Got exception from metastore: " + t.getMessage()); - } - } + } - } + /** + * Private class which holds all the data for the test cases + */ + private static class ColumnHolder { - /** - * Private class which holds all the data for the test cases - */ - private static class ColumnHolder { - - private static ArrayList hCattest1Cols = new ArrayList(); - private static ArrayList hCattest2Cols = new ArrayList(); - private static ArrayList hCattest3Cols = new ArrayList(); - - private static ArrayList partitionCols = new ArrayList(); - private static ArrayList test1Cols = new ArrayList(); - private static ArrayList test2Cols = new ArrayList(); - private static ArrayList test3Cols = new ArrayList(); - - private static HashMap> colMapping = new HashMap>(); - - static { - try { - FieldSchema keyCol = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, ""); - test1Cols.add(keyCol); - test2Cols.add(keyCol); - test3Cols.add(keyCol); - hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); - hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); - hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); - FieldSchema valueCol = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, ""); - test1Cols.add(valueCol); - test3Cols.add(valueCol); - hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); - hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); - FieldSchema extraCol = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, ""); - test3Cols.add(extraCol); - hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extraCol)); - colMapping.put("test1", test1Cols); - colMapping.put("test2", test2Cols); - colMapping.put("test3", test3Cols); - } catch (HCatException e) { - LOG.error("Error in setting up schema fields for the table", e); - throw new RuntimeException(e); - } - } + private static ArrayList hCattest1Cols = new ArrayList(); + private static ArrayList hCattest2Cols = new ArrayList(); + private static ArrayList hCattest3Cols = new ArrayList(); - static { - partitionCols.add(new FieldSchema("ds", serdeConstants.STRING_TYPE_NAME, "")); - partitionCols.add(new FieldSchema("cluster", serdeConstants.STRING_TYPE_NAME, "")); - } - } + private static ArrayList partitionCols = new ArrayList(); + private static ArrayList test1Cols = new ArrayList(); + private static ArrayList test2Cols = new ArrayList(); + private static ArrayList test3Cols = new ArrayList(); - @BeforeClass - public static void setup() throws Exception { - String testDir = System.getProperty("test.data.dir", "./"); - testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/"; - workDir = new File(new File(testDir).getCanonicalPath()); - FileUtil.fullyDelete(workDir); - workDir.mkdirs(); - - warehousedir = new Path(workDir + "/warehouse"); - - // Run hive metastore server - t = new Thread(new RunMS()); - t.start(); - - // LocalJobRunner does not work with mapreduce OutputCommitter. So need - // to use MiniMRCluster. 
MAPREDUCE-2350 - Configuration conf = new Configuration(true); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - - FileSystem fs = FileSystem.get(conf); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, - new JobConf(conf)); - mrConf = mrCluster.createJobConf(); - fs.mkdirs(warehousedir); - - initializeSetup(); - } + private static HashMap> colMapping = new HashMap>(); - private static void initializeSetup() throws Exception { - - hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class); - hiveConf.set("hive.metastore.local", "false"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString()); - try { - hmsc = new HiveMetaStoreClient(hiveConf, null); - initalizeTables(); - } catch (Throwable e) { - LOG.error("Exception encountered while setting up testcase", e); - throw new Exception(e); - } finally { - hmsc.close(); - } + static { + try { + FieldSchema keyCol = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, ""); + test1Cols.add(keyCol); + test2Cols.add(keyCol); + test3Cols.add(keyCol); + hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); + hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); + hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); + FieldSchema valueCol = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, ""); + test1Cols.add(valueCol); + test3Cols.add(valueCol); + hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); + hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); + FieldSchema extraCol = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, ""); + test3Cols.add(extraCol); + hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extraCol)); + colMapping.put("test1", test1Cols); + colMapping.put("test2", test2Cols); + colMapping.put("test3", test3Cols); + } catch (HCatException e) { + LOG.error("Error in setting up schema fields for the table", e); + throw new RuntimeException(e); + } } - private static void initalizeTables() throws Exception { - for (String table : tableNames) { - try { - if (hmsc.getTable(DATABASE, table) != null) { - hmsc.dropTable(DATABASE, table); - } - } catch (NoSuchObjectException ignored) { - } - } - for (int i = 0; i < tableNames.length; i++) { - createTable(tableNames[i], tablePerms[i]); - } + static { + partitionCols.add(new FieldSchema("ds", serdeConstants.STRING_TYPE_NAME, "")); + partitionCols.add(new FieldSchema("cluster", serdeConstants.STRING_TYPE_NAME, "")); } - - private static void createTable(String tableName, String tablePerm) throws Exception { - Table tbl = new Table(); - tbl.setDbName(DATABASE); - tbl.setTableName(tableName); - StorageDescriptor sd = new 
StorageDescriptor(); - sd.setCols(ColumnHolder.colMapping.get(tableName)); - tbl.setSd(sd); - sd.setParameters(new HashMap()); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName()); - sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib( - org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName()); - tbl.setPartitionKeys(ColumnHolder.partitionCols); - - hmsc.createTable(tbl); - FileSystem fs = FileSystem.get(mrConf); - fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm)); + } + + @BeforeClass + public static void setup() throws Exception { + String testDir = System.getProperty("test.data.dir", "./"); + testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/"; + workDir = new File(new File(testDir).getCanonicalPath()); + FileUtil.fullyDelete(workDir); + workDir.mkdirs(); + + warehousedir = new Path(workDir + "/warehouse"); + + // Run hive metastore server + t = new Thread(new RunMS()); + t.start(); + + // LocalJobRunner does not work with mapreduce OutputCommitter. So need + // to use MiniMRCluster. MAPREDUCE-2350 + Configuration conf = new Configuration(true); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + + FileSystem fs = FileSystem.get(conf); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, + new JobConf(conf)); + mrConf = mrCluster.createJobConf(); + fs.mkdirs(warehousedir); + + initializeSetup(); + } + + private static void initializeSetup() throws Exception { + + hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class); + hiveConf.set("hive.metastore.local", "false"); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString()); + try { + hmsc = new HiveMetaStoreClient(hiveConf, null); + initalizeTables(); + } catch (Throwable e) { + LOG.error("Exception encountered while setting up testcase", e); + throw new Exception(e); + } finally { + hmsc.close(); } + } - @AfterClass - public static void tearDown() throws IOException { - FileUtil.fullyDelete(workDir); - FileSystem fs = FileSystem.get(mrConf); - if (fs.exists(warehousedir)) { - fs.delete(warehousedir, true); - } - if (mrCluster != null) { - mrCluster.shutdown(); + private static void initalizeTables() throws Exception { + for (String table : tableNames) { + try { + if (hmsc.getTable(DATABASE, table) != null) { + hmsc.dropTable(DATABASE, table); } + } catch 
(NoSuchObjectException ignored) { + } } + for (int i = 0; i < tableNames.length; i++) { + createTable(tableNames[i], tablePerms[i]); + } + } + + private static void createTable(String tableName, String tablePerm) throws Exception { + Table tbl = new Table(); + tbl.setDbName(DATABASE); + tbl.setTableName(tableName); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(ColumnHolder.colMapping.get(tableName)); + tbl.setSd(sd); + sd.setParameters(new HashMap()); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName()); + sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib( + org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName()); + tbl.setPartitionKeys(ColumnHolder.partitionCols); + + hmsc.createTable(tbl); + FileSystem fs = FileSystem.get(mrConf); + fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm)); + } + + @AfterClass + public static void tearDown() throws IOException { + FileUtil.fullyDelete(workDir); + FileSystem fs = FileSystem.get(mrConf); + if (fs.exists(warehousedir)) { + fs.delete(warehousedir, true); + } + if (mrCluster != null) { + mrCluster.shutdown(); + } + } + + /** + * Simple test case. + *
    + *
+   * <ol>
+   * <li>Submits a mapred job which writes out one fixed line to each of the tables</li>
+   * <li>uses hive fetch task to read the data and see if it matches what was written</li>
+   * </ol>
+ * + * @throws Exception if any error occurs + */ + @Test + public void testOutputFormat() throws Throwable { + HashMap partitionValues = new HashMap(); + partitionValues.put("ds", "1"); + partitionValues.put("cluster", "ag"); + ArrayList infoList = new ArrayList(); + infoList.add(OutputJobInfo.create("default", tableNames[0], partitionValues)); + infoList.add(OutputJobInfo.create("default", tableNames[1], partitionValues)); + infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues)); + + Job job = new Job(hiveConf, "SampleJob"); + + job.setMapperClass(MyMapper.class); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(MultiOutputFormat.class); + job.setNumReduceTasks(0); + + JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); + + for (int i = 0; i < tableNames.length; i++) { + configurer.addOutputFormat(tableNames[i], HCatOutputFormat.class, BytesWritable.class, + HCatRecord.class); + HCatOutputFormat.setOutput(configurer.getJob(tableNames[i]), infoList.get(i)); + HCatOutputFormat.setSchema(configurer.getJob(tableNames[i]), + schemaMap.get(tableNames[i])); + } + configurer.configure(); - /** - * Simple test case. - *
    - *
-     * <ol>
-     * <li>Submits a mapred job which writes out one fixed line to each of the tables</li>
-     * <li>uses hive fetch task to read the data and see if it matches what was written</li>
-     * </ol>
- * - * @throws Exception if any error occurs - */ - @Test - public void testOutputFormat() throws Throwable { - HashMap partitionValues = new HashMap(); - partitionValues.put("ds", "1"); - partitionValues.put("cluster", "ag"); - ArrayList infoList = new ArrayList(); - infoList.add(OutputJobInfo.create("default", tableNames[0], partitionValues)); - infoList.add(OutputJobInfo.create("default", tableNames[1], partitionValues)); - infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues)); - - Job job = new Job(hiveConf, "SampleJob"); - - job.setMapperClass(MyMapper.class); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(MultiOutputFormat.class); - job.setNumReduceTasks(0); - - JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); - - for (int i = 0; i < tableNames.length; i++) { - configurer.addOutputFormat(tableNames[i], HCatOutputFormat.class, BytesWritable.class, - HCatRecord.class); - HCatOutputFormat.setOutput(configurer.getJob(tableNames[i]), infoList.get(i)); - HCatOutputFormat.setSchema(configurer.getJob(tableNames[i]), - schemaMap.get(tableNames[i])); - } - configurer.configure(); - - Path filePath = createInputFile(); - FileInputFormat.addInputPath(job, filePath); - Assert.assertTrue(job.waitForCompletion(true)); + Path filePath = createInputFile(); + FileInputFormat.addInputPath(job, filePath); + Assert.assertTrue(job.waitForCompletion(true)); - ArrayList outputs = new ArrayList(); - for (String tbl : tableNames) { - outputs.add(getTableData(tbl, "default").get(0)); - } - Assert.assertEquals("Comparing output of table " + - tableNames[0] + " is not correct", outputs.get(0), "a,a,1,ag"); - Assert.assertEquals("Comparing output of table " + - tableNames[1] + " is not correct", outputs.get(1), "a,1,ag"); - Assert.assertEquals("Comparing output of table " + - tableNames[2] + " is not correct", outputs.get(2), "a,a,extra,1,ag"); - - // Check permisssion on partition dirs and files created - for (int i = 0; i < tableNames.length; i++) { - Path partitionFile = new Path(warehousedir + "/" + tableNames[i] - + "/ds=1/cluster=ag/part-m-00000"); - FileSystem fs = partitionFile.getFileSystem(mrConf); - Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", - fs.getFileStatus(partitionFile).getPermission(), - new FsPermission(tablePerms[i])); - Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", - fs.getFileStatus(partitionFile.getParent()).getPermission(), - new FsPermission(tablePerms[i])); - Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", - fs.getFileStatus(partitionFile.getParent().getParent()).getPermission(), - new FsPermission(tablePerms[i])); - - } - LOG.info("File permissions verified"); + ArrayList outputs = new ArrayList(); + for (String tbl : tableNames) { + outputs.add(getTableData(tbl, "default").get(0)); } + Assert.assertEquals("Comparing output of table " + + tableNames[0] + " is not correct", outputs.get(0), "a,a,1,ag"); + Assert.assertEquals("Comparing output of table " + + tableNames[1] + " is not correct", outputs.get(1), "a,1,ag"); + Assert.assertEquals("Comparing output of table " + + tableNames[2] + " is not correct", outputs.get(2), "a,a,extra,1,ag"); + + // Check permisssion on partition dirs and files created + for (int i = 0; i < tableNames.length; i++) { + Path partitionFile = new Path(warehousedir + "/" + tableNames[i] + + "/ds=1/cluster=ag/part-m-00000"); + FileSystem fs = 
partitionFile.getFileSystem(mrConf); + Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", + fs.getFileStatus(partitionFile).getPermission(), + new FsPermission(tablePerms[i])); + Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", + fs.getFileStatus(partitionFile.getParent()).getPermission(), + new FsPermission(tablePerms[i])); + Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", + fs.getFileStatus(partitionFile.getParent().getParent()).getPermission(), + new FsPermission(tablePerms[i])); - /** - * Create a input file for map - * - * @return absolute path of the file. - * @throws IOException if any error encountered - */ - private Path createInputFile() throws IOException { - Path f = new Path(workDir + "/MultiTableInput.txt"); - FileSystem fs = FileSystem.get(mrConf); - if (fs.exists(f)) { - fs.delete(f, true); - } - OutputStream out = fs.create(f); - for (int i = 0; i < 3; i++) { - out.write("a,a\n".getBytes()); - } - out.close(); - return f; } - - /** - * Method to fetch table data - * - * @param table table name - * @param database database - * @return list of columns in comma seperated way - * @throws Exception if any error occurs - */ - private List getTableData(String table, String database) throws Exception { - HiveConf conf = new HiveConf(); - conf.addResource("hive-site.xml"); - ArrayList results = new ArrayList(); - ArrayList temp = new ArrayList(); - Hive hive = Hive.get(conf); - org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); - FetchWork work; - if (!tbl.getPartCols().isEmpty()) { - List partitions = hive.getPartitions(tbl); - List partDesc = new ArrayList(); - List partLocs = new ArrayList(); - for (Partition part : partitions) { - partLocs.add(part.getLocation()); - partDesc.add(Utilities.getPartitionDesc(part)); - } - work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl)); - work.setLimit(100); - } else { - work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl)); - } - FetchTask task = new FetchTask(); - task.setWork(work); - task.initialize(conf, null, null); - task.fetch(temp); - for (String str : temp) { - results.add(str.replace("\t", ",")); - } - return results; + LOG.info("File permissions verified"); + } + + /** + * Create a input file for map + * + * @return absolute path of the file. 
+ * @throws IOException if any error encountered + */ + private Path createInputFile() throws IOException { + Path f = new Path(workDir + "/MultiTableInput.txt"); + FileSystem fs = FileSystem.get(mrConf); + if (fs.exists(f)) { + fs.delete(f, true); } - - private static class MyMapper extends - Mapper { - - private int i = 0; - - @Override - protected void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - HCatRecord record = null; - String[] splits = value.toString().split(","); - switch (i) { - case 0: - record = new DefaultHCatRecord(2); - record.set(0, splits[0]); - record.set(1, splits[1]); - break; - case 1: - record = new DefaultHCatRecord(1); - record.set(0, splits[0]); - break; - case 2: - record = new DefaultHCatRecord(3); - record.set(0, splits[0]); - record.set(1, splits[1]); - record.set(2, "extra"); - break; - default: - Assert.fail("This should not happen!!!!!"); - } - MultiOutputFormat.write(tableNames[i], null, record, context); - i++; - } + OutputStream out = fs.create(f); + for (int i = 0; i < 3; i++) { + out.write("a,a\n".getBytes()); + } + out.close(); + return f; + } + + /** + * Method to fetch table data + * + * @param table table name + * @param database database + * @return list of columns in comma seperated way + * @throws Exception if any error occurs + */ + private List getTableData(String table, String database) throws Exception { + HiveConf conf = new HiveConf(); + conf.addResource("hive-site.xml"); + ArrayList results = new ArrayList(); + ArrayList temp = new ArrayList(); + Hive hive = Hive.get(conf); + org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); + FetchWork work; + if (!tbl.getPartCols().isEmpty()) { + List partitions = hive.getPartitions(tbl); + List partDesc = new ArrayList(); + List partLocs = new ArrayList(); + for (Partition part : partitions) { + partLocs.add(part.getLocation()); + partDesc.add(Utilities.getPartitionDesc(part)); + } + work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl)); + work.setLimit(100); + } else { + work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl)); + } + FetchTask task = new FetchTask(); + task.setWork(work); + task.initialize(conf, null, null); + task.fetch(temp); + for (String str : temp) { + results.add(str.replace("\t", ",")); + } + return results; + } + + private static class MyMapper extends + Mapper { + + private int i = 0; + + @Override + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + HCatRecord record = null; + String[] splits = value.toString().split(","); + switch (i) { + case 0: + record = new DefaultHCatRecord(2); + record.set(0, splits[0]); + record.set(1, splits[1]); + break; + case 1: + record = new DefaultHCatRecord(1); + record.set(0, splits[0]); + break; + case 2: + record = new DefaultHCatRecord(3); + record.set(0, splits[0]); + record.set(1, splits[1]); + record.set(2, "extra"); + break; + default: + Assert.fail("This should not happen!!!!!"); + } + MultiOutputFormat.write(tableNames[i], null, record, context); + i++; } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java index 2368417..e8aaab9 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java +++ 
b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java @@ -41,97 +41,97 @@ public class TestHCatNonPartitioned extends HCatMapReduceTest { - private static List writeRecords; - static List partitionColumns; + private static List writeRecords; + static List partitionColumns; - @BeforeClass - public static void oneTimeSetUp() throws Exception { + @BeforeClass + public static void oneTimeSetUp() throws Exception { - dbName = null; //test if null dbName works ("default" is used) - tableName = "testHCatNonPartitionedTable"; + dbName = null; //test if null dbName works ("default" is used) + tableName = "testHCatNonPartitionedTable"; - writeRecords = new ArrayList(); + writeRecords = new ArrayList(); - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); - objList.add(i); - objList.add("strvalue" + i); - writeRecords.add(new DefaultHCatRecord(objList)); - } - - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - } - - @Override - protected List getPartitionKeys() { - List fields = new ArrayList(); - //empty list, non partitioned - return fields; + objList.add(i); + objList.add("strvalue" + i); + writeRecords.add(new DefaultHCatRecord(objList)); } - @Override - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + } + + @Override + protected List getPartitionKeys() { + List fields = new ArrayList(); + //empty list, non partitioned + return fields; + } + + @Override + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + + @Test + public void testHCatNonPartitionedTable() throws Exception { + + Map partitionMap = new HashMap(); + runMRCreate(null, partitionColumns, writeRecords, 10, true); + + //Test for duplicate publish + IOException exc = null; + try { + runMRCreate(null, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_NON_EMPTY_TABLE, ((HCatException) exc).getErrorType()); - @Test - public void testHCatNonPartitionedTable() throws Exception { + //Test for publish with invalid partition key name + exc = null; + partitionMap.clear(); + partitionMap.put("px", "p1value2"); - Map partitionMap = new HashMap(); - runMRCreate(null, partitionColumns, writeRecords, 10, true); - - //Test for duplicate publish - IOException exc = null; - try { - runMRCreate(null, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; + } - assertTrue(exc != null); - assertTrue(exc 
instanceof HCatException); - assertEquals(ErrorType.ERROR_NON_EMPTY_TABLE, ((HCatException) exc).getErrorType()); + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); - //Test for publish with invalid partition key name - exc = null; - partitionMap.clear(); - partitionMap.put("px", "p1value2"); + //Read should get 10 rows + runMRRead(10); - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + hiveReadTest(); + } - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); + //Test that data inserted through hcatoutputformat is readable from hive + private void hiveReadTest() throws Exception { - //Read should get 10 rows - runMRRead(10); + String query = "select * from " + tableName; + int retCode = driver.run(query).getResponseCode(); - hiveReadTest(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } - //Test that data inserted through hcatoutputformat is readable from hive - private void hiveReadTest() throws Exception { - - String query = "select * from " + tableName; - int retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - assertEquals(10, res.size()); - } + ArrayList res = new ArrayList(); + driver.getResults(res); + assertEquals(10, res.size()); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java index 4c71869..956f96f 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java @@ -47,121 +47,121 @@ public class TestHCatOutputFormat extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatOutputFormat.class); - private HiveMetaStoreClient client; - private HiveConf hiveConf; - - private static final String dbName = "hcatOutputFormatTestDB"; - private static final String tblName = "hcatOutputFormatTestTable"; - - @Override - protected void setUp() throws Exception { - super.setUp(); - hiveConf = new HiveConf(this.getClass()); - - try { - client = new HiveMetaStoreClient(hiveConf, null); - - initTable(); - } catch (Throwable e) { - LOG.error("Unable to open the metastore", e); - throw new Exception(e); - } - } - - @Override - protected void tearDown() throws Exception { - try { - super.tearDown(); - client.dropTable(dbName, tblName); - client.dropDatabase(dbName); - - client.close(); - } catch (Throwable e) { - LOG.error("Unable to close metastore", e); - throw new Exception(e); - } - } - - private void initTable() throws Exception { - - try { - client.dropTable(dbName, tblName); - } catch (Exception e) { - } - try { - client.dropDatabase(dbName); - } catch (Exception e) { - } - client.createDatabase(new Database(dbName, "", null, null)); - assertNotNull((client.getDatabase(dbName).getLocationUri())); - - List fields = new ArrayList(); - fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); - - Table tbl = new Table(); - tbl.setDbName(dbName); - 
tbl.setTableName(tblName); - StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(fields); - tbl.setSd(sd); - - //sd.setLocation("hdfs://tmp"); - sd.setInputFormat(RCFileInputFormat.class.getName()); - sd.setOutputFormat(RCFileOutputFormat.class.getName()); - sd.setParameters(new HashMap()); - sd.getParameters().put("test_param_1", "Use this for comments etc"); - //sd.setBucketCols(new ArrayList(2)); - //sd.getBucketCols().add("name"); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib( - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); - tbl.setPartitionKeys(fields); - - Map tableParams = new HashMap(); - tableParams.put("hcat.testarg", "testArgValue"); - - tbl.setParameters(tableParams); - - client.createTable(tbl); - Path tblPath = new Path(client.getTable(dbName, tblName).getSd().getLocation()); - assertTrue(tblPath.getFileSystem(hiveConf).mkdirs(new Path(tblPath, "colname=p1"))); - - } - - public void testSetOutput() throws Exception { - Configuration conf = new Configuration(); - Job job = new Job(conf, "test outputformat"); + private static final Logger LOG = LoggerFactory.getLogger(TestHCatOutputFormat.class); + private HiveMetaStoreClient client; + private HiveConf hiveConf; - Map partitionValues = new HashMap(); - partitionValues.put("colname", "p1"); - //null server url means local mode - OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues); + private static final String dbName = "hcatOutputFormatTestDB"; + private static final String tblName = "hcatOutputFormatTestTable"; - HCatOutputFormat.setOutput(job, info); - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job); + @Override + protected void setUp() throws Exception { + super.setUp(); + hiveConf = new HiveConf(this.getClass()); - assertNotNull(jobInfo.getTableInfo()); - assertEquals(1, jobInfo.getPartitionValues().size()); - assertEquals("p1", jobInfo.getPartitionValues().get("colname")); - assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size()); - assertEquals("colname", jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName()); + try { + client = new HiveMetaStoreClient(hiveConf, null); - publishTest(job); + initTable(); + } catch (Throwable e) { + LOG.error("Unable to open the metastore", e); + throw new Exception(e); } + } + + @Override + protected void tearDown() throws Exception { + try { + super.tearDown(); + client.dropTable(dbName, tblName); + client.dropDatabase(dbName); + + client.close(); + } catch (Throwable e) { + LOG.error("Unable to close metastore", e); + throw new Exception(e); + } + } - public void publishTest(Job job) throws Exception { - OutputCommitter committer = new FileOutputCommitterContainer(job, null); - committer.commitJob(job); - - Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1")); - assertNotNull(part); + private void initTable() throws Exception { - StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters()); - assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue"); - assertTrue(part.getSd().getLocation().indexOf("p1") != -1); + try { + client.dropTable(dbName, tblName); + } catch (Exception e) { + } + try { + client.dropDatabase(dbName); + } catch (Exception e) { } + client.createDatabase(new Database(dbName, "", null, null)); + 
assertNotNull((client.getDatabase(dbName).getLocationUri())); + + List fields = new ArrayList(); + fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); + + Table tbl = new Table(); + tbl.setDbName(dbName); + tbl.setTableName(tblName); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(fields); + tbl.setSd(sd); + + //sd.setLocation("hdfs://tmp"); + sd.setInputFormat(RCFileInputFormat.class.getName()); + sd.setOutputFormat(RCFileOutputFormat.class.getName()); + sd.setParameters(new HashMap()); + sd.getParameters().put("test_param_1", "Use this for comments etc"); + //sd.setBucketCols(new ArrayList(2)); + //sd.getBucketCols().add("name"); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib( + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); + tbl.setPartitionKeys(fields); + + Map tableParams = new HashMap(); + tableParams.put("hcat.testarg", "testArgValue"); + + tbl.setParameters(tableParams); + + client.createTable(tbl); + Path tblPath = new Path(client.getTable(dbName, tblName).getSd().getLocation()); + assertTrue(tblPath.getFileSystem(hiveConf).mkdirs(new Path(tblPath, "colname=p1"))); + + } + + public void testSetOutput() throws Exception { + Configuration conf = new Configuration(); + Job job = new Job(conf, "test outputformat"); + + Map partitionValues = new HashMap(); + partitionValues.put("colname", "p1"); + //null server url means local mode + OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues); + + HCatOutputFormat.setOutput(job, info); + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job); + + assertNotNull(jobInfo.getTableInfo()); + assertEquals(1, jobInfo.getPartitionValues().size()); + assertEquals("p1", jobInfo.getPartitionValues().get("colname")); + assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size()); + assertEquals("colname", jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName()); + + publishTest(job); + } + + public void publishTest(Job job) throws Exception { + OutputCommitter committer = new FileOutputCommitterContainer(job, null); + committer.commitJob(job); + + Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1")); + assertNotNull(part); + + StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters()); + assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue"); + assertTrue(part.getSd().getLocation().indexOf("p1") != -1); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java index 6341f64..383a7a2 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java @@ -66,201 +66,201 @@ import org.junit.Test; public class TestHCatPartitionPublish { - private static Configuration mrConf = null; - private static FileSystem fs = null; - private static MiniMRCluster mrCluster = null; - private static boolean isServerRunning = false; - private static final int msPort = 20101; - private static HiveConf hcatConf; - private static HiveMetaStoreClient msc; - private static SecurityManager securityManager; - - 
@BeforeClass - public static void setup() throws Exception { - String testDir = System.getProperty("test.data.dir", "./"); - testDir = testDir + "/test_hcat_partitionpublish_" + Math.abs(new Random().nextLong()) + "/"; - File workDir = new File(new File(testDir).getCanonicalPath()); - FileUtil.fullyDelete(workDir); - workDir.mkdirs(); - Configuration conf = new Configuration(true); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - - fs = FileSystem.get(conf); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - // LocalJobRunner does not work with mapreduce OutputCommitter. So need - // to use MiniMRCluster. MAPREDUCE-2350 - mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, - new JobConf(conf)); - mrConf = mrCluster.createJobConf(); - - if (isServerRunning) { - return; - } - - MetaStoreUtils.startMetaStore(msPort, ShimLoader - .getHadoopThriftAuthBridge()); - Thread.sleep(10000); - isServerRunning = true; - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - - hcatConf = new HiveConf(TestHCatPartitionPublish.class); - hcatConf.set("hive.metastore.local", "false"); - hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" - + msPort); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, - "false"); - msc = new HiveMetaStoreClient(hcatConf, null); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + private static Configuration mrConf = null; + private static FileSystem fs = null; + private static MiniMRCluster mrCluster = null; + private static boolean isServerRunning = false; + private static final int msPort = 20101; + private static HiveConf hcatConf; + private static HiveMetaStoreClient msc; + private static SecurityManager securityManager; + + @BeforeClass + public static void setup() throws Exception { + String testDir = System.getProperty("test.data.dir", "./"); + testDir = testDir + "/test_hcat_partitionpublish_" + Math.abs(new Random().nextLong()) + "/"; + File workDir = new File(new File(testDir).getCanonicalPath()); + FileUtil.fullyDelete(workDir); + workDir.mkdirs(); + Configuration conf = new Configuration(true); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + + fs = FileSystem.get(conf); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + // LocalJobRunner does not work with mapreduce OutputCommitter. So need + // to use MiniMRCluster. 
MAPREDUCE-2350 + mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, + new JobConf(conf)); + mrConf = mrCluster.createJobConf(); + + if (isServerRunning) { + return; } - @AfterClass - public static void tearDown() throws IOException { - if (mrCluster != null) { - mrCluster.shutdown(); - } - System.setSecurityManager(securityManager); - isServerRunning = false; + MetaStoreUtils.startMetaStore(msPort, ShimLoader + .getHadoopThriftAuthBridge()); + Thread.sleep(10000); + isServerRunning = true; + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + + hcatConf = new HiveConf(TestHCatPartitionPublish.class); + hcatConf.set("hive.metastore.local", "false"); + hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + + msPort); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, + "false"); + msc = new HiveMetaStoreClient(hcatConf, null); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + @AfterClass + public static void tearDown() throws IOException { + if (mrCluster != null) { + mrCluster.shutdown(); + } + System.setSecurityManager(securityManager); + isServerRunning = false; + } + + @Test + public void testPartitionPublish() throws Exception { + String dbName = "default"; + String tableName = "testHCatPartitionedTable"; + createTable(null, tableName); + + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value1"); + partitionMap.put("part0", "p0value1"); + + ArrayList hcatTableColumns = new ArrayList(); + for (FieldSchema fs : getTableColumns()) { + hcatTableColumns.add(HCatSchemaUtils.getHCatFieldSchema(fs)); } - @Test - public void testPartitionPublish() throws Exception { - String dbName = "default"; - String tableName = "testHCatPartitionedTable"; - createTable(null, tableName); - - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value1"); - partitionMap.put("part0", "p0value1"); - - ArrayList hcatTableColumns = new ArrayList(); - for (FieldSchema fs : getTableColumns()) { - hcatTableColumns.add(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - - runMRCreateFail(dbName, tableName, partitionMap, hcatTableColumns); - List ptns = msc.listPartitionNames(dbName, tableName, - (short) 10); - Assert.assertEquals(0, ptns.size()); - Table table = msc.getTable(dbName, tableName); - Assert.assertTrue(table != null); - // In Windows, we cannot remove the output directory when job fail. See - // FileOutputCommitterContainer.abortJob - if (!Shell.WINDOWS) { - Assert.assertFalse(fs.exists(new Path(table.getSd().getLocation() - + "/part1=p1value1/part0=p0value1"))); - } + runMRCreateFail(dbName, tableName, partitionMap, hcatTableColumns); + List ptns = msc.listPartitionNames(dbName, tableName, + (short) 10); + Assert.assertEquals(0, ptns.size()); + Table table = msc.getTable(dbName, tableName); + Assert.assertTrue(table != null); + // In Windows, we cannot remove the output directory when job fail. 
See + // FileOutputCommitterContainer.abortJob + if (!Shell.WINDOWS) { + Assert.assertFalse(fs.exists(new Path(table.getSd().getLocation() + + "/part1=p1value1/part0=p0value1"))); } + } - void runMRCreateFail( - String dbName, String tableName, Map partitionValues, - List columns) throws Exception { + void runMRCreateFail( + String dbName, String tableName, Map partitionValues, + List columns) throws Exception { - Job job = new Job(mrConf, "hcat mapreduce write fail test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(TestHCatPartitionPublish.MapFail.class); + Job job = new Job(mrConf, "hcat mapreduce write fail test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(TestHCatPartitionPublish.MapFail.class); - // input/output settings - job.setInputFormatClass(TextInputFormat.class); + // input/output settings + job.setInputFormatClass(TextInputFormat.class); - Path path = new Path(fs.getWorkingDirectory(), - "mapred/testHCatMapReduceInput"); - // The write count does not matter, as the map will fail in its first - // call. - createInputFile(path, 5); + Path path = new Path(fs.getWorkingDirectory(), + "mapred/testHCatMapReduceInput"); + // The write count does not matter, as the map will fail in its first + // call. + createInputFile(path, 5); - TextInputFormat.setInputPaths(job, path); - job.setOutputFormatClass(HCatOutputFormat.class); - OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, - partitionValues); - HCatOutputFormat.setOutput(job, outputJobInfo); + TextInputFormat.setInputPaths(job, path); + job.setOutputFormatClass(HCatOutputFormat.class); + OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, + partitionValues); + HCatOutputFormat.setOutput(job, outputJobInfo); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(DefaultHCatRecord.class); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(DefaultHCatRecord.class); - job.setNumReduceTasks(0); + job.setNumReduceTasks(0); - HCatOutputFormat.setSchema(job, new HCatSchema(columns)); + HCatOutputFormat.setSchema(job, new HCatSchema(columns)); - boolean success = job.waitForCompletion(true); - Assert.assertTrue(success == false); - } + boolean success = job.waitForCompletion(true); + Assert.assertTrue(success == false); + } - private void createInputFile(Path path, int rowCount) throws IOException { - if (fs.exists(path)) { - fs.delete(path, true); - } - FSDataOutputStream os = fs.create(path); - for (int i = 0; i < rowCount; i++) { - os.writeChars(i + "\n"); - } - os.close(); + private void createInputFile(Path path, int rowCount) throws IOException { + if (fs.exists(path)) { + fs.delete(path, true); } - - public static class MapFail extends - Mapper { - - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - { - throw new IOException("Exception to mimic job failure."); - } - } + FSDataOutputStream os = fs.create(path); + for (int i = 0; i < rowCount; i++) { + os.writeChars(i + "\n"); } - - private void createTable(String dbName, String tableName) throws Exception { - String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME - : dbName; - try { - msc.dropTable(databaseName, tableName); - } catch (Exception e) { - } // can fail with NoSuchObjectException - - Table tbl = new Table(); - tbl.setDbName(databaseName); - tbl.setTableName(tableName); - tbl.setTableType("MANAGED_TABLE"); - StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(getTableColumns()); - tbl.setPartitionKeys(getPartitionKeys()); - tbl.setSd(sd); - sd.setBucketCols(new ArrayList(2)); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib(ColumnarSerDe.class.getName()); - sd.setInputFormat(RCFileInputFormat.class.getName()); - sd.setOutputFormat(RCFileOutputFormat.class.getName()); - - Map tableParams = new HashMap(); - tbl.setParameters(tableParams); - - msc.createTable(tbl); - } - - protected List getPartitionKeys() { - List fields = new ArrayList(); - // Defining partition names in unsorted order - fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); - fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); - return fields; - } - - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + os.close(); + } + + public static class MapFail extends + Mapper { + + @Override + public void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + { + throw new IOException("Exception to mimic job failure."); + } } + } + + private void createTable(String dbName, String tableName) throws Exception { + String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME + : dbName; + try { + msc.dropTable(databaseName, tableName); + } catch (Exception e) { + } // can fail with NoSuchObjectException + + Table tbl = new Table(); + tbl.setDbName(databaseName); + tbl.setTableName(tableName); + tbl.setTableType("MANAGED_TABLE"); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(getTableColumns()); + tbl.setPartitionKeys(getPartitionKeys()); + tbl.setSd(sd); + sd.setBucketCols(new ArrayList(2)); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib(ColumnarSerDe.class.getName()); + sd.setInputFormat(RCFileInputFormat.class.getName()); + sd.setOutputFormat(RCFileOutputFormat.class.getName()); + + Map tableParams = new HashMap(); + tbl.setParameters(tableParams); + + msc.createTable(tbl); + } + + protected List getPartitionKeys() { + List fields = new ArrayList(); + // Defining partition names in unsorted order + fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); + fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java index c11af31..577e06d 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java @@ -42,310 +42,310 @@ public class TestHCatPartitioned extends HCatMapReduceTest { - private static List writeRecords; - private static List partitionColumns; + private static List writeRecords; + private static List partitionColumns; - @BeforeClass - public static void oneTimeSetUp() throws Exception { + @BeforeClass + public static void oneTimeSetUp() throws Exception { - tableName = "testHCatPartitionedTable"; - writeRecords = new ArrayList(); + tableName = "testHCatPartitionedTable"; + writeRecords = new ArrayList(); - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); - objList.add(i); - objList.add("strvalue" + i); - writeRecords.add(new DefaultHCatRecord(objList)); - } + objList.add(i); + objList.add("strvalue" + i); + writeRecords.add(new DefaultHCatRecord(objList)); + } + + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + } + + + @Override + protected List getPartitionKeys() { + List fields = new ArrayList(); + //Defining partition names in unsorted order + fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); + fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + @Override + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + 
fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + + @Test + public void testHCatPartitionedTable() throws Exception { + + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value1"); + partitionMap.put("part0", "p0value1"); + + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + partitionMap.clear(); + partitionMap.put("PART1", "p1value2"); + partitionMap.put("PART0", "p0value2"); + + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + + //Test for duplicate publish + IOException exc = null; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType()); + + //Test for publish with invalid partition key name + exc = null; + partitionMap.clear(); + partitionMap.put("px1", "p1value2"); + partitionMap.put("px0", "p0value2"); - @Override - protected List getPartitionKeys() { - List fields = new ArrayList(); - //Defining partition names in unsorted order - fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); - fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } - @Override - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType()); + + //Test for publish with missing partition key values + exc = null; + partitionMap.clear(); + partitionMap.put("px", "p1value2"); + + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); - @Test - public void testHCatPartitionedTable() throws Exception { - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value1"); - partitionMap.put("part0", "p0value1"); + //Test for null partition value map + exc = null; + try { + runMRCreate(null, partitionColumns, writeRecords, 20, false); + } catch (IOException e) { + exc = e; + } - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + assertTrue(exc == null); +// assertTrue(exc instanceof HCatException); +// assertEquals(ErrorType.ERROR_PUBLISHING_PARTITION, ((HCatException) exc).getErrorType()); + // With Dynamic partitioning, this isn't an error that the keyValues specified didn't values - partitionMap.clear(); - partitionMap.put("PART1", "p1value2"); - partitionMap.put("PART0", "p0value2"); + //Read should get 10 + 20 rows + runMRRead(30); - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + //Read with partition filter + runMRRead(10, "part1 = 
\"p1value1\""); + runMRRead(20, "part1 = \"p1value2\""); + runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\""); + runMRRead(10, "part0 = \"p0value1\""); + runMRRead(20, "part0 = \"p0value2\""); + runMRRead(30, "part0 = \"p0value1\" or part0 = \"p0value2\""); - //Test for duplicate publish - IOException exc = null; - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + tableSchemaTest(); + columnOrderChangeTest(); + hiveReadTest(); + } - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType()); - //Test for publish with invalid partition key name - exc = null; - partitionMap.clear(); - partitionMap.put("px1", "p1value2"); - partitionMap.put("px0", "p0value2"); + //test that new columns gets added to table schema + private void tableSchemaTest() throws Exception { - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + HCatSchema tableSchema = getTableSchema(); - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType()); + assertEquals(4, tableSchema.getFields().size()); - //Test for publish with missing partition key values - exc = null; - partitionMap.clear(); - partitionMap.put("px", "p1value2"); + //Update partition schema to have 3 fields + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } + writeRecords = new ArrayList(); - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); + objList.add(i); + objList.add("strvalue" + i); + objList.add("str2value" + i); - //Test for null partition value map - exc = null; - try { - runMRCreate(null, partitionColumns, writeRecords, 20, false); - } catch (IOException e) { - exc = e; - } + writeRecords.add(new DefaultHCatRecord(objList)); + } - assertTrue(exc == null); -// assertTrue(exc instanceof HCatException); -// assertEquals(ErrorType.ERROR_PUBLISHING_PARTITION, ((HCatException) exc).getErrorType()); - // With Dynamic partitioning, this isn't an error that the keyValues specified didn't values - - //Read should get 10 + 20 rows - runMRRead(30); - - //Read with partition filter - runMRRead(10, "part1 = \"p1value1\""); - runMRRead(20, "part1 = \"p1value2\""); - runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\""); - runMRRead(10, "part0 = \"p0value1\""); - runMRRead(20, "part0 = \"p0value2\""); - runMRRead(30, "part0 = \"p0value1\" or part0 = \"p0value2\""); - - tableSchemaTest(); - columnOrderChangeTest(); - hiveReadTest(); + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value5"); + partitionMap.put("part0", "p0value5"); + + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + + tableSchema = getTableSchema(); + + //assert that c3 has got added to table schema + assertEquals(5, tableSchema.getFields().size()); + assertEquals("c1", tableSchema.getFields().get(0).getName()); + assertEquals("c2", tableSchema.getFields().get(1).getName()); + assertEquals("c3", 
tableSchema.getFields().get(2).getName()); + assertEquals("part1", tableSchema.getFields().get(3).getName()); + assertEquals("part0", tableSchema.getFields().get(4).getName()); + + //Test that changing column data type fails + partitionMap.clear(); + partitionMap.put("part1", "p1value6"); + partitionMap.put("part0", "p0value6"); + + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, ""))); + + IOException exc = null; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; } + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType()); + + //Test that partition key is not allowed in data + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, ""))); - //test that new columns gets added to table schema - private void tableSchemaTest() throws Exception { - - HCatSchema tableSchema = getTableSchema(); - - assertEquals(4, tableSchema.getFields().size()); - - //Update partition schema to have 3 fields - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - - writeRecords = new ArrayList(); - - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("strvalue" + i); - objList.add("str2value" + i); - - writeRecords.add(new DefaultHCatRecord(objList)); - } - - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value5"); - partitionMap.put("part0", "p0value5"); - - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - - tableSchema = getTableSchema(); - - //assert that c3 has got added to table schema - assertEquals(5, tableSchema.getFields().size()); - assertEquals("c1", tableSchema.getFields().get(0).getName()); - assertEquals("c2", tableSchema.getFields().get(1).getName()); - assertEquals("c3", tableSchema.getFields().get(2).getName()); - assertEquals("part1", tableSchema.getFields().get(3).getName()); - assertEquals("part0", tableSchema.getFields().get(4).getName()); - - //Test that changing column data type fails - partitionMap.clear(); - partitionMap.put("part1", "p1value6"); - partitionMap.put("part0", "p0value6"); - - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, ""))); - - IOException exc = null; - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType()); - - //Test that partition key is not allowed in 
data - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, ""))); - - List recordsContainingPartitionCols = new ArrayList(20); - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("c2value" + i); - objList.add("c3value" + i); - objList.add("p1value6"); - - recordsContainingPartitionCols.add(new DefaultHCatRecord(objList)); - } - - exc = null; - try { - runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true); - } catch (IOException e) { - exc = e; - } - - List records = runMRRead(20, "part1 = \"p1value6\""); - assertEquals(20, records.size()); - records = runMRRead(20, "part0 = \"p0value6\""); - assertEquals(20, records.size()); - Integer i = 0; - for (HCatRecord rec : records) { - assertEquals(5, rec.size()); - assertTrue(rec.get(0).equals(i)); - assertTrue(rec.get(1).equals("c2value" + i)); - assertTrue(rec.get(2).equals("c3value" + i)); - assertTrue(rec.get(3).equals("p1value6")); - assertTrue(rec.get(4).equals("p0value6")); - i++; - } + List recordsContainingPartitionCols = new ArrayList(20); + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("c2value" + i); + objList.add("c3value" + i); + objList.add("p1value6"); + + recordsContainingPartitionCols.add(new DefaultHCatRecord(objList)); } - //check behavior while change the order of columns - private void columnOrderChangeTest() throws Exception { + exc = null; + try { + runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true); + } catch (IOException e) { + exc = e; + } - HCatSchema tableSchema = getTableSchema(); + List records = runMRRead(20, "part1 = \"p1value6\""); + assertEquals(20, records.size()); + records = runMRRead(20, "part0 = \"p0value6\""); + assertEquals(20, records.size()); + Integer i = 0; + for (HCatRecord rec : records) { + assertEquals(5, rec.size()); + assertTrue(rec.get(0).equals(i)); + assertTrue(rec.get(1).equals("c2value" + i)); + assertTrue(rec.get(2).equals("c3value" + i)); + assertTrue(rec.get(3).equals("p1value6")); + assertTrue(rec.get(4).equals("p0value6")); + i++; + } + } - assertEquals(5, tableSchema.getFields().size()); + //check behavior while change the order of columns + private void columnOrderChangeTest() throws Exception { - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + HCatSchema tableSchema = getTableSchema(); + assertEquals(5, tableSchema.getFields().size()); - writeRecords = new ArrayList(); + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", 
serdeConstants.STRING_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - for (int i = 0; i < 10; i++) { - List objList = new ArrayList(); - objList.add(i); - objList.add("co strvalue" + i); - objList.add("co str2value" + i); + writeRecords = new ArrayList(); - writeRecords.add(new DefaultHCatRecord(objList)); - } + for (int i = 0; i < 10; i++) { + List objList = new ArrayList(); - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value8"); - partitionMap.put("part0", "p0value8"); + objList.add(i); + objList.add("co strvalue" + i); + objList.add("co str2value" + i); - Exception exc = null; - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - } catch (IOException e) { - exc = e; - } + writeRecords.add(new DefaultHCatRecord(objList)); + } - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType()); + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value8"); + partitionMap.put("part0", "p0value8"); + Exception exc = null; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + } catch (IOException e) { + exc = e; + } - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType()); - writeRecords = new ArrayList(); - for (int i = 0; i < 10; i++) { - List objList = new ArrayList(); + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - objList.add(i); - objList.add("co strvalue" + i); + writeRecords = new ArrayList(); - writeRecords.add(new DefaultHCatRecord(objList)); - } + for (int i = 0; i < 10; i++) { + List objList = new ArrayList(); - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + objList.add(i); + objList.add("co strvalue" + i); - //Read should get 10 + 20 + 10 + 10 + 20 rows - runMRRead(70); + writeRecords.add(new DefaultHCatRecord(objList)); } - //Test that data inserted through hcatoutputformat is readable from hive - private void hiveReadTest() throws Exception { + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - String query = "select * from " + tableName; - int retCode = driver.run(query).getResponseCode(); + //Read should get 10 + 20 + 10 + 10 + 20 rows + runMRRead(70); + } - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } + //Test that data inserted through hcatoutputformat is readable from hive + private void hiveReadTest() throws Exception { - ArrayList res = new ArrayList(); - driver.getResults(res); - assertEquals(70, res.size()); + String query = "select * from " + tableName; + int retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); } + + ArrayList res = new ArrayList(); + driver.getResults(res); + assertEquals(70, res.size()); + } } diff 
--git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestInputJobInfo.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestInputJobInfo.java index b80bc36..84b8b50 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestInputJobInfo.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestInputJobInfo.java @@ -26,23 +26,23 @@ public class TestInputJobInfo extends HCatBaseTest { - @Test - public void test4ArgCreate() throws Exception { - Properties p = new Properties(); - p.setProperty("key", "value"); - InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter", p); - Assert.assertEquals("Db", jobInfo.getDatabaseName()); - Assert.assertEquals("Table", jobInfo.getTableName()); - Assert.assertEquals("Filter", jobInfo.getFilter()); - Assert.assertEquals("value", jobInfo.getProperties().getProperty("key")); - } + @Test + public void test4ArgCreate() throws Exception { + Properties p = new Properties(); + p.setProperty("key", "value"); + InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter", p); + Assert.assertEquals("Db", jobInfo.getDatabaseName()); + Assert.assertEquals("Table", jobInfo.getTableName()); + Assert.assertEquals("Filter", jobInfo.getFilter()); + Assert.assertEquals("value", jobInfo.getProperties().getProperty("key")); + } - @Test - public void test3ArgCreate() throws Exception { - InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter"); - Assert.assertEquals("Db", jobInfo.getDatabaseName()); - Assert.assertEquals("Table", jobInfo.getTableName()); - Assert.assertEquals("Filter", jobInfo.getFilter()); - Assert.assertEquals(0, jobInfo.getProperties().size()); - } + @Test + public void test3ArgCreate() throws Exception { + InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter"); + Assert.assertEquals("Db", jobInfo.getDatabaseName()); + Assert.assertEquals("Table", jobInfo.getTableName()); + Assert.assertEquals("Filter", jobInfo.getFilter()); + Assert.assertEquals(0, jobInfo.getProperties().size()); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestMultiOutputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestMultiOutputFormat.java index 0667efa..aaab95d 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestMultiOutputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestMultiOutputFormat.java @@ -62,273 +62,273 @@ public class TestMultiOutputFormat { - private static final Logger LOG = LoggerFactory.getLogger(TestMultiOutputFormat.class); - private static File workDir; - private static Configuration mrConf = null; - private static FileSystem fs = null; - private static MiniMRCluster mrCluster = null; - - @BeforeClass - public static void setup() throws IOException { - createWorkDir(); - Configuration conf = new Configuration(true); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - - fs = FileSystem.get(conf); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - // LocalJobRunner does not work with mapreduce OutputCommitter. So need - // to use MiniMRCluster. 
MAPREDUCE-2350 - mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, - new JobConf(conf)); - mrConf = mrCluster.createJobConf(); + private static final Logger LOG = LoggerFactory.getLogger(TestMultiOutputFormat.class); + private static File workDir; + private static Configuration mrConf = null; + private static FileSystem fs = null; + private static MiniMRCluster mrCluster = null; + + @BeforeClass + public static void setup() throws IOException { + createWorkDir(); + Configuration conf = new Configuration(true); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + + fs = FileSystem.get(conf); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + // LocalJobRunner does not work with mapreduce OutputCommitter. So need + // to use MiniMRCluster. MAPREDUCE-2350 + mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, + new JobConf(conf)); + mrConf = mrCluster.createJobConf(); + } + + private static void createWorkDir() throws IOException { + String testDir = System.getProperty("test.data.dir", "./"); + testDir = testDir + "/test_multiout_" + Math.abs(new Random().nextLong()) + "/"; + workDir = new File(new File(testDir).getCanonicalPath()); + FileUtil.fullyDelete(workDir); + workDir.mkdirs(); + } + + @AfterClass + public static void tearDown() throws IOException { + if (mrCluster != null) { + mrCluster.shutdown(); } - - private static void createWorkDir() throws IOException { - String testDir = System.getProperty("test.data.dir", "./"); - testDir = testDir + "/test_multiout_" + Math.abs(new Random().nextLong()) + "/"; - workDir = new File(new File(testDir).getCanonicalPath()); - FileUtil.fullyDelete(workDir); - workDir.mkdirs(); + FileUtil.fullyDelete(workDir); + } + + /** + * A test job that reads a input file and outputs each word and the index of + * the word encountered to a text file and sequence file with different key + * values. + */ + @Test + public void testMultiOutputFormatWithoutReduce() throws Throwable { + Job job = new Job(mrConf, "MultiOutNoReduce"); + job.setMapperClass(MultiOutWordIndexMapper.class); + job.setJarByClass(this.getClass()); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(MultiOutputFormat.class); + job.setNumReduceTasks(0); + + JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); + configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); + configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, + IntWritable.class); + Path outDir = new Path(workDir.getPath(), job.getJobName()); + FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); + FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); + + String fileContent = "Hello World"; + String inputFile = createInputFile(fileContent); + FileInputFormat.setInputPaths(job, new Path(inputFile)); + + //Test for merging of configs + DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs); + String dummyFile = createInputFile("dummy file"); + DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1") + .getConfiguration(), fs); + // duplicate of the value. 
Merging should remove duplicates + DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2") + .getConfiguration(), fs); + + configurer.configure(); + + // Verify if the configs are merged + Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration()); + List fileClassPathsList = Arrays.asList(fileClassPaths); + Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile))); + Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile))); + + URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration()); + List cacheFilesList = Arrays.asList(cacheFiles); + Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri())); + Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri())); + + Assert.assertTrue(job.waitForCompletion(true)); + + Path textOutPath = new Path(outDir, "out1/part-m-00000"); + String[] textOutput = readFully(textOutPath).split("\n"); + Path seqOutPath = new Path(outDir, "out2/part-m-00000"); + SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); + Text key = new Text(); + IntWritable value = new IntWritable(); + String[] words = fileContent.split(" "); + Assert.assertEquals(words.length, textOutput.length); + LOG.info("Verifying file contents"); + for (int i = 0; i < words.length; i++) { + Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]); + reader.next(key, value); + Assert.assertEquals(words[i], key.toString()); + Assert.assertEquals((i + 1), value.get()); } - - @AfterClass - public static void tearDown() throws IOException { - if (mrCluster != null) { - mrCluster.shutdown(); - } - FileUtil.fullyDelete(workDir); + Assert.assertFalse(reader.next(key, value)); + } + + /** + * A word count test job that reads a input file and outputs the count of + * words to a text file and sequence file with different key values. 
+ */ + @Test + public void testMultiOutputFormatWithReduce() throws Throwable { + Job job = new Job(mrConf, "MultiOutWithReduce"); + + job.setMapperClass(WordCountMapper.class); + job.setReducerClass(MultiOutWordCountReducer.class); + job.setJarByClass(this.getClass()); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(MultiOutputFormat.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); + + configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); + configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, + IntWritable.class); + configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, + IntWritable.class); + Path outDir = new Path(workDir.getPath(), job.getJobName()); + FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); + FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); + + configurer.configure(); + + String fileContent = "Hello World Hello World World"; + String inputFile = createInputFile(fileContent); + FileInputFormat.setInputPaths(job, new Path(inputFile)); + + Assert.assertTrue(job.waitForCompletion(true)); + + Path textOutPath = new Path(outDir, "out1/part-r-00000"); + String[] textOutput = readFully(textOutPath).split("\n"); + Path seqOutPath = new Path(outDir, "out2/part-r-00000"); + SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); + Text key = new Text(); + IntWritable value = new IntWritable(); + String[] words = "Hello World".split(" "); + Assert.assertEquals(words.length, textOutput.length); + for (int i = 0; i < words.length; i++) { + Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]); + reader.next(key, value); + Assert.assertEquals(words[i], key.toString()); + Assert.assertEquals((i + 2), value.get()); } - - /** - * A test job that reads a input file and outputs each word and the index of - * the word encountered to a text file and sequence file with different key - * values. - */ - @Test - public void testMultiOutputFormatWithoutReduce() throws Throwable { - Job job = new Job(mrConf, "MultiOutNoReduce"); - job.setMapperClass(MultiOutWordIndexMapper.class); - job.setJarByClass(this.getClass()); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(MultiOutputFormat.class); - job.setNumReduceTasks(0); - - JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); - configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); - configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, - IntWritable.class); - Path outDir = new Path(workDir.getPath(), job.getJobName()); - FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); - FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); - - String fileContent = "Hello World"; - String inputFile = createInputFile(fileContent); - FileInputFormat.setInputPaths(job, new Path(inputFile)); - - //Test for merging of configs - DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs); - String dummyFile = createInputFile("dummy file"); - DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1") - .getConfiguration(), fs); - // duplicate of the value. 
Merging should remove duplicates - DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2") - .getConfiguration(), fs); - - configurer.configure(); - - // Verify if the configs are merged - Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration()); - List fileClassPathsList = Arrays.asList(fileClassPaths); - Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile))); - Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile))); - - URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration()); - List cacheFilesList = Arrays.asList(cacheFiles); - Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri())); - Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri())); - - Assert.assertTrue(job.waitForCompletion(true)); - - Path textOutPath = new Path(outDir, "out1/part-m-00000"); - String[] textOutput = readFully(textOutPath).split("\n"); - Path seqOutPath = new Path(outDir, "out2/part-m-00000"); - SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); - Text key = new Text(); - IntWritable value = new IntWritable(); - String[] words = fileContent.split(" "); - Assert.assertEquals(words.length, textOutput.length); - LOG.info("Verifying file contents"); - for (int i = 0; i < words.length; i++) { - Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]); - reader.next(key, value); - Assert.assertEquals(words[i], key.toString()); - Assert.assertEquals((i + 1), value.get()); - } - Assert.assertFalse(reader.next(key, value)); + Assert.assertFalse(reader.next(key, value)); + + } + + + /** + * Create a file for map input + * + * @return absolute path of the file. + * @throws IOException if any error encountered + */ + private String createInputFile(String content) throws IOException { + File f = File.createTempFile("input", "txt"); + FileWriter writer = new FileWriter(f); + writer.write(content); + writer.close(); + return f.getAbsolutePath(); + } + + private String readFully(Path file) throws IOException { + FSDataInputStream in = fs.open(file); + byte[] b = new byte[in.available()]; + in.readFully(b); + in.close(); + return new String(b); + } + + private static class MultiOutWordIndexMapper extends + Mapper { + + private IntWritable index = new IntWritable(1); + private Text word = new Text(); + + @Override + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + StringTokenizer itr = new StringTokenizer(value.toString()); + while (itr.hasMoreTokens()) { + word.set(itr.nextToken()); + MultiOutputFormat.write("out1", index, word, context); + MultiOutputFormat.write("out2", word, index, context); + index.set(index.get() + 1); + } } - - /** - * A word count test job that reads a input file and outputs the count of - * words to a text file and sequence file with different key values. 
- */ - @Test - public void testMultiOutputFormatWithReduce() throws Throwable { - Job job = new Job(mrConf, "MultiOutWithReduce"); - - job.setMapperClass(WordCountMapper.class); - job.setReducerClass(MultiOutWordCountReducer.class); - job.setJarByClass(this.getClass()); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(MultiOutputFormat.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(IntWritable.class); - - JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); - - configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); - configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, - IntWritable.class); - configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, - IntWritable.class); - Path outDir = new Path(workDir.getPath(), job.getJobName()); - FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); - FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); - - configurer.configure(); - - String fileContent = "Hello World Hello World World"; - String inputFile = createInputFile(fileContent); - FileInputFormat.setInputPaths(job, new Path(inputFile)); - - Assert.assertTrue(job.waitForCompletion(true)); - - Path textOutPath = new Path(outDir, "out1/part-r-00000"); - String[] textOutput = readFully(textOutPath).split("\n"); - Path seqOutPath = new Path(outDir, "out2/part-r-00000"); - SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); - Text key = new Text(); - IntWritable value = new IntWritable(); - String[] words = "Hello World".split(" "); - Assert.assertEquals(words.length, textOutput.length); - for (int i = 0; i < words.length; i++) { - Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]); - reader.next(key, value); - Assert.assertEquals(words[i], key.toString()); - Assert.assertEquals((i + 2), value.get()); - } - Assert.assertFalse(reader.next(key, value)); - + } + + private static class WordCountMapper extends + Mapper { + + private final static IntWritable one = new IntWritable(1); + private Text word = new Text(); + + @Override + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + StringTokenizer itr = new StringTokenizer(value.toString()); + while (itr.hasMoreTokens()) { + word.set(itr.nextToken()); + context.write(word, one); + } } - - - /** - * Create a file for map input - * - * @return absolute path of the file. 
- * @throws IOException if any error encountered - */ - private String createInputFile(String content) throws IOException { - File f = File.createTempFile("input", "txt"); - FileWriter writer = new FileWriter(f); - writer.write(content); - writer.close(); - return f.getAbsolutePath(); + } + + private static class MultiOutWordCountReducer extends + Reducer { + + private IntWritable count = new IntWritable(); + + @Override + protected void reduce(Text word, Iterable values, Context context) + throws IOException, InterruptedException { + int sum = 0; + for (IntWritable val : values) { + sum += val.get(); + } + count.set(sum); + MultiOutputFormat.write("out1", count, word, context); + MultiOutputFormat.write("out2", word, count, context); + MultiOutputFormat.write("out3", word, count, context); } + } - private String readFully(Path file) throws IOException { - FSDataInputStream in = fs.open(file); - byte[] b = new byte[in.available()]; - in.readFully(b); - in.close(); - return new String(b); - } + private static class NullOutputFormat extends + org.apache.hadoop.mapreduce.lib.output.NullOutputFormat { - private static class MultiOutWordIndexMapper extends - Mapper { - - private IntWritable index = new IntWritable(1); - private Text word = new Text(); - - @Override - protected void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - StringTokenizer itr = new StringTokenizer(value.toString()); - while (itr.hasMoreTokens()) { - word.set(itr.nextToken()); - MultiOutputFormat.write("out1", index, word, context); - MultiOutputFormat.write("out2", word, index, context); - index.set(index.get() + 1); - } + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) { + return new OutputCommitter() { + public void abortTask(TaskAttemptContext taskContext) { } - } - private static class WordCountMapper extends - Mapper { - - private final static IntWritable one = new IntWritable(1); - private Text word = new Text(); - - @Override - protected void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - StringTokenizer itr = new StringTokenizer(value.toString()); - while (itr.hasMoreTokens()) { - word.set(itr.nextToken()); - context.write(word, one); - } + public void cleanupJob(JobContext jobContext) { } - } - private static class MultiOutWordCountReducer extends - Reducer { - - private IntWritable count = new IntWritable(); - - @Override - protected void reduce(Text word, Iterable values, Context context) - throws IOException, InterruptedException { - int sum = 0; - for (IntWritable val : values) { - sum += val.get(); - } - count.set(sum); - MultiOutputFormat.write("out1", count, word, context); - MultiOutputFormat.write("out2", word, count, context); - MultiOutputFormat.write("out3", word, count, context); + public void commitJob(JobContext jobContext) { } - } - - private static class NullOutputFormat extends - org.apache.hadoop.mapreduce.lib.output.NullOutputFormat { - - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) { - return new OutputCommitter() { - public void abortTask(TaskAttemptContext taskContext) { - } - public void cleanupJob(JobContext jobContext) { - } - - public void commitJob(JobContext jobContext) { - } - - public void commitTask(TaskAttemptContext taskContext) { - Assert.fail("needsTaskCommit is false but commitTask was called"); - } + public void commitTask(TaskAttemptContext taskContext) { + Assert.fail("needsTaskCommit is false but 
commitTask was called"); + } - public boolean needsTaskCommit(TaskAttemptContext taskContext) { - return false; - } + public boolean needsTaskCommit(TaskAttemptContext taskContext) { + return false; + } - public void setupJob(JobContext jobContext) { - } + public void setupJob(JobContext jobContext) { + } - public void setupTask(TaskAttemptContext taskContext) { - } - }; + public void setupTask(TaskAttemptContext taskContext) { } + }; } + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java index 8d286ab..5e2b699 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java @@ -47,97 +47,97 @@ import org.junit.Test; public class TestPassProperties { - private static final String TEST_DATA_DIR = System.getProperty("user.dir") + - "/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - private static Driver driver; - private static PigServer server; - private static String[] input; - private static HiveConf hiveConf; - - public void Initialize() throws Exception { - hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - int numRows = 3; - input = new String[numRows]; - for (int i = 0; i < numRows; i++) { - String col1 = "a" + i; - String col2 = "b" + i; - input[i] = i + "," + col1 + "," + col2; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - server = new PigServer(ExecType.LOCAL); + private static final String TEST_DATA_DIR = System.getProperty("user.dir") + + "/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + private static Driver driver; + private static PigServer server; + private static String[] input; + private static HiveConf hiveConf; + + public void Initialize() throws Exception { + hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + + new File(TEST_WAREHOUSE_DIR).mkdirs(); + + int numRows = 3; + input = new String[numRows]; + for (int i = 0; i < numRows; i++) { + String col1 = "a" + i; + String col2 = "b" + i; + input[i] = i + "," + col1 + "," + col2; } - - @Test - public void testSequenceTableWriteReadMR() throws Exception { - Initialize(); - String createTable = "CREATE TABLE bad_props_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; - driver.run("drop table bad_props_table"); - 
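// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: the MultiOutputFormat
// configuration pattern exercised by TestMultiOutputFormat earlier in this
// diff. The helper name configureTwoOutputs is invented for illustration; the
// MultiOutputFormat / JobConfigurer calls and the "out1"/"out2" aliases mirror
// the test code above.
//
//   import java.io.IOException;
//   import org.apache.hadoop.fs.Path;
//   import org.apache.hadoop.io.IntWritable;
//   import org.apache.hadoop.io.Text;
//   import org.apache.hadoop.mapreduce.Job;
//   import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//   import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
//   import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
//   import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat;
//   import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer;
static void configureTwoOutputs(Job job, Path outDir) throws IOException {
  job.setOutputFormatClass(MultiOutputFormat.class);
  JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
  // Each alias gets its own output format plus key and value classes.
  configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
  configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
  // Alias-specific settings go on that alias's child job.
  FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
  FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));
  configurer.configure(); // merges the per-alias configs back into the parent job
}
// A mapper or reducer then routes each record explicitly, as in
// MultiOutputFormat.write("out1", key, value, context) in the test's
// MultiOutWordIndexMapper and MultiOutWordCountReducer.
// ---------------------------------------------------------------------------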
int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); - - boolean caughtException = false; - try { - Configuration conf = new Configuration(); - conf.set("hive.metastore.uris", "thrift://no.such.machine:10888"); - conf.set("hive.metastore.local", "false"); - Job job = new Job(conf, "Write-hcat-seq-table"); - job.setJarByClass(TestSequenceFileReadWrite.class); - - job.setMapperClass(Map.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); - - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, "bad_props_table", null)); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, getSchema()); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - new FileOutputCommitterContainer(job, null).cleanupJob(job); - } catch (Exception e) { - caughtException = true; - assertTrue(e.getMessage().contains( - "Could not connect to meta store using any of the URIs provided")); - } - assertTrue(caughtException); + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + server = new PigServer(ExecType.LOCAL); + } + + @Test + public void testSequenceTableWriteReadMR() throws Exception { + Initialize(); + String createTable = "CREATE TABLE bad_props_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; + driver.run("drop table bad_props_table"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); + + boolean caughtException = false; + try { + Configuration conf = new Configuration(); + conf.set("hive.metastore.uris", "thrift://no.such.machine:10888"); + conf.set("hive.metastore.local", "false"); + Job job = new Job(conf, "Write-hcat-seq-table"); + job.setJarByClass(TestSequenceFileReadWrite.class); + + job.setMapperClass(Map.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); + + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, "bad_props_table", null)); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setSchema(job, getSchema()); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + new FileOutputCommitterContainer(job, null).cleanupJob(job); + } catch (Exception e) { + caughtException = true; + assertTrue(e.getMessage().contains( + "Could not connect to meta store using any of the URIs provided")); } - - public static class Map extends Mapper { - - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - DefaultHCatRecord record = new DefaultHCatRecord(3); - record.set(0, Integer.parseInt(cols[0])); - record.set(1, cols[1]); - record.set(2, cols[2]); - context.write(NullWritable.get(), record); - } - } - - private HCatSchema getSchema() throws HCatException { - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, - "")); - schema.append(new HCatFieldSchema("a1", - HCatFieldSchema.Type.STRING, "")); - schema.append(new HCatFieldSchema("a2", - HCatFieldSchema.Type.STRING, "")); - return schema; + assertTrue(caughtException); + } + + public static class Map extends Mapper { + + public void 
map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + String[] cols = value.toString().split(","); + DefaultHCatRecord record = new DefaultHCatRecord(3); + record.set(0, Integer.parseInt(cols[0])); + record.set(1, cols[1]); + record.set(2, cols[2]); + context.write(NullWritable.get(), record); } + } + + private HCatSchema getSchema() throws HCatException { + HCatSchema schema = new HCatSchema(new ArrayList()); + schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, + "")); + schema.append(new HCatFieldSchema("a1", + HCatFieldSchema.Type.STRING, "")); + schema.append(new HCatFieldSchema("a2", + HCatFieldSchema.Type.STRING, "")); + return schema; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java index 4c59f7e..0bb3c41 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java @@ -54,212 +54,212 @@ import org.junit.Test; public class TestSequenceFileReadWrite extends TestCase { - private static final String TEST_DATA_DIR = - "/tmp/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + private static final String TEST_DATA_DIR = + "/tmp/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - private static Driver driver; - private static PigServer server; - private static String[] input; - private static HiveConf hiveConf; + private static Driver driver; + private static PigServer server; + private static String[] input; + private static HiveConf hiveConf; - public void Initialize() throws Exception { - hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); + public void Initialize() throws Exception { + hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); - new File(TEST_WAREHOUSE_DIR).mkdirs(); + new File(TEST_WAREHOUSE_DIR).mkdirs(); - int numRows = 3; - input = new String[numRows]; - for (int i = 0; i < numRows; i++) { - String col1 = "a" + i; - String col2 = "b" + i; - input[i] = i + "," + col1 + "," + col2; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - server = new PigServer(ExecType.LOCAL); + int numRows = 3; + input = new String[numRows]; + for (int i = 0; i < numRows; i++) { + String col1 = "a" + i; + String col2 = "b" + i; + input[i] = i + "," + col1 + "," + col2; } 
+ HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + server = new PigServer(ExecType.LOCAL); + } - @Test - public void testSequenceTableWriteRead() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; - driver.run("drop table demo_table"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); + @Test + public void testSequenceTableWriteRead() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; + driver.run("drop table demo_table"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); - server.setBatchOn(); - server.registerQuery("A = load '" - + INPUT_FILE_NAME - + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); - server.registerQuery("store A into 'demo_table' using org.apache.hive.hcatalog.pig.HCatStorer();"); - server.executeBatch(); + server.setBatchOn(); + server.registerQuery("A = load '" + + INPUT_FILE_NAME + + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); + server.registerQuery("store A into 'demo_table' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.executeBatch(); - server.registerQuery("B = load 'demo_table' using org.apache.hive.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("B"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); + server.registerQuery("B = load 'demo_table' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("B"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; } + assertEquals(input.length, numTuplesRead); + } - @Test - public void testTextTableWriteRead() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table_1(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; - driver.run("drop table demo_table_1"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); + @Test + public void testTextTableWriteRead() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table_1(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; + driver.run("drop table demo_table_1"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); - server.setBatchOn(); - server.registerQuery("A = load '" - + INPUT_FILE_NAME - + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); - server.registerQuery("store A into 'demo_table_1' using org.apache.hive.hcatalog.pig.HCatStorer();"); - server.executeBatch(); + server.setBatchOn(); + server.registerQuery("A = load '" + + INPUT_FILE_NAME + + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); + server.registerQuery("store A into 'demo_table_1' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.executeBatch(); - server.registerQuery("B = load 'demo_table_1' using 
org.apache.hive.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("B"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); + server.registerQuery("B = load 'demo_table_1' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("B"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; } + assertEquals(input.length, numTuplesRead); + } - @Test - public void testSequenceTableWriteReadMR() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table_2(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; - driver.run("drop table demo_table_2"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); + @Test + public void testSequenceTableWriteReadMR() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table_2(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; + driver.run("drop table demo_table_2"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); - Configuration conf = new Configuration(); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(hiveConf.getAllProperties())); - Job job = new Job(conf, "Write-hcat-seq-table"); - job.setJarByClass(TestSequenceFileReadWrite.class); + Configuration conf = new Configuration(); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(hiveConf.getAllProperties())); + Job job = new Job(conf, "Write-hcat-seq-table"); + job.setJarByClass(TestSequenceFileReadWrite.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); + job.setMapperClass(Map.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_2", null)); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, getSchema()); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - if (!HCatUtil.isHadoop23()) { - new FileOutputCommitterContainer(job, null).commitJob(job); - } - assertTrue(job.isSuccessful()); + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_2", null)); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setSchema(job, getSchema()); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + if (!HCatUtil.isHadoop23()) { + new FileOutputCommitterContainer(job, null).commitJob(job); + } + assertTrue(job.isSuccessful()); - server.setBatchOn(); - server.registerQuery("C = load 'default.demo_table_2' using org.apache.hive.hcatalog.pig.HCatLoader();"); - server.executeBatch(); - 
Iterator XIter = server.openIterator("C"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); + server.setBatchOn(); + server.registerQuery("C = load 'default.demo_table_2' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.executeBatch(); + Iterator XIter = server.openIterator("C"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; } + assertEquals(input.length, numTuplesRead); + } - @Test - public void testTextTableWriteReadMR() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table_3(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; - driver.run("drop table demo_table_3"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); + @Test + public void testTextTableWriteReadMR() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table_3(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; + driver.run("drop table demo_table_3"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); - Configuration conf = new Configuration(); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(hiveConf.getAllProperties())); - Job job = new Job(conf, "Write-hcat-text-table"); - job.setJarByClass(TestSequenceFileReadWrite.class); + Configuration conf = new Configuration(); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(hiveConf.getAllProperties())); + Job job = new Job(conf, "Write-hcat-text-table"); + job.setJarByClass(TestSequenceFileReadWrite.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setInputFormatClass(TextInputFormat.class); - job.setNumReduceTasks(0); - TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); + job.setMapperClass(Map.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setInputFormatClass(TextInputFormat.class); + job.setNumReduceTasks(0); + TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_3", null)); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, getSchema()); - assertTrue(job.waitForCompletion(true)); - if (!HCatUtil.isHadoop23()) { - new FileOutputCommitterContainer(job, null).commitJob(job); - } - assertTrue(job.isSuccessful()); + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_3", null)); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setSchema(job, getSchema()); + assertTrue(job.waitForCompletion(true)); + if (!HCatUtil.isHadoop23()) { + new FileOutputCommitterContainer(job, null).commitJob(job); + } + assertTrue(job.isSuccessful()); - server.setBatchOn(); - server.registerQuery("D = load 'default.demo_table_3' using org.apache.hive.hcatalog.pig.HCatLoader();"); - server.executeBatch(); - Iterator 
XIter = server.openIterator("D"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); + server.setBatchOn(); + server.registerQuery("D = load 'default.demo_table_3' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.executeBatch(); + Iterator XIter = server.openIterator("D"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; } + assertEquals(input.length, numTuplesRead); + } - public static class Map extends Mapper { + public static class Map extends Mapper { - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - DefaultHCatRecord record = new DefaultHCatRecord(3); - record.set(0, Integer.parseInt(cols[0])); - record.set(1, cols[1]); - record.set(2, cols[2]); - context.write(NullWritable.get(), record); - } + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + String[] cols = value.toString().split(","); + DefaultHCatRecord record = new DefaultHCatRecord(3); + record.set(0, Integer.parseInt(cols[0])); + record.set(1, cols[1]); + record.set(2, cols[2]); + context.write(NullWritable.get(), record); } + } - private HCatSchema getSchema() throws HCatException { - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, - "")); - schema.append(new HCatFieldSchema("a1", - HCatFieldSchema.Type.STRING, "")); - schema.append(new HCatFieldSchema("a2", - HCatFieldSchema.Type.STRING, "")); - return schema; - } + private HCatSchema getSchema() throws HCatException { + HCatSchema schema = new HCatSchema(new ArrayList()); + schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, + "")); + schema.append(new HCatFieldSchema("a1", + HCatFieldSchema.Type.STRING, "")); + schema.append(new HCatFieldSchema("a2", + HCatFieldSchema.Type.STRING, "")); + return schema; + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java index edc2e4e..d677466 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java @@ -53,196 +53,196 @@ */ public class TestRCFileMapReduceInputFormat extends TestCase { - private static final Logger LOG = LoggerFactory.getLogger(TestRCFileMapReduceInputFormat.class); + private static final Logger LOG = LoggerFactory.getLogger(TestRCFileMapReduceInputFormat.class); - private static Configuration conf = new Configuration(); + private static Configuration conf = new Configuration(); - private static ColumnarSerDe serDe; + private static ColumnarSerDe serDe; - private static Path file; + private static Path file; - private static FileSystem fs; + private static FileSystem fs; - private static Properties tbl; + private 
static Properties tbl; - static { - try { - fs = FileSystem.getLocal(conf); - Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); - file = new Path(dir, "test_rcfile"); - fs.delete(dir, true); - // the SerDe part is from TestLazySimpleSerDe - serDe = new ColumnarSerDe(); - // Create the SerDe - tbl = createProperties(); - serDe.initialize(conf, tbl); - } catch (Exception e) { - } + static { + try { + fs = FileSystem.getLocal(conf); + Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); + file = new Path(dir, "test_rcfile"); + fs.delete(dir, true); + // the SerDe part is from TestLazySimpleSerDe + serDe = new ColumnarSerDe(); + // Create the SerDe + tbl = createProperties(); + serDe.initialize(conf, tbl); + } catch (Exception e) { } - - private static BytesRefArrayWritable patialS = new BytesRefArrayWritable(); - - private static byte[][] bytesArray = null; - - private static BytesRefArrayWritable s = null; - - static { - try { - bytesArray = new byte[][]{"123".getBytes("UTF-8"), - "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), - "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), - "hive and hadoop".getBytes("UTF-8"), new byte[0], - "NULL".getBytes("UTF-8")}; - s = new BytesRefArrayWritable(bytesArray.length); - s.set(0, new BytesRefWritable("123".getBytes("UTF-8"))); - s.set(1, new BytesRefWritable("456".getBytes("UTF-8"))); - s.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); - s.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); - s.set(4, new BytesRefWritable("5.3".getBytes("UTF-8"))); - s.set(5, new BytesRefWritable("hive and hadoop".getBytes("UTF-8"))); - s.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); - s.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); - - // partial test init - patialS.set(0, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(1, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); - patialS.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); - patialS.set(4, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(5, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); - - } catch (UnsupportedEncodingException e) { - } - } - - - /** For debugging and testing. 
*/ - public static void main(String[] args) throws Exception { - int count = 10000; - boolean create = true; - - String usage = "Usage: RCFile " + "[-count N]" + " file"; - if (args.length == 0) { - LOG.error(usage); - System.exit(-1); - } - - try { - for (int i = 0; i < args.length; ++i) { // parse command line - if (args[i] == null) { - continue; - } else if (args[i].equals("-count")) { - count = Integer.parseInt(args[++i]); - } else { - // file is required parameter - file = new Path(args[i]); - } - } - - if (file == null) { - LOG.error(usage); - System.exit(-1); - } - - LOG.info("count = {}", count); - LOG.info("create = {}", create); - LOG.info("file = {}", file); - - // test.performanceTest(); - LOG.info("Finished."); - } finally { - fs.close(); - } + } + + private static BytesRefArrayWritable patialS = new BytesRefArrayWritable(); + + private static byte[][] bytesArray = null; + + private static BytesRefArrayWritable s = null; + + static { + try { + bytesArray = new byte[][]{"123".getBytes("UTF-8"), + "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), + "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), + "hive and hadoop".getBytes("UTF-8"), new byte[0], + "NULL".getBytes("UTF-8")}; + s = new BytesRefArrayWritable(bytesArray.length); + s.set(0, new BytesRefWritable("123".getBytes("UTF-8"))); + s.set(1, new BytesRefWritable("456".getBytes("UTF-8"))); + s.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); + s.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); + s.set(4, new BytesRefWritable("5.3".getBytes("UTF-8"))); + s.set(5, new BytesRefWritable("hive and hadoop".getBytes("UTF-8"))); + s.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); + s.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); + + // partial test init + patialS.set(0, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(1, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); + patialS.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); + patialS.set(4, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(5, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); + + } catch (UnsupportedEncodingException e) { } + } - private static Properties createProperties() { - Properties tbl = new Properties(); - - // Set the configuration parameters - tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); - tbl.setProperty("columns", - "abyte,ashort,aint,along,adouble,astring,anullint,anullstring"); - tbl.setProperty("columns.types", - "tinyint:smallint:int:bigint:double:string:int:string"); - tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); - return tbl; - } + /** For debugging and testing. 
*/ + public static void main(String[] args) throws Exception { + int count = 10000; + boolean create = true; - public void testSynAndSplit() throws IOException, InterruptedException { - splitBeforeSync(); - splitRightBeforeSync(); - splitInMiddleOfSync(); - splitRightAfterSync(); - splitAfterSync(); + String usage = "Usage: RCFile " + "[-count N]" + " file"; + if (args.length == 0) { + LOG.error(usage); + System.exit(-1); } - private void splitBeforeSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(600, 1000, 2, 17684, null); - } + try { + for (int i = 0; i < args.length; ++i) { // parse command line + if (args[i] == null) { + continue; + } else if (args[i].equals("-count")) { + count = Integer.parseInt(args[++i]); + } else { + // file is required parameter + file = new Path(args[i]); + } + } - private void splitRightBeforeSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 17750, null); - } + if (file == null) { + LOG.error(usage); + System.exit(-1); + } - private void splitInMiddleOfSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 17760, null); + LOG.info("count = {}", count); + LOG.info("create = {}", create); + LOG.info("file = {}", file); + // test.performanceTest(); + LOG.info("Finished."); + } finally { + fs.close(); } - - private void splitRightAfterSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 17770, null); + } + + private static Properties createProperties() { + Properties tbl = new Properties(); + + // Set the configuration parameters + tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); + tbl.setProperty("columns", + "abyte,ashort,aint,along,adouble,astring,anullint,anullstring"); + tbl.setProperty("columns.types", + "tinyint:smallint:int:bigint:double:string:int:string"); + tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + return tbl; + } + + + public void testSynAndSplit() throws IOException, InterruptedException { + splitBeforeSync(); + splitRightBeforeSync(); + splitInMiddleOfSync(); + splitRightAfterSync(); + splitAfterSync(); + } + + private void splitBeforeSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(600, 1000, 2, 17684, null); + } + + private void splitRightBeforeSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 17750, null); + } + + private void splitInMiddleOfSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 17760, null); + + } + + private void splitRightAfterSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 17770, null); + } + + private void splitAfterSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 19950, null); + } + + private void writeThenReadByRecordReader(int intervalRecordCount, + int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) + throws IOException, InterruptedException { + Path testDir = new Path(System.getProperty("test.data.dir", ".") + + "/mapred/testsmallfirstsplit"); + Path testFile = new Path(testDir, "test_rcfile"); + fs.delete(testFile, true); + Configuration cloneConf = new Configuration(conf); + RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length); + cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount); + + RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec); + + 
BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length); + for (int i = 0; i < bytesArray.length; i++) { + BytesRefWritable cu = null; + cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length); + bytes.set(i, cu); } - - private void splitAfterSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 19950, null); + for (int i = 0; i < writeCount; i++) { + writer.append(bytes); } - - private void writeThenReadByRecordReader(int intervalRecordCount, - int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) - throws IOException, InterruptedException { - Path testDir = new Path(System.getProperty("test.data.dir", ".") - + "/mapred/testsmallfirstsplit"); - Path testFile = new Path(testDir, "test_rcfile"); - fs.delete(testFile, true); - Configuration cloneConf = new Configuration(conf); - RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length); - cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount); - - RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec); - - BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length); - for (int i = 0; i < bytesArray.length; i++) { - BytesRefWritable cu = null; - cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length); - bytes.set(i, cu); - } - for (int i = 0; i < writeCount; i++) { - writer.append(bytes); - } - writer.close(); - - RCFileMapReduceInputFormat inputFormat = new RCFileMapReduceInputFormat(); - Configuration jonconf = new Configuration(cloneConf); - jonconf.set("mapred.input.dir", testDir.toString()); - JobContext context = new Job(jonconf); - context.getConfiguration().setLong("mapred.max.split.size", maxSplitSize); - List splits = inputFormat.getSplits(context); - assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber); - int readCount = 0; - for (int i = 0; i < splits.size(); i++) { - TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf, - new TaskAttemptID()); - RecordReader rr = inputFormat.createRecordReader(splits.get(i), tac); - rr.initialize(splits.get(i), tac); - while (rr.nextKeyValue()) { - readCount++; - } - } - assertEquals("readCount should be equal to writeCount", readCount, writeCount); + writer.close(); + + RCFileMapReduceInputFormat inputFormat = new RCFileMapReduceInputFormat(); + Configuration jonconf = new Configuration(cloneConf); + jonconf.set("mapred.input.dir", testDir.toString()); + JobContext context = new Job(jonconf); + context.getConfiguration().setLong("mapred.max.split.size", maxSplitSize); + List splits = inputFormat.getSplits(context); + assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber); + int readCount = 0; + for (int i = 0; i < splits.size(); i++) { + TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf, + new TaskAttemptID()); + RecordReader rr = inputFormat.createRecordReader(splits.get(i), tac); + rr.initialize(splits.get(i), tac); + while (rr.nextKeyValue()) { + readCount++; + } } + assertEquals("readCount should be equal to writeCount", readCount, writeCount); + } } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/security/TestHdfsAuthorizationProvider.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/security/TestHdfsAuthorizationProvider.java index 41dbddd..bf6f19a 100644 --- 
a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/security/TestHdfsAuthorizationProvider.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/security/TestHdfsAuthorizationProvider.java @@ -54,530 +54,530 @@ public class TestHdfsAuthorizationProvider { - protected HCatDriver hcatDriver; - protected HiveMetaStoreClient msc; - protected HiveConf conf; - protected String whDir; - protected Path whPath; - protected FileSystem whFs; - protected Warehouse wh; - protected Hive hive; - - @Before - public void setUp() throws Exception { - - conf = new HiveConf(this.getClass()); - conf.set(ConfVars.PREEXECHOOKS.varname, ""); - conf.set(ConfVars.POSTEXECHOOKS.varname, ""); - conf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - conf.set("hive.metastore.local", "true"); - conf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - conf.setBoolVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED, true); - conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - StorageDelegationAuthorizationProvider.class.getCanonicalName()); - conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); - - whDir = System.getProperty("test.warehouse.dir", "/tmp/testhdfsauthorization_wh"); - conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, whDir); - - UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String username = ShimLoader.getHadoopShims().getShortUserName(ugi); - - whPath = new Path(whDir); - whFs = whPath.getFileSystem(conf); - - wh = new Warehouse(conf); - hive = Hive.get(conf); - - //clean up mess in HMS - HcatTestUtils.cleanupHMS(hive, wh, perm700); - - whFs.delete(whPath, true); - whFs.mkdirs(whPath, perm755); - - SessionState.start(new CliSessionState(conf)); - hcatDriver = new HCatDriver(); - } - - @After - public void tearDown() throws IOException { - whFs.close(); - hcatDriver.close(); - Hive.closeCurrent(); - } - - public Path getDbPath(String dbName) throws MetaException, HiveException { - return HcatTestUtils.getDbPath(hive, wh, dbName); - } - - public Path getTablePath(String dbName, String tableName) throws HiveException { - Table table = hive.getTable(dbName, tableName); - return table.getPath(); - } - - public Path getPartPath(String partName, String dbName, String tableName) throws HiveException { - return new Path(getTablePath(dbName, tableName), partName); - } - - /** Execute the query expecting success*/ - public void exec(String format, Object... args) throws Exception { - String command = String.format(format, args); - CommandProcessorResponse resp = hcatDriver.run(command); - Assert.assertEquals(resp.getErrorMessage(), 0, resp.getResponseCode()); - Assert.assertEquals(resp.getErrorMessage(), null, resp.getErrorMessage()); - } - - /** Execute the query expecting it to fail with AuthorizationException */ - public void execFail(String format, Object... 
args) throws Exception { - String command = String.format(format, args); - CommandProcessorResponse resp = hcatDriver.run(command); - Assert.assertNotSame(resp.getErrorMessage(), 0, resp.getResponseCode()); - Assert.assertTrue((resp.getResponseCode() == 40000) || (resp.getResponseCode() == 403)); - if (resp.getErrorMessage() != null) { - Assert.assertTrue(resp.getErrorMessage().contains("org.apache.hadoop.security.AccessControlException")); - } - } - - - /** - * Tests whether the warehouse directory is writable by the current user (as defined by Hadoop) - */ - @Test - public void testWarehouseIsWritable() throws Exception { - Path top = new Path(whPath, "_foobarbaz12_"); - try { - whFs.mkdirs(top); - } finally { - whFs.delete(top, true); - } - } - - @Test - public void testShowDatabases() throws Exception { - exec("CREATE DATABASE doo"); - exec("SHOW DATABASES"); - - whFs.setPermission(whPath, perm300); //revoke r - execFail("SHOW DATABASES"); - } - - @Test - public void testDatabaseOps() throws Exception { - exec("SHOW TABLES"); - exec("SHOW TABLE EXTENDED LIKE foo1"); - - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); - exec("DESCRIBE DATABASE doo"); - exec("USE doo"); - exec("SHOW TABLES"); - exec("SHOW TABLE EXTENDED LIKE foo1"); - exec("DROP DATABASE doo"); - - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - whFs.mkdirs(dbPath, perm700); - exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - exec("DESCRIBE DATABASE doo2", dbPath.toUri()); - exec("USE doo2"); - exec("SHOW TABLES"); - exec("SHOW TABLE EXTENDED LIKE foo1"); - exec("DROP DATABASE doo2", dbPath.toUri()); - - //custom non-existing location - exec("CREATE DATABASE doo3 LOCATION '%s/subpath'", dbPath.toUri()); - } - - @Test - public void testCreateDatabaseFail1() throws Exception { - whFs.setPermission(whPath, perm500); - execFail("CREATE DATABASE doo"); //in the default location - - whFs.setPermission(whPath, perm555); - execFail("CREATE DATABASE doo2"); - } - - @Test - public void testCreateDatabaseFail2() throws Exception { - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - - whFs.mkdirs(dbPath, perm700); - whFs.setPermission(dbPath, perm500); - execFail("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - } - - @Test - public void testDropDatabaseFail1() throws Exception { - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); //in the default location - - whFs.setPermission(getDbPath("doo"), perm500); //revoke write - execFail("DROP DATABASE doo"); - } - - @Test - public void testDropDatabaseFail2() throws Exception { - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - - whFs.mkdirs(dbPath, perm700); - exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - - whFs.setPermission(dbPath, perm500); - execFail("DROP DATABASE doo2"); - } - - @Test - public void testDescSwitchDatabaseFail() throws Exception { - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); - whFs.setPermission(getDbPath("doo"), perm300); //revoke read - execFail("DESCRIBE DATABASE doo"); - execFail("USE doo"); - - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - whFs.mkdirs(dbPath, perm700); - exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - whFs.mkdirs(dbPath, perm300); //revoke read - execFail("DESCRIBE DATABASE doo2", dbPath.toUri()); - execFail("USE doo2"); - } - - @Test - public void testShowTablesFail() throws 
Exception { - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); - exec("USE doo"); - whFs.setPermission(getDbPath("doo"), perm300); //revoke read - execFail("SHOW TABLES"); - execFail("SHOW TABLE EXTENDED LIKE foo1"); - } - - @Test - public void testTableOps() throws Exception { - //default db - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - exec("DESCRIBE foo1"); - exec("DROP TABLE foo1"); - - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - exec("DESCRIBE foo2"); - exec("DROP TABLE foo2"); - - //default db custom non existing location - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); - exec("DESCRIBE foo3"); - exec("DROP TABLE foo3"); - - //non default db - exec("CREATE DATABASE doo"); - exec("USE doo"); - - exec("CREATE TABLE foo4 (foo INT) STORED AS RCFILE"); - exec("DESCRIBE foo4"); - exec("DROP TABLE foo4"); - - //non-default db custom location - tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - exec("DESCRIBE foo5"); - exec("DROP TABLE foo5"); - - //non-default db custom non existing location - exec("CREATE EXTERNAL TABLE foo6 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); - exec("DESCRIBE foo6"); - exec("DROP TABLE foo6"); - - exec("DROP TABLE IF EXISTS foo_non_exists"); - - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - exec("DESCRIBE EXTENDED foo1"); - exec("DESCRIBE FORMATTED foo1"); - exec("DESCRIBE foo1.foo"); - - //deep non-existing path for the table - tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); - } - - @Test - public void testCreateTableFail1() throws Exception { - //default db - whFs.mkdirs(whPath, perm500); //revoke w - execFail("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - } - - @Test - public void testCreateTableFail2() throws Exception { - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm500); - execFail("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - - //default db custom non existing location - execFail("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); - } - - @Test - public void testCreateTableFail3() throws Exception { - //non default db - exec("CREATE DATABASE doo"); - whFs.setPermission(getDbPath("doo"), perm500); - - execFail("CREATE TABLE doo.foo4 (foo INT) STORED AS RCFILE"); - - //non-default db custom location, permission to write to tablePath, but not on db path - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("USE doo"); - execFail("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - } - - @Test - public void testCreateTableFail4() throws Exception { - //non default db - exec("CREATE DATABASE doo"); - - //non-default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm500); - execFail("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", 
tablePath); - - //non-default db custom non existing location - execFail("CREATE EXTERNAL TABLE doo.foo6 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); - } - - @Test - public void testDropTableFail1() throws Exception { - //default db - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke w - execFail("DROP TABLE foo1"); - } - - @Test - public void testDropTableFail2() throws Exception { - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm500); - execFail("DROP TABLE foo2"); - } - - @Test - public void testDropTableFail4() throws Exception { - //non default db - exec("CREATE DATABASE doo"); - - //non-default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - - exec("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm500); - exec("USE doo"); //There is no DROP TABLE doo.foo5 support in Hive - execFail("DROP TABLE foo5"); - } - - @Test - public void testDescTableFail() throws Exception { - //default db - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read - execFail("DESCRIBE foo1"); - - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm300); //revoke read - execFail("DESCRIBE foo2"); - } - - @Test - public void testAlterTableRename() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - exec("ALTER TABLE foo1 RENAME TO foo2"); - - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - exec("ALTER TABLE foo3 RENAME TO foo4"); - } - - @Test - public void testAlterTableRenameFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write - execFail("ALTER TABLE foo1 RENAME TO foo2"); - - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm500); //revoke write - execFail("ALTER TABLE foo3 RENAME TO foo4"); - } - - @Test - public void testAlterTableRelocate() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("ALTER TABLE foo1 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", - tablePath.makeQualified(whFs)); - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - exec("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - } - - @Test - public void testAlterTableRelocateFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm500); //revoke write - execFail("ALTER TABLE foo1 SET LOCATION '%s'", 
tablePath.makeQualified(whFs)); - - //dont have access to new table loc - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", - tablePath.makeQualified(whFs)); - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - whFs.mkdirs(tablePath, perm500); //revoke write - execFail("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - - //have access to new table loc, but not old table loc - tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); - exec("CREATE EXTERNAL TABLE foo4 (foo INT) STORED AS RCFILE LOCATION '%s'", - tablePath.makeQualified(whFs)); - whFs.mkdirs(tablePath, perm500); //revoke write - tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); - execFail("ALTER TABLE foo4 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - } - - @Test - public void testAlterTable() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - exec("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); - exec("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); - exec("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); - } - - @Test - public void testAddDropPartition() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); - exec("ALTER TABLE foo1 ADD IF NOT EXISTS PARTITION (b='2010-10-10')"); - String relPath = new Random().nextInt() + "/mypart"; - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-11') LOCATION '%s'", relPath); - - exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT RCFILE"); - - exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT INPUTFORMAT " - + "'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " - + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver " - + "'mydriver' outputdriver 'yourdriver'"); - - exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); - exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-11')"); - } - - @Test - public void testAddPartitionFail1() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); - execFail("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); - } - - @Test - public void testAddPartitionFail2() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - String relPath = new Random().nextInt() + "/mypart"; - Path partPath = new Path(getTablePath("default", "foo1"), relPath); - whFs.mkdirs(partPath, perm500); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); - } - - @Test - public void testDropPartitionFail1() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); - whFs.mkdirs(getPartPath("b=2010-10-10", "default", "foo1"), perm500); - execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); - } - - @Test - public void testDropPartitionFail2() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - String relPath = new Random().nextInt() + "/mypart"; - Path partPath = new Path(getTablePath("default", "foo1"), relPath); - whFs.mkdirs(partPath, perm700); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); - whFs.mkdirs(partPath, 
perm500); //revoke write - execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); - } - - @Test - public void testAlterTableFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write - execFail("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); - execFail("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); - execFail("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); - } - - @Test - public void testShowTables() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); - exec("SHOW PARTITIONS foo1"); - - whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read - execFail("SHOW PARTITIONS foo1"); - } - - @Test - public void testAlterTablePartRename() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); - exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); - } - - @Test - public void testAlterTablePartRenameFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); - whFs.setPermission(loc, perm500); //revoke w - execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); - } - - @Test - public void testAlterTablePartRelocate() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16')"); - Path partPath = new Path(whPath, new Random().nextInt() + "/mypart"); - exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", partPath.makeQualified(whFs)); - } - - @Test - public void testAlterTablePartRelocateFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - - Path oldLoc = new Path(whPath, new Random().nextInt() + "/mypart"); - Path newLoc = new Path(whPath, new Random().nextInt() + "/mypart2"); - - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", oldLoc); - whFs.mkdirs(oldLoc, perm500); - execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); - whFs.mkdirs(oldLoc, perm700); - whFs.mkdirs(newLoc, perm500); - execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); - } + protected HCatDriver hcatDriver; + protected HiveMetaStoreClient msc; + protected HiveConf conf; + protected String whDir; + protected Path whPath; + protected FileSystem whFs; + protected Warehouse wh; + protected Hive hive; + + @Before + public void setUp() throws Exception { + + conf = new HiveConf(this.getClass()); + conf.set(ConfVars.PREEXECHOOKS.varname, ""); + conf.set(ConfVars.POSTEXECHOOKS.varname, ""); + conf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + + conf.set("hive.metastore.local", "true"); + conf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + conf.setBoolVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED, true); + conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, + StorageDelegationAuthorizationProvider.class.getCanonicalName()); + 
conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); + + whDir = System.getProperty("test.warehouse.dir", "/tmp/testhdfsauthorization_wh"); + conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, whDir); + + UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); + String username = ShimLoader.getHadoopShims().getShortUserName(ugi); + + whPath = new Path(whDir); + whFs = whPath.getFileSystem(conf); + + wh = new Warehouse(conf); + hive = Hive.get(conf); + + //clean up mess in HMS + HcatTestUtils.cleanupHMS(hive, wh, perm700); + + whFs.delete(whPath, true); + whFs.mkdirs(whPath, perm755); + + SessionState.start(new CliSessionState(conf)); + hcatDriver = new HCatDriver(); + } + + @After + public void tearDown() throws IOException { + whFs.close(); + hcatDriver.close(); + Hive.closeCurrent(); + } + + public Path getDbPath(String dbName) throws MetaException, HiveException { + return HcatTestUtils.getDbPath(hive, wh, dbName); + } + + public Path getTablePath(String dbName, String tableName) throws HiveException { + Table table = hive.getTable(dbName, tableName); + return table.getPath(); + } + + public Path getPartPath(String partName, String dbName, String tableName) throws HiveException { + return new Path(getTablePath(dbName, tableName), partName); + } + + /** Execute the query expecting success*/ + public void exec(String format, Object... args) throws Exception { + String command = String.format(format, args); + CommandProcessorResponse resp = hcatDriver.run(command); + Assert.assertEquals(resp.getErrorMessage(), 0, resp.getResponseCode()); + Assert.assertEquals(resp.getErrorMessage(), null, resp.getErrorMessage()); + } + + /** Execute the query expecting it to fail with AuthorizationException */ + public void execFail(String format, Object... 
args) throws Exception { + String command = String.format(format, args); + CommandProcessorResponse resp = hcatDriver.run(command); + Assert.assertNotSame(resp.getErrorMessage(), 0, resp.getResponseCode()); + Assert.assertTrue((resp.getResponseCode() == 40000) || (resp.getResponseCode() == 403)); + if (resp.getErrorMessage() != null) { + Assert.assertTrue(resp.getErrorMessage().contains("org.apache.hadoop.security.AccessControlException")); + } + } + + + /** + * Tests whether the warehouse directory is writable by the current user (as defined by Hadoop) + */ + @Test + public void testWarehouseIsWritable() throws Exception { + Path top = new Path(whPath, "_foobarbaz12_"); + try { + whFs.mkdirs(top); + } finally { + whFs.delete(top, true); + } + } + + @Test + public void testShowDatabases() throws Exception { + exec("CREATE DATABASE doo"); + exec("SHOW DATABASES"); + + whFs.setPermission(whPath, perm300); //revoke r + execFail("SHOW DATABASES"); + } + + @Test + public void testDatabaseOps() throws Exception { + exec("SHOW TABLES"); + exec("SHOW TABLE EXTENDED LIKE foo1"); + + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); + exec("DESCRIBE DATABASE doo"); + exec("USE doo"); + exec("SHOW TABLES"); + exec("SHOW TABLE EXTENDED LIKE foo1"); + exec("DROP DATABASE doo"); + + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + whFs.mkdirs(dbPath, perm700); + exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + exec("DESCRIBE DATABASE doo2", dbPath.toUri()); + exec("USE doo2"); + exec("SHOW TABLES"); + exec("SHOW TABLE EXTENDED LIKE foo1"); + exec("DROP DATABASE doo2", dbPath.toUri()); + + //custom non-existing location + exec("CREATE DATABASE doo3 LOCATION '%s/subpath'", dbPath.toUri()); + } + + @Test + public void testCreateDatabaseFail1() throws Exception { + whFs.setPermission(whPath, perm500); + execFail("CREATE DATABASE doo"); //in the default location + + whFs.setPermission(whPath, perm555); + execFail("CREATE DATABASE doo2"); + } + + @Test + public void testCreateDatabaseFail2() throws Exception { + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + + whFs.mkdirs(dbPath, perm700); + whFs.setPermission(dbPath, perm500); + execFail("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + } + + @Test + public void testDropDatabaseFail1() throws Exception { + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); //in the default location + + whFs.setPermission(getDbPath("doo"), perm500); //revoke write + execFail("DROP DATABASE doo"); + } + + @Test + public void testDropDatabaseFail2() throws Exception { + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + + whFs.mkdirs(dbPath, perm700); + exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + + whFs.setPermission(dbPath, perm500); + execFail("DROP DATABASE doo2"); + } + + @Test + public void testDescSwitchDatabaseFail() throws Exception { + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); + whFs.setPermission(getDbPath("doo"), perm300); //revoke read + execFail("DESCRIBE DATABASE doo"); + execFail("USE doo"); + + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + whFs.mkdirs(dbPath, perm700); + exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + whFs.mkdirs(dbPath, perm300); //revoke read + execFail("DESCRIBE DATABASE doo2", dbPath.toUri()); + execFail("USE doo2"); + } + + @Test + public void testShowTablesFail() throws 
Exception { + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); + exec("USE doo"); + whFs.setPermission(getDbPath("doo"), perm300); //revoke read + execFail("SHOW TABLES"); + execFail("SHOW TABLE EXTENDED LIKE foo1"); + } + + @Test + public void testTableOps() throws Exception { + //default db + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + exec("DESCRIBE foo1"); + exec("DROP TABLE foo1"); + + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + exec("DESCRIBE foo2"); + exec("DROP TABLE foo2"); + + //default db custom non existing location + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); + exec("DESCRIBE foo3"); + exec("DROP TABLE foo3"); + + //non default db + exec("CREATE DATABASE doo"); + exec("USE doo"); + + exec("CREATE TABLE foo4 (foo INT) STORED AS RCFILE"); + exec("DESCRIBE foo4"); + exec("DROP TABLE foo4"); + + //non-default db custom location + tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + exec("DESCRIBE foo5"); + exec("DROP TABLE foo5"); + + //non-default db custom non existing location + exec("CREATE EXTERNAL TABLE foo6 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); + exec("DESCRIBE foo6"); + exec("DROP TABLE foo6"); + + exec("DROP TABLE IF EXISTS foo_non_exists"); + + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + exec("DESCRIBE EXTENDED foo1"); + exec("DESCRIBE FORMATTED foo1"); + exec("DESCRIBE foo1.foo"); + + //deep non-existing path for the table + tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); + } + + @Test + public void testCreateTableFail1() throws Exception { + //default db + whFs.mkdirs(whPath, perm500); //revoke w + execFail("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + } + + @Test + public void testCreateTableFail2() throws Exception { + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm500); + execFail("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + + //default db custom non existing location + execFail("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); + } + + @Test + public void testCreateTableFail3() throws Exception { + //non default db + exec("CREATE DATABASE doo"); + whFs.setPermission(getDbPath("doo"), perm500); + + execFail("CREATE TABLE doo.foo4 (foo INT) STORED AS RCFILE"); + + //non-default db custom location, permission to write to tablePath, but not on db path + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("USE doo"); + execFail("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + } + + @Test + public void testCreateTableFail4() throws Exception { + //non default db + exec("CREATE DATABASE doo"); + + //non-default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm500); + execFail("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", 
tablePath); + + //non-default db custom non existing location + execFail("CREATE EXTERNAL TABLE doo.foo6 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); + } + + @Test + public void testDropTableFail1() throws Exception { + //default db + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke w + execFail("DROP TABLE foo1"); + } + + @Test + public void testDropTableFail2() throws Exception { + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm500); + execFail("DROP TABLE foo2"); + } + + @Test + public void testDropTableFail4() throws Exception { + //non default db + exec("CREATE DATABASE doo"); + + //non-default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + + exec("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm500); + exec("USE doo"); //There is no DROP TABLE doo.foo5 support in Hive + execFail("DROP TABLE foo5"); + } + + @Test + public void testDescTableFail() throws Exception { + //default db + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read + execFail("DESCRIBE foo1"); + + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm300); //revoke read + execFail("DESCRIBE foo2"); + } + + @Test + public void testAlterTableRename() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + exec("ALTER TABLE foo1 RENAME TO foo2"); + + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + exec("ALTER TABLE foo3 RENAME TO foo4"); + } + + @Test + public void testAlterTableRenameFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write + execFail("ALTER TABLE foo1 RENAME TO foo2"); + + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm500); //revoke write + execFail("ALTER TABLE foo3 RENAME TO foo4"); + } + + @Test + public void testAlterTableRelocate() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("ALTER TABLE foo1 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", + tablePath.makeQualified(whFs)); + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + exec("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + } + + @Test + public void testAlterTableRelocateFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm500); //revoke write + execFail("ALTER TABLE foo1 SET LOCATION '%s'", 
tablePath.makeQualified(whFs)); + + //dont have access to new table loc + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", + tablePath.makeQualified(whFs)); + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + whFs.mkdirs(tablePath, perm500); //revoke write + execFail("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + + //have access to new table loc, but not old table loc + tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); + exec("CREATE EXTERNAL TABLE foo4 (foo INT) STORED AS RCFILE LOCATION '%s'", + tablePath.makeQualified(whFs)); + whFs.mkdirs(tablePath, perm500); //revoke write + tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); + execFail("ALTER TABLE foo4 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + } + + @Test + public void testAlterTable() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + exec("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); + exec("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); + exec("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); + } + + @Test + public void testAddDropPartition() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); + exec("ALTER TABLE foo1 ADD IF NOT EXISTS PARTITION (b='2010-10-10')"); + String relPath = new Random().nextInt() + "/mypart"; + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-11') LOCATION '%s'", relPath); + + exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT RCFILE"); + + exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT INPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver " + + "'mydriver' outputdriver 'yourdriver'"); + + exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); + exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-11')"); + } + + @Test + public void testAddPartitionFail1() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); + execFail("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); + } + + @Test + public void testAddPartitionFail2() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + String relPath = new Random().nextInt() + "/mypart"; + Path partPath = new Path(getTablePath("default", "foo1"), relPath); + whFs.mkdirs(partPath, perm500); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); + } + + @Test + public void testDropPartitionFail1() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); + whFs.mkdirs(getPartPath("b=2010-10-10", "default", "foo1"), perm500); + execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); + } + + @Test + public void testDropPartitionFail2() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + String relPath = new Random().nextInt() + "/mypart"; + Path partPath = new Path(getTablePath("default", "foo1"), relPath); + whFs.mkdirs(partPath, perm700); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); + whFs.mkdirs(partPath, 
perm500); //revoke write + execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); + } + + @Test + public void testAlterTableFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write + execFail("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); + execFail("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); + execFail("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); + } + + @Test + public void testShowTables() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); + exec("SHOW PARTITIONS foo1"); + + whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read + execFail("SHOW PARTITIONS foo1"); + } + + @Test + public void testAlterTablePartRename() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); + exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); + } + + @Test + public void testAlterTablePartRenameFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); + whFs.setPermission(loc, perm500); //revoke w + execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); + } + + @Test + public void testAlterTablePartRelocate() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16')"); + Path partPath = new Path(whPath, new Random().nextInt() + "/mypart"); + exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", partPath.makeQualified(whFs)); + } + + @Test + public void testAlterTablePartRelocateFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + + Path oldLoc = new Path(whPath, new Random().nextInt() + "/mypart"); + Path newLoc = new Path(whPath, new Random().nextInt() + "/mypart2"); + + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", oldLoc); + whFs.mkdirs(oldLoc, perm500); + execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); + whFs.mkdirs(oldLoc, perm700); + whFs.mkdirs(newLoc, perm500); + execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseLoader.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseLoader.java index 5d2fe4c..3cf0b70 100644 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseLoader.java @@ -50,105 +50,105 @@ abstract class HCatBaseLoader extends LoadFunc implements LoadMetadata, LoadPushDown { - protected static final String PRUNE_PROJECTION_INFO = "prune.projection.info"; - - private RecordReader reader; - protected String signature; - - HCatSchema outputSchema = null; - - - @Override - public Tuple getNext() throws IOException { - try { - HCatRecord hr = (HCatRecord) (reader.nextKeyValue() ? 
reader.getCurrentValue() : null); - Tuple t = PigHCatUtil.transformToTuple(hr, outputSchema); - // TODO : we were discussing an iter interface, and also a LazyTuple - // change this when plans for that solidifies. - return t; - } catch (ExecException e) { - int errCode = 6018; - String errMsg = "Error while reading input"; - throw new ExecException(errMsg, errCode, - PigException.REMOTE_ENVIRONMENT, e); - } catch (Exception eOther) { - int errCode = 6018; - String errMsg = "Error converting read value to tuple"; - throw new ExecException(errMsg, errCode, - PigException.REMOTE_ENVIRONMENT, eOther); - } - - } - - @Override - public void prepareToRead(RecordReader reader, PigSplit arg1) throws IOException { - this.reader = reader; - } - - @Override - public ResourceStatistics getStatistics(String location, Job job) throws IOException { - // statistics not implemented currently - return null; - } - - @Override - public List getFeatures() { - return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION); - } - - @Override - public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldsInfo) throws FrontendException { - // Store the required fields information in the UDFContext so that we - // can retrieve it later. - storeInUDFContext(signature, PRUNE_PROJECTION_INFO, requiredFieldsInfo); - - // HCat will always prune columns based on what we ask of it - so the - // response is true - return new RequiredFieldResponse(true); + protected static final String PRUNE_PROJECTION_INFO = "prune.projection.info"; + + private RecordReader reader; + protected String signature; + + HCatSchema outputSchema = null; + + + @Override + public Tuple getNext() throws IOException { + try { + HCatRecord hr = (HCatRecord) (reader.nextKeyValue() ? reader.getCurrentValue() : null); + Tuple t = PigHCatUtil.transformToTuple(hr, outputSchema); + // TODO : we were discussing an iter interface, and also a LazyTuple + // change this when plans for that solidifies. + return t; + } catch (ExecException e) { + int errCode = 6018; + String errMsg = "Error while reading input"; + throw new ExecException(errMsg, errCode, + PigException.REMOTE_ENVIRONMENT, e); + } catch (Exception eOther) { + int errCode = 6018; + String errMsg = "Error converting read value to tuple"; + throw new ExecException(errMsg, errCode, + PigException.REMOTE_ENVIRONMENT, eOther); } - @Override - public void setUDFContextSignature(String signature) { - this.signature = signature; - } - - - // helper methods - protected void storeInUDFContext(String signature, String key, Object value) { - UDFContext udfContext = UDFContext.getUDFContext(); - Properties props = udfContext.getUDFProperties( - this.getClass(), new String[]{signature}); - props.put(key, value); - } - - /** - * A utility method to get the size of inputs. This is accomplished by summing the - * size of all input paths on supported FileSystems. Locations whose size cannot be - * determined are ignored. Note non-FileSystem and unpartitioned locations will not - * report their input size by default. 
- */ - protected static long getSizeInBytes(InputJobInfo inputJobInfo) throws IOException { - Configuration conf = new Configuration(); - long sizeInBytes = 0; - - for (PartInfo partInfo : inputJobInfo.getPartitions()) { - try { - Path p = new Path(partInfo.getLocation()); - if (p.getFileSystem(conf).isFile(p)) { - sizeInBytes += p.getFileSystem(conf).getFileStatus(p).getLen(); - } else { - FileStatus[] fileStatuses = p.getFileSystem(conf).listStatus(p); - if (fileStatuses != null) { - for (FileStatus child : fileStatuses) { - sizeInBytes += child.getLen(); - } - } - } - } catch (IOException e) { - // Report size to the extent possible. + } + + @Override + public void prepareToRead(RecordReader reader, PigSplit arg1) throws IOException { + this.reader = reader; + } + + @Override + public ResourceStatistics getStatistics(String location, Job job) throws IOException { + // statistics not implemented currently + return null; + } + + @Override + public List getFeatures() { + return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION); + } + + @Override + public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldsInfo) throws FrontendException { + // Store the required fields information in the UDFContext so that we + // can retrieve it later. + storeInUDFContext(signature, PRUNE_PROJECTION_INFO, requiredFieldsInfo); + + // HCat will always prune columns based on what we ask of it - so the + // response is true + return new RequiredFieldResponse(true); + } + + @Override + public void setUDFContextSignature(String signature) { + this.signature = signature; + } + + + // helper methods + protected void storeInUDFContext(String signature, String key, Object value) { + UDFContext udfContext = UDFContext.getUDFContext(); + Properties props = udfContext.getUDFProperties( + this.getClass(), new String[]{signature}); + props.put(key, value); + } + + /** + * A utility method to get the size of inputs. This is accomplished by summing the + * size of all input paths on supported FileSystems. Locations whose size cannot be + * determined are ignored. Note non-FileSystem and unpartitioned locations will not + * report their input size by default. + */ + protected static long getSizeInBytes(InputJobInfo inputJobInfo) throws IOException { + Configuration conf = new Configuration(); + long sizeInBytes = 0; + + for (PartInfo partInfo : inputJobInfo.getPartitions()) { + try { + Path p = new Path(partInfo.getLocation()); + if (p.getFileSystem(conf).isFile(p)) { + sizeInBytes += p.getFileSystem(conf).getFileStatus(p).getLen(); + } else { + FileStatus[] fileStatuses = p.getFileSystem(conf).listStatus(p); + if (fileStatuses != null) { + for (FileStatus child : fileStatuses) { + sizeInBytes += child.getLen(); } + } } - - return sizeInBytes; + } catch (IOException e) { + // Report size to the extent possible. 
+ } } + + return sizeInBytes; + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java index f291521..d136f02 100644 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java @@ -62,382 +62,382 @@ abstract class HCatBaseStorer extends StoreFunc implements StoreMetadata { - private static final List SUPPORTED_INTEGER_CONVERSIONS = - Lists.newArrayList(Type.TINYINT, Type.SMALLINT, Type.INT); - protected static final String COMPUTED_OUTPUT_SCHEMA = "hcat.output.schema"; - protected final List partitionKeys; - protected final Map partitions; - protected Schema pigSchema; - private RecordWriter, HCatRecord> writer; - protected HCatSchema computedSchema; - protected static final String PIG_SCHEMA = "hcat.pig.store.schema"; - protected String sign; - - public HCatBaseStorer(String partSpecs, String schema) throws Exception { - - partitionKeys = new ArrayList(); - partitions = new HashMap(); - if (partSpecs != null && !partSpecs.trim().isEmpty()) { - String[] partKVPs = partSpecs.split(","); - for (String partKVP : partKVPs) { - String[] partKV = partKVP.split("="); - if (partKV.length == 2) { - String partKey = partKV[0].trim(); - partitionKeys.add(partKey); - partitions.put(partKey, partKV[1].trim()); - } else { - throw new FrontendException("Invalid partition column specification. " + partSpecs, PigHCatUtil.PIG_EXCEPTION_CODE); - } - } + private static final List SUPPORTED_INTEGER_CONVERSIONS = + Lists.newArrayList(Type.TINYINT, Type.SMALLINT, Type.INT); + protected static final String COMPUTED_OUTPUT_SCHEMA = "hcat.output.schema"; + protected final List partitionKeys; + protected final Map partitions; + protected Schema pigSchema; + private RecordWriter, HCatRecord> writer; + protected HCatSchema computedSchema; + protected static final String PIG_SCHEMA = "hcat.pig.store.schema"; + protected String sign; + + public HCatBaseStorer(String partSpecs, String schema) throws Exception { + + partitionKeys = new ArrayList(); + partitions = new HashMap(); + if (partSpecs != null && !partSpecs.trim().isEmpty()) { + String[] partKVPs = partSpecs.split(","); + for (String partKVP : partKVPs) { + String[] partKV = partKVP.split("="); + if (partKV.length == 2) { + String partKey = partKV[0].trim(); + partitionKeys.add(partKey); + partitions.put(partKey, partKV[1].trim()); + } else { + throw new FrontendException("Invalid partition column specification. " + partSpecs, PigHCatUtil.PIG_EXCEPTION_CODE); } + } + } - if (schema != null) { - pigSchema = Utils.getSchemaFromString(schema); - } + if (schema != null) { + pigSchema = Utils.getSchemaFromString(schema); + } + } + + @Override + public void checkSchema(ResourceSchema resourceSchema) throws IOException { + + /* Schema provided by user and the schema computed by Pig + * at the time of calling store must match. + */ + Schema runtimeSchema = Schema.getPigSchema(resourceSchema); + if (pigSchema != null) { + if (!Schema.equals(runtimeSchema, pigSchema, false, true)) { + throw new FrontendException("Schema provided in store statement doesn't match with the Schema" + + "returned by Pig run-time. 
Schema provided in HCatStorer: " + pigSchema.toString() + " Schema received from Pig runtime: " + runtimeSchema.toString(), PigHCatUtil.PIG_EXCEPTION_CODE); + } + } else { + pigSchema = runtimeSchema; + } + UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema)); + } + + /** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing + * schema of the table in metastore. + */ + protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { + List fieldSchemas = new ArrayList(pigSchema.size()); + for (FieldSchema fSchema : pigSchema.getFields()) { + try { + HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); + + fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema)); + } catch (HCatException he) { + throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); + } } + return new HCatSchema(fieldSchemas); + } - @Override - public void checkSchema(ResourceSchema resourceSchema) throws IOException { - - /* Schema provided by user and the schema computed by Pig - * at the time of calling store must match. - */ - Schema runtimeSchema = Schema.getPigSchema(resourceSchema); - if (pigSchema != null) { - if (!Schema.equals(runtimeSchema, pigSchema, false, true)) { - throw new FrontendException("Schema provided in store statement doesn't match with the Schema" + - "returned by Pig run-time. Schema provided in HCatStorer: " + pigSchema.toString() + " Schema received from Pig runtime: " + runtimeSchema.toString(), PigHCatUtil.PIG_EXCEPTION_CODE); - } - } else { - pigSchema = runtimeSchema; - } - UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema)); + public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { + if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { + return true; + } + // Column was not found in table schema. Its a new column + List tupSchema = bagFieldSchema.schema.getFields(); + if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { + return true; } + return false; + } + + + private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema) throws FrontendException, HCatException { + byte type = fSchema.type; + switch (type) { + + case DataType.CHARARRAY: + case DataType.BIGCHARARRAY: + return new HCatFieldSchema(fSchema.alias, Type.STRING, null); - /** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing - * schema of the table in metastore. 
- */ - protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { - List fieldSchemas = new ArrayList(pigSchema.size()); - for (FieldSchema fSchema : pigSchema.getFields()) { - try { - HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); - - fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema)); - } catch (HCatException he) { - throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); - } + case DataType.INTEGER: + if (hcatFieldSchema != null) { + if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) { + throw new FrontendException("Unsupported type: " + type + " in Pig's schema", + PigHCatUtil.PIG_EXCEPTION_CODE); } - return new HCatSchema(fieldSchemas); + return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getType(), null); + } else { + return new HCatFieldSchema(fSchema.alias, Type.INT, null); + } + + case DataType.LONG: + return new HCatFieldSchema(fSchema.alias, Type.BIGINT, null); + + case DataType.FLOAT: + return new HCatFieldSchema(fSchema.alias, Type.FLOAT, null); + + case DataType.DOUBLE: + return new HCatFieldSchema(fSchema.alias, Type.DOUBLE, null); + + case DataType.BYTEARRAY: + return new HCatFieldSchema(fSchema.alias, Type.BINARY, null); + + case DataType.BAG: + Schema bagSchema = fSchema.schema; + List arrFields = new ArrayList(1); + FieldSchema field; + // Find out if we need to throw away the tuple or not. + if (removeTupleFromBag(hcatFieldSchema, fSchema)) { + field = bagSchema.getField(0).schema.getField(0); + } else { + field = bagSchema.getField(0); + } + arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0))); + return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), ""); + + case DataType.TUPLE: + List fieldNames = new ArrayList(); + List hcatFSs = new ArrayList(); + HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema(); + List fields = fSchema.schema.getFields(); + for (int i = 0; i < fields.size(); i++) { + FieldSchema fieldSchema = fields.get(i); + fieldNames.add(fieldSchema.alias); + hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i))); + } + return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), ""); + + case DataType.MAP: { + // Pig's schema contain no type information about map's keys and + // values. So, if its a new column assume if its existing + // return whatever is contained in the existing column. + + HCatFieldSchema valFS; + List valFSList = new ArrayList(1); + + if (hcatFieldSchema != null) { + return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, hcatFieldSchema.getMapValueSchema(), ""); + } + + // Column not found in target table. Its a new column. Its schema is map + valFS = new HCatFieldSchema(fSchema.alias, Type.STRING, ""); + valFSList.add(valFS); + return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, new HCatSchema(valFSList), ""); } - public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { - if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { - return true; - } - // Column was not found in table schema. 
Its a new column - List tupSchema = bagFieldSchema.schema.getFields(); - if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { - return true; - } - return false; + default: + throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE); } + } + @Override + public void prepareToWrite(RecordWriter writer) throws IOException { + this.writer = writer; + computedSchema = (HCatSchema) ObjectSerializer.deserialize(UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).getProperty(COMPUTED_OUTPUT_SCHEMA)); + } - private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema) throws FrontendException, HCatException { - byte type = fSchema.type; - switch (type) { - - case DataType.CHARARRAY: - case DataType.BIGCHARARRAY: - return new HCatFieldSchema(fSchema.alias, Type.STRING, null); - - case DataType.INTEGER: - if (hcatFieldSchema != null) { - if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) { - throw new FrontendException("Unsupported type: " + type + " in Pig's schema", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getType(), null); - } else { - return new HCatFieldSchema(fSchema.alias, Type.INT, null); - } - - case DataType.LONG: - return new HCatFieldSchema(fSchema.alias, Type.BIGINT, null); - - case DataType.FLOAT: - return new HCatFieldSchema(fSchema.alias, Type.FLOAT, null); - - case DataType.DOUBLE: - return new HCatFieldSchema(fSchema.alias, Type.DOUBLE, null); - - case DataType.BYTEARRAY: - return new HCatFieldSchema(fSchema.alias, Type.BINARY, null); - - case DataType.BAG: - Schema bagSchema = fSchema.schema; - List arrFields = new ArrayList(1); - FieldSchema field; - // Find out if we need to throw away the tuple or not. - if (removeTupleFromBag(hcatFieldSchema, fSchema)) { - field = bagSchema.getField(0).schema.getField(0); - } else { - field = bagSchema.getField(0); - } - arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0))); - return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), ""); - - case DataType.TUPLE: - List fieldNames = new ArrayList(); - List hcatFSs = new ArrayList(); - HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema(); - List fields = fSchema.schema.getFields(); - for (int i = 0; i < fields.size(); i++) { - FieldSchema fieldSchema = fields.get(i); - fieldNames.add(fieldSchema.alias); - hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i))); - } - return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), ""); - - case DataType.MAP: { - // Pig's schema contain no type information about map's keys and - // values. So, if its a new column assume if its existing - // return whatever is contained in the existing column. - - HCatFieldSchema valFS; - List valFSList = new ArrayList(1); - - if (hcatFieldSchema != null) { - return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, hcatFieldSchema.getMapValueSchema(), ""); - } - - // Column not found in target table. Its a new column. 
Its schema is map - valFS = new HCatFieldSchema(fSchema.alias, Type.STRING, ""); - valFSList.add(valFS); - return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, new HCatSchema(valFSList), ""); - } + @Override + public void putNext(Tuple tuple) throws IOException { - default: - throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE); - } - } + List outgoing = new ArrayList(tuple.size()); - @Override - public void prepareToWrite(RecordWriter writer) throws IOException { - this.writer = writer; - computedSchema = (HCatSchema) ObjectSerializer.deserialize(UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).getProperty(COMPUTED_OUTPUT_SCHEMA)); + int i = 0; + for (HCatFieldSchema fSchema : computedSchema.getFields()) { + outgoing.add(getJavaObj(tuple.get(i++), fSchema)); } + try { + writer.write(null, new DefaultHCatRecord(outgoing)); + } catch (InterruptedException e) { + throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e); + } + } - @Override - public void putNext(Tuple tuple) throws IOException { + private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException { + try { - List outgoing = new ArrayList(tuple.size()); + // The real work-horse. Spend time and energy in this method if there is + // need to keep HCatStorer lean and go fast. + Type type = hcatFS.getType(); + switch (type) { - int i = 0; - for (HCatFieldSchema fSchema : computedSchema.getFields()) { - outgoing.add(getJavaObj(tuple.get(i++), fSchema)); - } - try { - writer.write(null, new DefaultHCatRecord(outgoing)); - } catch (InterruptedException e) { - throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e); + case BINARY: + if (pigObj == null) { + return null; } - } + return ((DataByteArray) pigObj).get(); - private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException { - try { - - // The real work-horse. Spend time and energy in this method if there is - // need to keep HCatStorer lean and go fast. - Type type = hcatFS.getType(); - switch (type) { - - case BINARY: - if (pigObj == null) { - return null; - } - return ((DataByteArray) pigObj).get(); - - case STRUCT: - if (pigObj == null) { - return null; - } - HCatSchema structSubSchema = hcatFS.getStructSubSchema(); - // Unwrap the tuple. - List all = ((Tuple) pigObj).getAll(); - ArrayList converted = new ArrayList(all.size()); - for (int i = 0; i < all.size(); i++) { - converted.add(getJavaObj(all.get(i), structSubSchema.get(i))); - } - return converted; - - case ARRAY: - if (pigObj == null) { - return null; - } - // Unwrap the bag. - DataBag pigBag = (DataBag) pigObj; - HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0); - boolean needTuple = tupFS.getType() == Type.STRUCT; - List bagContents = new ArrayList((int) pigBag.size()); - Iterator bagItr = pigBag.iterator(); - - while (bagItr.hasNext()) { - // If there is only one element in tuple contained in bag, we throw away the tuple. - bagContents.add(getJavaObj(needTuple ? 
bagItr.next() : bagItr.next().get(0), tupFS)); - - } - return bagContents; - case MAP: - if (pigObj == null) { - return null; - } - Map pigMap = (Map) pigObj; - Map typeMap = new HashMap(); - for (Entry entry : pigMap.entrySet()) { - // the value has a schema and not a FieldSchema - typeMap.put( - // Schema validation enforces that the Key is a String - (String) entry.getKey(), - getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0))); - } - return typeMap; - case STRING: - case INT: - case BIGINT: - case FLOAT: - case DOUBLE: - return pigObj; - case SMALLINT: - if (pigObj == null) { - return null; - } - if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) { - throw new BackendException("Value " + pigObj + " is outside the bounds of column " + - hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); - } - return ((Integer) pigObj).shortValue(); - case TINYINT: - if (pigObj == null) { - return null; - } - if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) { - throw new BackendException("Value " + pigObj + " is outside the bounds of column " + - hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); - } - return ((Integer) pigObj).byteValue(); - case BOOLEAN: - // would not pass schema validation anyway - throw new BackendException("Incompatible type " + type + " found in hcat table schema: " + hcatFS, PigHCatUtil.PIG_EXCEPTION_CODE); - default: - throw new BackendException("Unexpected type " + type + " for value " + pigObj + (pigObj == null ? "" : " of class " + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); - } - } catch (BackendException e) { - // provide the path to the field in the error message - throw new BackendException( - (hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), - e.getCause() == null ? e : e.getCause()); + case STRUCT: + if (pigObj == null) { + return null; } - } + HCatSchema structSubSchema = hcatFS.getStructSubSchema(); + // Unwrap the tuple. + List all = ((Tuple) pigObj).getAll(); + ArrayList converted = new ArrayList(all.size()); + for (int i = 0; i < all.size(); i++) { + converted.add(getJavaObj(all.get(i), structSubSchema.get(i))); + } + return converted; - @Override - public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException { + case ARRAY: + if (pigObj == null) { + return null; + } + // Unwrap the bag. + DataBag pigBag = (DataBag) pigObj; + HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0); + boolean needTuple = tupFS.getType() == Type.STRUCT; + List bagContents = new ArrayList((int) pigBag.size()); + Iterator bagItr = pigBag.iterator(); - // Need to necessarily override this method since default impl assumes HDFS - // based location string. - return location; - } + while (bagItr.hasNext()) { + // If there is only one element in tuple contained in bag, we throw away the tuple. + bagContents.add(getJavaObj(needTuple ? 
bagItr.next() : bagItr.next().get(0), tupFS)); - @Override - public void setStoreFuncUDFContextSignature(String signature) { - sign = signature; + } + return bagContents; + case MAP: + if (pigObj == null) { + return null; + } + Map pigMap = (Map) pigObj; + Map typeMap = new HashMap(); + for (Entry entry : pigMap.entrySet()) { + // the value has a schema and not a FieldSchema + typeMap.put( + // Schema validation enforces that the Key is a String + (String) entry.getKey(), + getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0))); + } + return typeMap; + case STRING: + case INT: + case BIGINT: + case FLOAT: + case DOUBLE: + return pigObj; + case SMALLINT: + if (pigObj == null) { + return null; + } + if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) { + throw new BackendException("Value " + pigObj + " is outside the bounds of column " + + hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); + } + return ((Integer) pigObj).shortValue(); + case TINYINT: + if (pigObj == null) { + return null; + } + if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) { + throw new BackendException("Value " + pigObj + " is outside the bounds of column " + + hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); + } + return ((Integer) pigObj).byteValue(); + case BOOLEAN: + // would not pass schema validation anyway + throw new BackendException("Incompatible type " + type + " found in hcat table schema: " + hcatFS, PigHCatUtil.PIG_EXCEPTION_CODE); + default: + throw new BackendException("Unexpected type " + type + " for value " + pigObj + (pigObj == null ? "" : " of class " + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); + } + } catch (BackendException e) { + // provide the path to the field in the error message + throw new BackendException( + (hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), + e.getCause() == null ? e : e.getCause()); } + } + @Override + public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException { - protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException { + // Need to necessarily override this method since default impl assumes HDFS + // based location string. + return location; + } - // Iterate through all the elements in Pig Schema and do validations as - // dictated by semantics, consult HCatSchema of table when need be. + @Override + public void setStoreFuncUDFContextSignature(String signature) { + sign = signature; + } - for (FieldSchema pigField : pigSchema.getFields()) { - HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema); - validateSchema(pigField, hcatField); - } - try { - PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema); - } catch (IOException e) { - throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e); - } - } + protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException { + // Iterate through all the elements in Pig Schema and do validations as + // dictated by semantics, consult HCatSchema of table when need be. 
- private void validateSchema(FieldSchema pigField, HCatFieldSchema hcatField) - throws HCatException, FrontendException { - validateAlias(pigField.alias); - byte type = pigField.type; - if (DataType.isComplex(type)) { - switch (type) { - - case DataType.MAP: - if (hcatField != null) { - if (hcatField.getMapKeyType() != Type.STRING) { - throw new FrontendException("Key Type of map must be String " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); - } - // Map values can be primitive or complex - } - break; - - case DataType.BAG: - HCatSchema arrayElementSchema = hcatField == null ? null : hcatField.getArrayElementSchema(); - for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { - validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema)); - } - break; - - case DataType.TUPLE: - HCatSchema structSubSchema = hcatField == null ? null : hcatField.getStructSubSchema(); - for (FieldSchema innerField : pigField.schema.getFields()) { - validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema)); - } - break; - - default: - throw new FrontendException("Internal Error.", PigHCatUtil.PIG_EXCEPTION_CODE); - } - } + for (FieldSchema pigField : pigSchema.getFields()) { + HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema); + validateSchema(pigField, hcatField); } - private void validateAlias(String alias) throws FrontendException { - if (alias == null) { - throw new FrontendException("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.", PigHCatUtil.PIG_EXCEPTION_CODE); + try { + PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema); + } catch (IOException e) { + throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e); + } + } + + + private void validateSchema(FieldSchema pigField, HCatFieldSchema hcatField) + throws HCatException, FrontendException { + validateAlias(pigField.alias); + byte type = pigField.type; + if (DataType.isComplex(type)) { + switch (type) { + + case DataType.MAP: + if (hcatField != null) { + if (hcatField.getMapKeyType() != Type.STRING) { + throw new FrontendException("Key Type of map must be String " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); + } + // Map values can be primitive or complex } - if (alias.matches(".*[A-Z]+.*")) { - throw new FrontendException("Column names should all be in lowercase. Invalid name found: " + alias, PigHCatUtil.PIG_EXCEPTION_CODE); + break; + + case DataType.BAG: + HCatSchema arrayElementSchema = hcatField == null ? null : hcatField.getArrayElementSchema(); + for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { + validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema)); } - } + break; - // Finds column by name in HCatSchema, if not found returns null. - private HCatFieldSchema getColFromSchema(String alias, HCatSchema tblSchema) { - if (tblSchema != null) { - for (HCatFieldSchema hcatField : tblSchema.getFields()) { - if (hcatField != null && hcatField.getName() != null && hcatField.getName().equalsIgnoreCase(alias)) { - return hcatField; - } - } + case DataType.TUPLE: + HCatSchema structSubSchema = hcatField == null ? 
null : hcatField.getStructSubSchema(); + for (FieldSchema innerField : pigField.schema.getFields()) { + validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema)); } - // Its a new column - return null; - } + break; - @Override - public void cleanupOnFailure(String location, Job job) throws IOException { - // No-op. + default: + throw new FrontendException("Internal Error.", PigHCatUtil.PIG_EXCEPTION_CODE); + } } + } - @Override - public void storeStatistics(ResourceStatistics stats, String arg1, Job job) throws IOException { + private void validateAlias(String alias) throws FrontendException { + if (alias == null) { + throw new FrontendException("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.", PigHCatUtil.PIG_EXCEPTION_CODE); + } + if (alias.matches(".*[A-Z]+.*")) { + throw new FrontendException("Column names should all be in lowercase. Invalid name found: " + alias, PigHCatUtil.PIG_EXCEPTION_CODE); + } + } + + // Finds column by name in HCatSchema, if not found returns null. + private HCatFieldSchema getColFromSchema(String alias, HCatSchema tblSchema) { + if (tblSchema != null) { + for (HCatFieldSchema hcatField : tblSchema.getFields()) { + if (hcatField != null && hcatField.getName() != null && hcatField.getName().equalsIgnoreCase(alias)) { + return hcatField; + } + } } + // Its a new column + return null; + } + + @Override + public void cleanupOnFailure(String location, Job job) throws IOException { + // No-op. + } + + @Override + public void storeStatistics(ResourceStatistics stats, String arg1, Job job) throws IOException { + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java index 5d56c61..a32149c 100644 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java @@ -53,222 +53,222 @@ public class HCatLoader extends HCatBaseLoader { - private static final String PARTITION_FILTER = "partition.filter"; // for future use + private static final String PARTITION_FILTER = "partition.filter"; // for future use - private HCatInputFormat hcatInputFormat = null; - private String dbName; - private String tableName; - private String hcatServerUri; - private String partitionFilterString; - private final PigHCatUtil phutil = new PigHCatUtil(); + private HCatInputFormat hcatInputFormat = null; + private String dbName; + private String tableName; + private String hcatServerUri; + private String partitionFilterString; + private final PigHCatUtil phutil = new PigHCatUtil(); - // Signature for wrapped loader, see comments in LoadFuncBasedInputDriver.initialize - final public static String INNER_SIGNATURE = "hcatloader.inner.signature"; - final public static String INNER_SIGNATURE_PREFIX = "hcatloader_inner_signature"; - // A hash map which stores job credentials. The key is a signature passed by Pig, which is - //unique to the load func and input file name (table, in our case). - private static Map jobCredentials = new HashMap(); + // Signature for wrapped loader, see comments in LoadFuncBasedInputDriver.initialize + final public static String INNER_SIGNATURE = "hcatloader.inner.signature"; + final public static String INNER_SIGNATURE_PREFIX = "hcatloader_inner_signature"; + // A hash map which stores job credentials. 
The key is a signature passed by Pig, which is + //unique to the load func and input file name (table, in our case). + private static Map jobCredentials = new HashMap(); - @Override - public InputFormat getInputFormat() throws IOException { - if (hcatInputFormat == null) { - hcatInputFormat = new HCatInputFormat(); - } - return hcatInputFormat; - } - - @Override - public String relativeToAbsolutePath(String location, Path curDir) throws IOException { - return location; + @Override + public InputFormat getInputFormat() throws IOException { + if (hcatInputFormat == null) { + hcatInputFormat = new HCatInputFormat(); } + return hcatInputFormat; + } - @Override - public void setLocation(String location, Job job) throws IOException { - HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() - .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); + @Override + public String relativeToAbsolutePath(String location, Path curDir) throws IOException { + return location; + } - UDFContext udfContext = UDFContext.getUDFContext(); - Properties udfProps = udfContext.getUDFProperties(this.getClass(), - new String[]{signature}); - job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature); - Pair dbTablePair = PigHCatUtil.getDBTableNames(location); - dbName = dbTablePair.first; - tableName = dbTablePair.second; + @Override + public void setLocation(String location, Job job) throws IOException { + HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() + .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); - RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps - .get(PRUNE_PROJECTION_INFO); - // get partitionFilterString stored in the UDFContext - it would have - // been stored there by an earlier call to setPartitionFilter - // call setInput on HCatInputFormat only in the frontend because internally - // it makes calls to the hcat server - we don't want these to happen in - // the backend - // in the hadoop front end mapred.task.id property will not be set in - // the Configuration - if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) { - for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { - PigHCatUtil.getConfigFromUDFProperties(udfProps, - job.getConfiguration(), emr.nextElement().toString()); - } - if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { - //Combine credentials and credentials from job takes precedence for freshness - Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature); - crd.addAll(job.getCredentials()); - job.getCredentials().addAll(crd); - } - } else { - Job clone = new Job(job.getConfiguration()); - HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString()); + UDFContext udfContext = UDFContext.getUDFContext(); + Properties udfProps = udfContext.getUDFProperties(this.getClass(), + new String[]{signature}); + job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature); + Pair dbTablePair = PigHCatUtil.getDBTableNames(location); + dbName = dbTablePair.first; + tableName = dbTablePair.second; - // We will store all the new /changed properties in the job in the - // udf context, so the the HCatInputFormat.setInput method need not - //be called many times. 
- for (Entry keyValue : job.getConfiguration()) { - String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); - if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { - udfProps.put(keyValue.getKey(), keyValue.getValue()); - } - } - udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true); + RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps + .get(PRUNE_PROJECTION_INFO); + // get partitionFilterString stored in the UDFContext - it would have + // been stored there by an earlier call to setPartitionFilter + // call setInput on HCatInputFormat only in the frontend because internally + // it makes calls to the hcat server - we don't want these to happen in + // the backend + // in the hadoop front end mapred.task.id property will not be set in + // the Configuration + if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) { + for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { + PigHCatUtil.getConfigFromUDFProperties(udfProps, + job.getConfiguration(), emr.nextElement().toString()); + } + if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { + //Combine credentials and credentials from job takes precedence for freshness + Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature); + crd.addAll(job.getCredentials()); + job.getCredentials().addAll(crd); + } + } else { + Job clone = new Job(job.getConfiguration()); + HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString()); - //Store credentials in a private hash map and not the udf context to - // make sure they are not public. - Credentials crd = new Credentials(); - crd.addAll(job.getCredentials()); - jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd); + // We will store all the new /changed properties in the job in the + // udf context, so the the HCatInputFormat.setInput method need not + //be called many times. + for (Entry keyValue : job.getConfiguration()) { + String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); + if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { + udfProps.put(keyValue.getKey(), keyValue.getValue()); } + } + udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true); - // Need to also push projections by calling setOutputSchema on - // HCatInputFormat - we have to get the RequiredFields information - // from the UdfContext, translate it to an Schema and then pass it - // The reason we do this here is because setLocation() is called by - // Pig runtime at InputFormat.getSplits() and - // InputFormat.createRecordReader() time - we are not sure when - // HCatInputFormat needs to know about pruned projections - so doing it - // here will ensure we communicate to HCatInputFormat about pruned - // projections at getSplits() and createRecordReader() time + //Store credentials in a private hash map and not the udf context to + // make sure they are not public. 
+ Credentials crd = new Credentials(); + crd.addAll(job.getCredentials()); + jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd); + } - if (requiredFieldsInfo != null) { - // convert to hcatschema and pass to HCatInputFormat - try { - outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass()); - HCatInputFormat.setOutputSchema(job, outputSchema); - } catch (Exception e) { - throw new IOException(e); - } - } else { - // else - this means pig's optimizer never invoked the pushProjection - // method - so we need all fields and hence we should not call the - // setOutputSchema on HCatInputFormat - if (HCatUtil.checkJobContextIfRunningFromBackend(job)) { - try { - HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA); - outputSchema = hcatTableSchema; - HCatInputFormat.setOutputSchema(job, outputSchema); - } catch (Exception e) { - throw new IOException(e); - } - } - } + // Need to also push projections by calling setOutputSchema on + // HCatInputFormat - we have to get the RequiredFields information + // from the UdfContext, translate it to an Schema and then pass it + // The reason we do this here is because setLocation() is called by + // Pig runtime at InputFormat.getSplits() and + // InputFormat.createRecordReader() time - we are not sure when + // HCatInputFormat needs to know about pruned projections - so doing it + // here will ensure we communicate to HCatInputFormat about pruned + // projections at getSplits() and createRecordReader() time + if (requiredFieldsInfo != null) { + // convert to hcatschema and pass to HCatInputFormat + try { + outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass()); + HCatInputFormat.setOutputSchema(job, outputSchema); + } catch (Exception e) { + throw new IOException(e); + } + } else { + // else - this means pig's optimizer never invoked the pushProjection + // method - so we need all fields and hence we should not call the + // setOutputSchema on HCatInputFormat + if (HCatUtil.checkJobContextIfRunningFromBackend(job)) { + try { + HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA); + outputSchema = hcatTableSchema; + HCatInputFormat.setOutputSchema(job, outputSchema); + } catch (Exception e) { + throw new IOException(e); + } + } } - @Override - public String[] getPartitionKeys(String location, Job job) - throws IOException { - Table table = phutil.getTable(location, - hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), - PigHCatUtil.getHCatServerPrincipal(job)); - List tablePartitionKeys = table.getPartitionKeys(); - String[] partitionKeys = new String[tablePartitionKeys.size()]; - for (int i = 0; i < tablePartitionKeys.size(); i++) { - partitionKeys[i] = tablePartitionKeys.get(i).getName(); - } - return partitionKeys; + } + + @Override + public String[] getPartitionKeys(String location, Job job) + throws IOException { + Table table = phutil.getTable(location, + hcatServerUri != null ? 
hcatServerUri : PigHCatUtil.getHCatServerUri(job), + PigHCatUtil.getHCatServerPrincipal(job)); + List tablePartitionKeys = table.getPartitionKeys(); + String[] partitionKeys = new String[tablePartitionKeys.size()]; + for (int i = 0; i < tablePartitionKeys.size(); i++) { + partitionKeys[i] = tablePartitionKeys.get(i).getName(); } + return partitionKeys; + } - @Override - public ResourceSchema getSchema(String location, Job job) throws IOException { - HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() - .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); + @Override + public ResourceSchema getSchema(String location, Job job) throws IOException { + HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() + .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); - Table table = phutil.getTable(location, - hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), - PigHCatUtil.getHCatServerPrincipal(job)); - HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table); - try { - PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema); - } catch (IOException e) { - throw new PigException( - "Table schema incompatible for reading through HCatLoader :" + e.getMessage() - + ";[Table schema was " + hcatTableSchema.toString() + "]" - , PigHCatUtil.PIG_EXCEPTION_CODE, e); - } - storeInUDFContext(signature, HCatConstants.HCAT_TABLE_SCHEMA, hcatTableSchema); - outputSchema = hcatTableSchema; - return PigHCatUtil.getResourceSchema(hcatTableSchema); + Table table = phutil.getTable(location, + hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), + PigHCatUtil.getHCatServerPrincipal(job)); + HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table); + try { + PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema); + } catch (IOException e) { + throw new PigException( + "Table schema incompatible for reading through HCatLoader :" + e.getMessage() + + ";[Table schema was " + hcatTableSchema.toString() + "]" + , PigHCatUtil.PIG_EXCEPTION_CODE, e); } + storeInUDFContext(signature, HCatConstants.HCAT_TABLE_SCHEMA, hcatTableSchema); + outputSchema = hcatTableSchema; + return PigHCatUtil.getResourceSchema(hcatTableSchema); + } - @Override - public void setPartitionFilter(Expression partitionFilter) throws IOException { - // convert the partition filter expression into a string expected by - // hcat and pass it in setLocation() + @Override + public void setPartitionFilter(Expression partitionFilter) throws IOException { + // convert the partition filter expression into a string expected by + // hcat and pass it in setLocation() - partitionFilterString = getHCatComparisonString(partitionFilter); + partitionFilterString = getHCatComparisonString(partitionFilter); - // store this in the udf context so we can get it later - storeInUDFContext(signature, - PARTITION_FILTER, partitionFilterString); - } + // store this in the udf context so we can get it later + storeInUDFContext(signature, + PARTITION_FILTER, partitionFilterString); + } - /** - * Get statistics about the data to be loaded. Only input data size is implemented at this time. 
- */ - @Override - public ResourceStatistics getStatistics(String location, Job job) throws IOException { - try { - ResourceStatistics stats = new ResourceStatistics(); - InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize( - job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); - stats.setmBytes(getSizeInBytes(inputJobInfo) / 1024 / 1024); - return stats; - } catch (Exception e) { - throw new IOException(e); - } + /** + * Get statistics about the data to be loaded. Only input data size is implemented at this time. + */ + @Override + public ResourceStatistics getStatistics(String location, Job job) throws IOException { + try { + ResourceStatistics stats = new ResourceStatistics(); + InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize( + job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); + stats.setmBytes(getSizeInBytes(inputJobInfo) / 1024 / 1024); + return stats; + } catch (Exception e) { + throw new IOException(e); } + } - private String getPartitionFilterString() { - if (partitionFilterString == null) { - Properties props = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[]{signature}); - partitionFilterString = props.getProperty(PARTITION_FILTER); - } - return partitionFilterString; + private String getPartitionFilterString() { + if (partitionFilterString == null) { + Properties props = UDFContext.getUDFContext().getUDFProperties( + this.getClass(), new String[]{signature}); + partitionFilterString = props.getProperty(PARTITION_FILTER); } + return partitionFilterString; + } - private String getHCatComparisonString(Expression expr) { - if (expr instanceof BinaryExpression) { - // call getHCatComparisonString on lhs and rhs, and and join the - // results with OpType string + private String getHCatComparisonString(Expression expr) { + if (expr instanceof BinaryExpression) { + // call getHCatComparisonString on lhs and rhs, and and join the + // results with OpType string - // we can just use OpType.toString() on all Expression types except - // Equal, NotEqualt since Equal has '==' in toString() and - // we need '=' - String opStr = null; - switch (expr.getOpType()) { - case OP_EQ: - opStr = " = "; - break; - default: - opStr = expr.getOpType().toString(); - } - BinaryExpression be = (BinaryExpression) expr; - return "(" + getHCatComparisonString(be.getLhs()) + - opStr + - getHCatComparisonString(be.getRhs()) + ")"; - } else { - // should be a constant or column - return expr.toString(); - } + // we can just use OpType.toString() on all Expression types except + // Equal, NotEqualt since Equal has '==' in toString() and + // we need '=' + String opStr = null; + switch (expr.getOpType()) { + case OP_EQ: + opStr = " = "; + break; + default: + opStr = expr.getOpType().toString(); + } + BinaryExpression be = (BinaryExpression) expr; + return "(" + getHCatComparisonString(be.getLhs()) + + opStr + + getHCatComparisonString(be.getRhs()) + ")"; + } else { + // should be a constant or column + return expr.toString(); } + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatStorer.java index 6cbe926..95583ba 100644 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatStorer.java @@ -52,117 +52,117 @@ public class HCatStorer extends HCatBaseStorer { - // Signature for wrapped storer, see comments in 
LoadFuncBasedInputDriver.initialize - final public static String INNER_SIGNATURE = "hcatstorer.inner.signature"; - final public static String INNER_SIGNATURE_PREFIX = "hcatstorer_inner_signature"; - // A hash map which stores job credentials. The key is a signature passed by Pig, which is - //unique to the store func and out file name (table, in our case). - private static Map jobCredentials = new HashMap(); - - - public HCatStorer(String partSpecs, String schema) throws Exception { - super(partSpecs, schema); - } - - public HCatStorer(String partSpecs) throws Exception { - this(partSpecs, null); - } - - public HCatStorer() throws Exception { - this(null, null); - } - - @Override - public OutputFormat getOutputFormat() throws IOException { - return new HCatOutputFormat(); - } - - @Override - public void setStoreLocation(String location, Job job) throws IOException { - HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() - .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, false); - - Configuration config = job.getConfiguration(); - config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign); - Properties udfProps = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[]{sign}); - String[] userStr = location.split("\\."); - - if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) { - for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { - PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString()); - } - Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign); - if (crd != null) { - job.getCredentials().addAll(crd); - } - } else { - Job clone = new Job(job.getConfiguration()); - OutputJobInfo outputJobInfo; - if (userStr.length == 2) { - outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions); - } else if (userStr.length == 1) { - outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions); - } else { - throw new FrontendException("location " + location - + " is invalid. 
It must be of the form [db.]table", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA)); - if (schema != null) { - pigSchema = schema; - } - if (pigSchema == null) { - throw new FrontendException( - "Schema for data cannot be determined.", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION); - if (externalLocation != null) { - outputJobInfo.setLocation(externalLocation); - } - try { - HCatOutputFormat.setOutput(job, outputJobInfo); - } catch (HCatException he) { - // pass the message to the user - essentially something about - // the table - // information passed to HCatOutputFormat was not right - throw new PigException(he.getMessage(), - PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job); - try { - doSchemaValidations(pigSchema, hcatTblSchema); - } catch (HCatException he) { - throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema); - HCatOutputFormat.setSchema(job, computedSchema); - udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema)); - - // We will store all the new /changed properties in the job in the - // udf context, so the the HCatOutputFormat.setOutput and setSchema - // methods need not be called many times. - for (Entry keyValue : job.getConfiguration()) { - String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); - if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { - udfProps.put(keyValue.getKey(), keyValue.getValue()); - } - } - //Store credentials in a private hash map and not the udf context to - // make sure they are not public. - jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials()); - udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true); + // Signature for wrapped storer, see comments in LoadFuncBasedInputDriver.initialize + final public static String INNER_SIGNATURE = "hcatstorer.inner.signature"; + final public static String INNER_SIGNATURE_PREFIX = "hcatstorer_inner_signature"; + // A hash map which stores job credentials. The key is a signature passed by Pig, which is + //unique to the store func and out file name (table, in our case). 
+ private static Map jobCredentials = new HashMap(); + + + public HCatStorer(String partSpecs, String schema) throws Exception { + super(partSpecs, schema); + } + + public HCatStorer(String partSpecs) throws Exception { + this(partSpecs, null); + } + + public HCatStorer() throws Exception { + this(null, null); + } + + @Override + public OutputFormat getOutputFormat() throws IOException { + return new HCatOutputFormat(); + } + + @Override + public void setStoreLocation(String location, Job job) throws IOException { + HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() + .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, false); + + Configuration config = job.getConfiguration(); + config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign); + Properties udfProps = UDFContext.getUDFContext().getUDFProperties( + this.getClass(), new String[]{sign}); + String[] userStr = location.split("\\."); + + if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) { + for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { + PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString()); + } + Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign); + if (crd != null) { + job.getCredentials().addAll(crd); + } + } else { + Job clone = new Job(job.getConfiguration()); + OutputJobInfo outputJobInfo; + if (userStr.length == 2) { + outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions); + } else if (userStr.length == 1) { + outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions); + } else { + throw new FrontendException("location " + location + + " is invalid. It must be of the form [db.]table", + PigHCatUtil.PIG_EXCEPTION_CODE); + } + Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA)); + if (schema != null) { + pigSchema = schema; + } + if (pigSchema == null) { + throw new FrontendException( + "Schema for data cannot be determined.", + PigHCatUtil.PIG_EXCEPTION_CODE); + } + String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION); + if (externalLocation != null) { + outputJobInfo.setLocation(externalLocation); + } + try { + HCatOutputFormat.setOutput(job, outputJobInfo); + } catch (HCatException he) { + // pass the message to the user - essentially something about + // the table + // information passed to HCatOutputFormat was not right + throw new PigException(he.getMessage(), + PigHCatUtil.PIG_EXCEPTION_CODE, he); + } + HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job); + try { + doSchemaValidations(pigSchema, hcatTblSchema); + } catch (HCatException he) { + throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); + } + computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema); + HCatOutputFormat.setSchema(job, computedSchema); + udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema)); + + // We will store all the new /changed properties in the job in the + // udf context, so the the HCatOutputFormat.setOutput and setSchema + // methods need not be called many times. 
+ for (Entry keyValue : job.getConfiguration()) { + String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); + if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { + udfProps.put(keyValue.getKey(), keyValue.getValue()); } + } + //Store credentials in a private hash map and not the udf context to + // make sure they are not public. + jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials()); + udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true); } + } - @Override - public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException { - ShimLoader.getHadoopShims().getHCatShim().commitJob(getOutputFormat(), job); - } + @Override + public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException { + ShimLoader.getHadoopShims().getHCatShim().commitJob(getOutputFormat(), job); + } - @Override - public void cleanupOnFailure(String location, Job job) throws IOException { - ShimLoader.getHadoopShims().getHCatShim().abortJob(getOutputFormat(), job); - } + @Override + public void cleanupOnFailure(String location, Job job) throws IOException { + ShimLoader.getHadoopShims().getHCatShim().abortJob(getOutputFormat(), job); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java index c586cf6..a01d9e3 100644 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java @@ -64,428 +64,428 @@ */ class PigHCatUtil { - private static final Logger LOG = LoggerFactory.getLogger(PigHCatUtil.class); - - static final int PIG_EXCEPTION_CODE = 1115; // http://wiki.apache.org/pig/PigErrorHandlingFunctionalSpecification#Error_codes - private static final String DEFAULT_DB = MetaStoreUtils.DEFAULT_DATABASE_NAME; - - private final Map, Table> hcatTableCache = - new HashMap, Table>(); - - private static final TupleFactory tupFac = TupleFactory.getInstance(); - - private static boolean pigHasBooleanSupport = false; - - /** - * Determine if the current Pig version supports boolean columns. This works around a - * dependency conflict preventing HCatalog from requiring a version of Pig with boolean - * field support and should be removed once HCATALOG-466 has been resolved. - */ - static { - // DETAILS: - // - // PIG-1429 added support for boolean fields, which shipped in 0.10.0; - // this version of Pig depends on antlr 3.4. - // - // HCatalog depends heavily on Hive, which at this time uses antlr 3.0.1. - // - // antlr 3.0.1 and 3.4 are incompatible, so Pig 0.10.0 and Hive cannot be depended on in the - // same project. Pig 0.8.0 did not use antlr for its parser and can coexist with Hive, - // so that Pig version is depended on by HCatalog at this time. - try { - Schema schema = Utils.getSchemaFromString("myBooleanField: boolean"); - pigHasBooleanSupport = (schema.getField("myBooleanField").type == DataType.BOOLEAN); - } catch (Throwable e) { - // pass - } - - if (!pigHasBooleanSupport) { - LOG.info("This version of Pig does not support boolean fields. 
To enable " - + "boolean-to-integer conversion, set the " - + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER - + "=true configuration parameter."); - } + private static final Logger LOG = LoggerFactory.getLogger(PigHCatUtil.class); + + static final int PIG_EXCEPTION_CODE = 1115; // http://wiki.apache.org/pig/PigErrorHandlingFunctionalSpecification#Error_codes + private static final String DEFAULT_DB = MetaStoreUtils.DEFAULT_DATABASE_NAME; + + private final Map, Table> hcatTableCache = + new HashMap, Table>(); + + private static final TupleFactory tupFac = TupleFactory.getInstance(); + + private static boolean pigHasBooleanSupport = false; + + /** + * Determine if the current Pig version supports boolean columns. This works around a + * dependency conflict preventing HCatalog from requiring a version of Pig with boolean + * field support and should be removed once HCATALOG-466 has been resolved. + */ + static { + // DETAILS: + // + // PIG-1429 added support for boolean fields, which shipped in 0.10.0; + // this version of Pig depends on antlr 3.4. + // + // HCatalog depends heavily on Hive, which at this time uses antlr 3.0.1. + // + // antlr 3.0.1 and 3.4 are incompatible, so Pig 0.10.0 and Hive cannot be depended on in the + // same project. Pig 0.8.0 did not use antlr for its parser and can coexist with Hive, + // so that Pig version is depended on by HCatalog at this time. + try { + Schema schema = Utils.getSchemaFromString("myBooleanField: boolean"); + pigHasBooleanSupport = (schema.getField("myBooleanField").type == DataType.BOOLEAN); + } catch (Throwable e) { + // pass } - static public Pair getDBTableNames(String location) throws IOException { - // the location string will be of the form: - // .
<table name> - parse it and - // communicate the information to HCatInputFormat - - try { - return HCatUtil.getDbAndTableName(location); - } catch (IOException e) { - String locationErrMsg = "The input location in load statement " + - "should be of the form " + - "<dbname>.<tablename> or <tablename>
. Got " + location; - throw new PigException(locationErrMsg, PIG_EXCEPTION_CODE); - } + if (!pigHasBooleanSupport) { + LOG.info("This version of Pig does not support boolean fields. To enable " + + "boolean-to-integer conversion, set the " + + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER + + "=true configuration parameter."); } - - static public String getHCatServerUri(Job job) { - - return job.getConfiguration().get(HiveConf.ConfVars.METASTOREURIS.varname); + } + + static public Pair getDBTableNames(String location) throws IOException { + // the location string will be of the form: + // .
<table name> - parse it and + // communicate the information to HCatInputFormat + + try { + return HCatUtil.getDbAndTableName(location); + } catch (IOException e) { + String locationErrMsg = "The input location in load statement " + + "should be of the form " + + "<dbname>.<tablename> or <tablename>
. Got " + location; + throw new PigException(locationErrMsg, PIG_EXCEPTION_CODE); } + } - static public String getHCatServerPrincipal(Job job) { + static public String getHCatServerUri(Job job) { - return job.getConfiguration().get(HCatConstants.HCAT_METASTORE_PRINCIPAL); - } + return job.getConfiguration().get(HiveConf.ConfVars.METASTOREURIS.varname); + } - private static HiveMetaStoreClient getHiveMetaClient(String serverUri, - String serverKerberosPrincipal, Class clazz) throws Exception { - HiveConf hiveConf = new HiveConf(clazz); + static public String getHCatServerPrincipal(Job job) { - if (serverUri != null) { - hiveConf.set("hive.metastore.local", "false"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim()); - } + return job.getConfiguration().get(HCatConstants.HCAT_METASTORE_PRINCIPAL); + } - if (serverKerberosPrincipal != null) { - hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, serverKerberosPrincipal); - } + private static HiveMetaStoreClient getHiveMetaClient(String serverUri, + String serverKerberosPrincipal, Class clazz) throws Exception { + HiveConf hiveConf = new HiveConf(clazz); - try { - return HCatUtil.getHiveClient(hiveConf); - } catch (Exception e) { - throw new Exception("Could not instantiate a HiveMetaStoreClient connecting to server uri:[" + serverUri + "]", e); - } + if (serverUri != null) { + hiveConf.set("hive.metastore.local", "false"); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim()); } + if (serverKerberosPrincipal != null) { + hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true); + hiveConf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, serverKerberosPrincipal); + } - HCatSchema getHCatSchema(List fields, String signature, Class classForUDFCLookup) throws IOException { - if (fields == null) { - return null; - } - - Properties props = UDFContext.getUDFContext().getUDFProperties( - classForUDFCLookup, new String[]{signature}); - HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA); - - ArrayList fcols = new ArrayList(); - for (RequiredField rf : fields) { - fcols.add(hcatTableSchema.getFields().get(rf.getIndex())); - } - return new HCatSchema(fcols); + try { + return HCatUtil.getHiveClient(hiveConf); + } catch (Exception e) { + throw new Exception("Could not instantiate a HiveMetaStoreClient connecting to server uri:[" + serverUri + "]", e); } + } - public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal) throws IOException { - Pair loc_server = new Pair(location, hcatServerUri); - Table hcatTable = hcatTableCache.get(loc_server); - if (hcatTable != null) { - return hcatTable; - } - Pair dbTablePair = PigHCatUtil.getDBTableNames(location); - String dbName = dbTablePair.first; - String tableName = dbTablePair.second; - Table table = null; - HiveMetaStoreClient client = null; - try { - client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class); - table = HCatUtil.getTable(client, dbName, tableName); - } catch (NoSuchObjectException nsoe) { - throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE); // prettier error messages to frontend - } catch (Exception e) { - throw new IOException(e); - } finally { - HCatUtil.closeHiveClientQuietly(client); - } - hcatTableCache.put(loc_server, table); - return table; + HCatSchema getHCatSchema(List fields, String signature, Class classForUDFCLookup) throws 
IOException { + if (fields == null) { + return null; } - public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException { + Properties props = UDFContext.getUDFContext().getUDFProperties( + classForUDFCLookup, new String[]{signature}); + HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA); - List rfSchemaList = new ArrayList(); - for (HCatFieldSchema hfs : hcatSchema.getFields()) { - ResourceFieldSchema rfSchema; - rfSchema = getResourceSchemaFromFieldSchema(hfs); - rfSchemaList.add(rfSchema); - } - ResourceSchema rSchema = new ResourceSchema(); - rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0])); - return rSchema; - - } - - private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) - throws IOException { - ResourceFieldSchema rfSchema; - // if we are dealing with a bag or tuple column - need to worry about subschema - if (hfs.getType() == Type.STRUCT) { - rfSchema = new ResourceFieldSchema() - .setName(hfs.getName()) - .setDescription(hfs.getComment()) - .setType(getPigType(hfs)) - .setSchema(getTupleSubSchema(hfs)); - } else if (hfs.getType() == Type.ARRAY) { - rfSchema = new ResourceFieldSchema() - .setName(hfs.getName()) - .setDescription(hfs.getComment()) - .setType(getPigType(hfs)) - .setSchema(getBagSubSchema(hfs)); - } else { - rfSchema = new ResourceFieldSchema() - .setName(hfs.getName()) - .setDescription(hfs.getComment()) - .setType(getPigType(hfs)) - .setSchema(null); // no munging inner-schemas - } - return rfSchema; + ArrayList fcols = new ArrayList(); + for (RequiredField rf : fields) { + fcols.add(hcatTableSchema.getFields().get(rf.getIndex())); + } + return new HCatSchema(fcols); + } + + public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal) throws IOException { + Pair loc_server = new Pair(location, hcatServerUri); + Table hcatTable = hcatTableCache.get(loc_server); + if (hcatTable != null) { + return hcatTable; } - protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException { - // there are two cases - array and array> - // in either case the element type of the array is represented in a - // tuple field schema in the bag's field schema - the second case (struct) - // more naturally translates to the tuple - in the first case (array) - // we simulate the tuple by putting the single field in a tuple - - Properties props = UDFContext.getUDFContext().getClientSystemProps(); - String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT; - if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) { - innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME) - .replaceAll("FIELDNAME", hfs.getName()); - } - String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT; - if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) { - innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME) - .replaceAll("FIELDNAME", hfs.getName()); - } + Pair dbTablePair = PigHCatUtil.getDBTableNames(location); + String dbName = dbTablePair.first; + String tableName = dbTablePair.second; + Table table = null; + HiveMetaStoreClient client = null; + try { + client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class); + table = HCatUtil.getTable(client, dbName, tableName); + } catch (NoSuchObjectException nsoe) { + throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE); // 
prettier error messages to frontend + } catch (Exception e) { + throw new IOException(e); + } finally { + HCatUtil.closeHiveClientQuietly(client); + } + hcatTableCache.put(loc_server, table); + return table; + } - ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; - bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName) - .setDescription("The tuple in the bag") - .setType(DataType.TUPLE); - HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0); - if (arrayElementFieldSchema.getType() == Type.STRUCT) { - bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema)); - } else if (arrayElementFieldSchema.getType() == Type.ARRAY) { - ResourceSchema s = new ResourceSchema(); - List lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema)); - s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); - bagSubFieldSchemas[0].setSchema(s); - } else { - ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; - innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName) - .setDescription("The inner field in the tuple in the bag") - .setType(getPigType(arrayElementFieldSchema)) - .setSchema(null); // the element type is not a tuple - so no subschema - bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); - } - ResourceSchema s = new ResourceSchema().setFields(bagSubFieldSchemas); - return s; + public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException { + List rfSchemaList = new ArrayList(); + for (HCatFieldSchema hfs : hcatSchema.getFields()) { + ResourceFieldSchema rfSchema; + rfSchema = getResourceSchemaFromFieldSchema(hfs); + rfSchemaList.add(rfSchema); } - - private static ResourceSchema getTupleSubSchema(HCatFieldSchema hfs) throws IOException { - // for each struct subfield, create equivalent ResourceFieldSchema - ResourceSchema s = new ResourceSchema(); - List lrfs = new ArrayList(); - for (HCatFieldSchema subField : hfs.getStructSubSchema().getFields()) { - lrfs.add(getResourceSchemaFromFieldSchema(subField)); - } - s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); - return s; + ResourceSchema rSchema = new ResourceSchema(); + rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0])); + return rSchema; + + } + + private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) + throws IOException { + ResourceFieldSchema rfSchema; + // if we are dealing with a bag or tuple column - need to worry about subschema + if (hfs.getType() == Type.STRUCT) { + rfSchema = new ResourceFieldSchema() + .setName(hfs.getName()) + .setDescription(hfs.getComment()) + .setType(getPigType(hfs)) + .setSchema(getTupleSubSchema(hfs)); + } else if (hfs.getType() == Type.ARRAY) { + rfSchema = new ResourceFieldSchema() + .setName(hfs.getName()) + .setDescription(hfs.getComment()) + .setType(getPigType(hfs)) + .setSchema(getBagSubSchema(hfs)); + } else { + rfSchema = new ResourceFieldSchema() + .setName(hfs.getName()) + .setDescription(hfs.getComment()) + .setType(getPigType(hfs)) + .setSchema(null); // no munging inner-schemas + } + return rfSchema; + } + + protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException { + // there are two cases - array and array> + // in either case the element type of the array is represented in a + // tuple field schema in the bag's field schema - the second case (struct) + // more naturally translates to the tuple - in the 
first case (array) + // we simulate the tuple by putting the single field in a tuple + + Properties props = UDFContext.getUDFContext().getClientSystemProps(); + String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT; + if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) { + innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME) + .replaceAll("FIELDNAME", hfs.getName()); + } + String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT; + if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) { + innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME) + .replaceAll("FIELDNAME", hfs.getName()); } - /** - * @param hfs the field schema of the column - * @return corresponding pig type - * @throws IOException - */ - static public byte getPigType(HCatFieldSchema hfs) throws IOException { - return getPigType(hfs.getType()); + ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; + bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName) + .setDescription("The tuple in the bag") + .setType(DataType.TUPLE); + HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0); + if (arrayElementFieldSchema.getType() == Type.STRUCT) { + bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema)); + } else if (arrayElementFieldSchema.getType() == Type.ARRAY) { + ResourceSchema s = new ResourceSchema(); + List lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema)); + s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); + bagSubFieldSchemas[0].setSchema(s); + } else { + ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; + innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName) + .setDescription("The inner field in the tuple in the bag") + .setType(getPigType(arrayElementFieldSchema)) + .setSchema(null); // the element type is not a tuple - so no subschema + bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); } + ResourceSchema s = new ResourceSchema().setFields(bagSubFieldSchemas); + return s; - static public byte getPigType(Type type) throws IOException { - if (type == Type.STRING) { - return DataType.CHARARRAY; - } + } - if ((type == Type.INT) || (type == Type.SMALLINT) || (type == Type.TINYINT)) { - return DataType.INTEGER; - } + private static ResourceSchema getTupleSubSchema(HCatFieldSchema hfs) throws IOException { + // for each struct subfield, create equivalent ResourceFieldSchema + ResourceSchema s = new ResourceSchema(); + List lrfs = new ArrayList(); + for (HCatFieldSchema subField : hfs.getStructSubSchema().getFields()) { + lrfs.add(getResourceSchemaFromFieldSchema(subField)); + } + s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); + return s; + } + + /** + * @param hfs the field schema of the column + * @return corresponding pig type + * @throws IOException + */ + static public byte getPigType(HCatFieldSchema hfs) throws IOException { + return getPigType(hfs.getType()); + } + + static public byte getPigType(Type type) throws IOException { + if (type == Type.STRING) { + return DataType.CHARARRAY; + } - if (type == Type.ARRAY) { - return DataType.BAG; - } + if ((type == Type.INT) || (type == Type.SMALLINT) || (type == Type.TINYINT)) { + return DataType.INTEGER; + } - if (type == Type.STRUCT) { - return DataType.TUPLE; - } + if (type == Type.ARRAY) { + return DataType.BAG; + } - if (type == Type.MAP) 
{ - return DataType.MAP; - } + if (type == Type.STRUCT) { + return DataType.TUPLE; + } - if (type == Type.BIGINT) { - return DataType.LONG; - } + if (type == Type.MAP) { + return DataType.MAP; + } - if (type == Type.FLOAT) { - return DataType.FLOAT; - } + if (type == Type.BIGINT) { + return DataType.LONG; + } - if (type == Type.DOUBLE) { - return DataType.DOUBLE; - } + if (type == Type.FLOAT) { + return DataType.FLOAT; + } - if (type == Type.BINARY) { - return DataType.BYTEARRAY; - } + if (type == Type.DOUBLE) { + return DataType.DOUBLE; + } - if (type == Type.BOOLEAN && pigHasBooleanSupport) { - return DataType.BOOLEAN; - } + if (type == Type.BINARY) { + return DataType.BYTEARRAY; + } - throw new PigException("HCatalog column type '" + type.toString() - + "' is not supported in Pig as a column type", PIG_EXCEPTION_CODE); + if (type == Type.BOOLEAN && pigHasBooleanSupport) { + return DataType.BOOLEAN; } - public static Tuple transformToTuple(HCatRecord hr, HCatSchema hs) throws Exception { - if (hr == null) { - return null; - } - return transformToTuple(hr.getAll(), hs); - } - - @SuppressWarnings("unchecked") - public static Object extractPigObject(Object o, HCatFieldSchema hfs) throws Exception { - Object result; - Type itemType = hfs.getType(); - switch (itemType) { - case BINARY: - result = (o == null) ? null : new DataByteArray((byte[]) o); - break; - case STRUCT: - result = transformToTuple((List) o, hfs); - break; - case ARRAY: - result = transformToBag((List) o, hfs); - break; - case MAP: - result = transformToPigMap((Map) o, hfs); - break; - default: - result = o; - break; - } - return result; - } - - private static Tuple transformToTuple(List objList, HCatFieldSchema hfs) throws Exception { - try { - return transformToTuple(objList, hfs.getStructSubSchema()); - } catch (Exception e) { - if (hfs.getType() != Type.STRUCT) { - throw new Exception("Expected Struct type, got " + hfs.getType(), e); - } else { - throw e; - } - } + throw new PigException("HCatalog column type '" + type.toString() + + "' is not supported in Pig as a column type", PIG_EXCEPTION_CODE); + } + + public static Tuple transformToTuple(HCatRecord hr, HCatSchema hs) throws Exception { + if (hr == null) { + return null; + } + return transformToTuple(hr.getAll(), hs); + } + + @SuppressWarnings("unchecked") + public static Object extractPigObject(Object o, HCatFieldSchema hfs) throws Exception { + Object result; + Type itemType = hfs.getType(); + switch (itemType) { + case BINARY: + result = (o == null) ? 
null : new DataByteArray((byte[]) o); + break; + case STRUCT: + result = transformToTuple((List) o, hfs); + break; + case ARRAY: + result = transformToBag((List) o, hfs); + break; + case MAP: + result = transformToPigMap((Map) o, hfs); + break; + default: + result = o; + break; + } + return result; + } + + private static Tuple transformToTuple(List objList, HCatFieldSchema hfs) throws Exception { + try { + return transformToTuple(objList, hfs.getStructSubSchema()); + } catch (Exception e) { + if (hfs.getType() != Type.STRUCT) { + throw new Exception("Expected Struct type, got " + hfs.getType(), e); + } else { + throw e; + } } + } - private static Tuple transformToTuple(List objList, HCatSchema hs) throws Exception { - if (objList == null) { - return null; - } - Tuple t = tupFac.newTuple(objList.size()); - List subFields = hs.getFields(); - for (int i = 0; i < subFields.size(); i++) { - t.set(i, extractPigObject(objList.get(i), subFields.get(i))); - } - return t; + private static Tuple transformToTuple(List objList, HCatSchema hs) throws Exception { + if (objList == null) { + return null; } + Tuple t = tupFac.newTuple(objList.size()); + List subFields = hs.getFields(); + for (int i = 0; i < subFields.size(); i++) { + t.set(i, extractPigObject(objList.get(i), subFields.get(i))); + } + return t; + } - private static Map transformToPigMap(Map map, HCatFieldSchema hfs) throws Exception { - if (map == null) { - return null; - } + private static Map transformToPigMap(Map map, HCatFieldSchema hfs) throws Exception { + if (map == null) { + return null; + } - Map result = new HashMap(); - for (Entry entry : map.entrySet()) { - // since map key for Pig has to be Strings - result.put(entry.getKey().toString(), extractPigObject(entry.getValue(), hfs.getMapValueSchema().get(0))); - } - return result; + Map result = new HashMap(); + for (Entry entry : map.entrySet()) { + // since map key for Pig has to be Strings + result.put(entry.getKey().toString(), extractPigObject(entry.getValue(), hfs.getMapValueSchema().get(0))); } + return result; + } - @SuppressWarnings("unchecked") - private static DataBag transformToBag(List list, HCatFieldSchema hfs) throws Exception { - if (list == null) { - return null; - } + @SuppressWarnings("unchecked") + private static DataBag transformToBag(List list, HCatFieldSchema hfs) throws Exception { + if (list == null) { + return null; + } - HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); - DataBag db = new DefaultDataBag(); - for (Object o : list) { - Tuple tuple; - if (elementSubFieldSchema.getType() == Type.STRUCT) { - tuple = transformToTuple((List) o, elementSubFieldSchema); - } else { - // bags always contain tuples - tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); - } - db.add(tuple); - } - return db; + HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); + DataBag db = new DefaultDataBag(); + for (Object o : list) { + Tuple tuple; + if (elementSubFieldSchema.getType() == Type.STRUCT) { + tuple = transformToTuple((List) o, elementSubFieldSchema); + } else { + // bags always contain tuples + tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); + } + db.add(tuple); } + return db; + } - private static void validateHCatSchemaFollowsPigRules(HCatSchema tblSchema) throws PigException { - for (HCatFieldSchema hcatField : tblSchema.getFields()) { - validateHcatFieldFollowsPigRules(hcatField); - } + private static void validateHCatSchemaFollowsPigRules(HCatSchema 
tblSchema) throws PigException { + for (HCatFieldSchema hcatField : tblSchema.getFields()) { + validateHcatFieldFollowsPigRules(hcatField); } + } - private static void validateHcatFieldFollowsPigRules(HCatFieldSchema hcatField) throws PigException { - try { - Type hType = hcatField.getType(); - switch (hType) { - case BOOLEAN: - if (!pigHasBooleanSupport) { - throw new PigException("Incompatible type found in HCat table schema: " - + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); - } - break; - case ARRAY: - validateHCatSchemaFollowsPigRules(hcatField.getArrayElementSchema()); - break; - case STRUCT: - validateHCatSchemaFollowsPigRules(hcatField.getStructSubSchema()); - break; - case MAP: - // key is only string - if (hcatField.getMapKeyType() != Type.STRING) { - LOG.info("Converting non-String key of map " + hcatField.getName() + " from " - + hcatField.getMapKeyType() + " to String."); - } - validateHCatSchemaFollowsPigRules(hcatField.getMapValueSchema()); - break; - } - } catch (HCatException e) { - throw new PigException("Incompatible type found in hcat table schema: " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE, e); - } + private static void validateHcatFieldFollowsPigRules(HCatFieldSchema hcatField) throws PigException { + try { + Type hType = hcatField.getType(); + switch (hType) { + case BOOLEAN: + if (!pigHasBooleanSupport) { + throw new PigException("Incompatible type found in HCat table schema: " + + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); + } + break; + case ARRAY: + validateHCatSchemaFollowsPigRules(hcatField.getArrayElementSchema()); + break; + case STRUCT: + validateHCatSchemaFollowsPigRules(hcatField.getStructSubSchema()); + break; + case MAP: + // key is only string + if (hcatField.getMapKeyType() != Type.STRING) { + LOG.info("Converting non-String key of map " + hcatField.getName() + " from " + + hcatField.getMapKeyType() + " to String."); + } + validateHCatSchemaFollowsPigRules(hcatField.getMapValueSchema()); + break; + } + } catch (HCatException e) { + throw new PigException("Incompatible type found in hcat table schema: " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE, e); } + } - public static void validateHCatTableSchemaFollowsPigRules(HCatSchema hcatTableSchema) throws IOException { - validateHCatSchemaFollowsPigRules(hcatTableSchema); - } + public static void validateHCatTableSchemaFollowsPigRules(HCatSchema hcatTableSchema) throws IOException { + validateHCatSchemaFollowsPigRules(hcatTableSchema); + } - public static void getConfigFromUDFProperties(Properties p, Configuration config, String propName) { - if (p.getProperty(propName) != null) { - config.set(propName, p.getProperty(propName)); - } + public static void getConfigFromUDFProperties(Properties p, Configuration config, String propName) { + if (p.getProperty(propName) != null) { + config.set(propName, p.getProperty(propName)); } + } - public static void saveConfigIntoUDFProperties(Properties p, Configuration config, String propName) { - if (config.get(propName) != null) { - p.setProperty(propName, config.get(propName)); - } + public static void saveConfigIntoUDFProperties(Properties p, Configuration config, String propName) { + if (config.get(propName) != null) { + p.setProperty(propName, config.get(propName)); } + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseLoader.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseLoader.java index b92131f..2747633 100644 --- 
a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseLoader.java @@ -49,105 +49,105 @@ abstract class HCatBaseLoader extends LoadFunc implements LoadMetadata, LoadPushDown { - protected static final String PRUNE_PROJECTION_INFO = "prune.projection.info"; - - private RecordReader reader; - protected String signature; - - HCatSchema outputSchema = null; - - - @Override - public Tuple getNext() throws IOException { - try { - HCatRecord hr = (HCatRecord) (reader.nextKeyValue() ? reader.getCurrentValue() : null); - Tuple t = PigHCatUtil.transformToTuple(hr, outputSchema); - // TODO : we were discussing an iter interface, and also a LazyTuple - // change this when plans for that solidifies. - return t; - } catch (ExecException e) { - int errCode = 6018; - String errMsg = "Error while reading input"; - throw new ExecException(errMsg, errCode, - PigException.REMOTE_ENVIRONMENT, e); - } catch (Exception eOther) { - int errCode = 6018; - String errMsg = "Error converting read value to tuple"; - throw new ExecException(errMsg, errCode, - PigException.REMOTE_ENVIRONMENT, eOther); - } - - } - - @Override - public void prepareToRead(RecordReader reader, PigSplit arg1) throws IOException { - this.reader = reader; - } - - @Override - public ResourceStatistics getStatistics(String location, Job job) throws IOException { - // statistics not implemented currently - return null; - } - - @Override - public List getFeatures() { - return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION); - } - - @Override - public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldsInfo) throws FrontendException { - // Store the required fields information in the UDFContext so that we - // can retrieve it later. - storeInUDFContext(signature, PRUNE_PROJECTION_INFO, requiredFieldsInfo); - - // HCat will always prune columns based on what we ask of it - so the - // response is true - return new RequiredFieldResponse(true); + protected static final String PRUNE_PROJECTION_INFO = "prune.projection.info"; + + private RecordReader reader; + protected String signature; + + HCatSchema outputSchema = null; + + + @Override + public Tuple getNext() throws IOException { + try { + HCatRecord hr = (HCatRecord) (reader.nextKeyValue() ? reader.getCurrentValue() : null); + Tuple t = PigHCatUtil.transformToTuple(hr, outputSchema); + // TODO : we were discussing an iter interface, and also a LazyTuple + // change this when plans for that solidifies. + return t; + } catch (ExecException e) { + int errCode = 6018; + String errMsg = "Error while reading input"; + throw new ExecException(errMsg, errCode, + PigException.REMOTE_ENVIRONMENT, e); + } catch (Exception eOther) { + int errCode = 6018; + String errMsg = "Error converting read value to tuple"; + throw new ExecException(errMsg, errCode, + PigException.REMOTE_ENVIRONMENT, eOther); } - @Override - public void setUDFContextSignature(String signature) { - this.signature = signature; - } - - - // helper methods - protected void storeInUDFContext(String signature, String key, Object value) { - UDFContext udfContext = UDFContext.getUDFContext(); - Properties props = udfContext.getUDFProperties( - this.getClass(), new String[]{signature}); - props.put(key, value); - } - - /** - * A utility method to get the size of inputs. This is accomplished by summing the - * size of all input paths on supported FileSystems. 
Locations whose size cannot be - * determined are ignored. Note non-FileSystem and unpartitioned locations will not - * report their input size by default. - */ - protected static long getSizeInBytes(InputJobInfo inputJobInfo) throws IOException { - Configuration conf = new Configuration(); - long sizeInBytes = 0; - - for (PartInfo partInfo : inputJobInfo.getPartitions()) { - try { - Path p = new Path(partInfo.getLocation()); - if (p.getFileSystem(conf).isFile(p)) { - sizeInBytes += p.getFileSystem(conf).getFileStatus(p).getLen(); - } else { - FileStatus[] fileStatuses = p.getFileSystem(conf).listStatus(p); - if (fileStatuses != null) { - for (FileStatus child : fileStatuses) { - sizeInBytes += child.getLen(); - } - } - } - } catch (IOException e) { - // Report size to the extent possible. + } + + @Override + public void prepareToRead(RecordReader reader, PigSplit arg1) throws IOException { + this.reader = reader; + } + + @Override + public ResourceStatistics getStatistics(String location, Job job) throws IOException { + // statistics not implemented currently + return null; + } + + @Override + public List getFeatures() { + return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION); + } + + @Override + public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldsInfo) throws FrontendException { + // Store the required fields information in the UDFContext so that we + // can retrieve it later. + storeInUDFContext(signature, PRUNE_PROJECTION_INFO, requiredFieldsInfo); + + // HCat will always prune columns based on what we ask of it - so the + // response is true + return new RequiredFieldResponse(true); + } + + @Override + public void setUDFContextSignature(String signature) { + this.signature = signature; + } + + + // helper methods + protected void storeInUDFContext(String signature, String key, Object value) { + UDFContext udfContext = UDFContext.getUDFContext(); + Properties props = udfContext.getUDFProperties( + this.getClass(), new String[]{signature}); + props.put(key, value); + } + + /** + * A utility method to get the size of inputs. This is accomplished by summing the + * size of all input paths on supported FileSystems. Locations whose size cannot be + * determined are ignored. Note non-FileSystem and unpartitioned locations will not + * report their input size by default. + */ + protected static long getSizeInBytes(InputJobInfo inputJobInfo) throws IOException { + Configuration conf = new Configuration(); + long sizeInBytes = 0; + + for (PartInfo partInfo : inputJobInfo.getPartitions()) { + try { + Path p = new Path(partInfo.getLocation()); + if (p.getFileSystem(conf).isFile(p)) { + sizeInBytes += p.getFileSystem(conf).getFileStatus(p).getLen(); + } else { + FileStatus[] fileStatuses = p.getFileSystem(conf).listStatus(p); + if (fileStatuses != null) { + for (FileStatus child : fileStatuses) { + sizeInBytes += child.getLen(); } + } } - - return sizeInBytes; + } catch (IOException e) { + // Report size to the extent possible. 
+ } } + + return sizeInBytes; + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseStorer.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseStorer.java index 67b1999..06dbe72 100644 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseStorer.java @@ -64,407 +64,407 @@ abstract class HCatBaseStorer extends StoreFunc implements StoreMetadata { - private static final Logger LOG = LoggerFactory.getLogger( HCatBaseStorer.class ); - - private static final List SUPPORTED_INTEGER_CONVERSIONS = - Lists.newArrayList(Type.TINYINT, Type.SMALLINT, Type.INT); - protected static final String COMPUTED_OUTPUT_SCHEMA = "hcat.output.schema"; - protected final List partitionKeys; - protected final Map partitions; - protected Schema pigSchema; - private RecordWriter, HCatRecord> writer; - protected HCatSchema computedSchema; - protected static final String PIG_SCHEMA = "hcat.pig.store.schema"; - protected String sign; - - public HCatBaseStorer(String partSpecs, String schema) throws Exception { - - partitionKeys = new ArrayList(); - partitions = new HashMap(); - if (partSpecs != null && !partSpecs.trim().isEmpty()) { - String[] partKVPs = partSpecs.split(","); - for (String partKVP : partKVPs) { - String[] partKV = partKVP.split("="); - if (partKV.length == 2) { - String partKey = partKV[0].trim(); - partitionKeys.add(partKey); - partitions.put(partKey, partKV[1].trim()); - } else { - throw new FrontendException("Invalid partition column specification. " + partSpecs, PigHCatUtil.PIG_EXCEPTION_CODE); - } - } + private static final Logger LOG = LoggerFactory.getLogger( HCatBaseStorer.class ); + + private static final List SUPPORTED_INTEGER_CONVERSIONS = + Lists.newArrayList(Type.TINYINT, Type.SMALLINT, Type.INT); + protected static final String COMPUTED_OUTPUT_SCHEMA = "hcat.output.schema"; + protected final List partitionKeys; + protected final Map partitions; + protected Schema pigSchema; + private RecordWriter, HCatRecord> writer; + protected HCatSchema computedSchema; + protected static final String PIG_SCHEMA = "hcat.pig.store.schema"; + protected String sign; + + public HCatBaseStorer(String partSpecs, String schema) throws Exception { + + partitionKeys = new ArrayList(); + partitions = new HashMap(); + if (partSpecs != null && !partSpecs.trim().isEmpty()) { + String[] partKVPs = partSpecs.split(","); + for (String partKVP : partKVPs) { + String[] partKV = partKVP.split("="); + if (partKV.length == 2) { + String partKey = partKV[0].trim(); + partitionKeys.add(partKey); + partitions.put(partKey, partKV[1].trim()); + } else { + throw new FrontendException("Invalid partition column specification. " + partSpecs, PigHCatUtil.PIG_EXCEPTION_CODE); } + } + } - if (schema != null) { - pigSchema = Utils.getSchemaFromString(schema); - } + if (schema != null) { + pigSchema = Utils.getSchemaFromString(schema); + } + } + + @Override + public void checkSchema(ResourceSchema resourceSchema) throws IOException { + + /* Schema provided by user and the schema computed by Pig + * at the time of calling store must match. + */ + Schema runtimeSchema = Schema.getPigSchema(resourceSchema); + if (pigSchema != null) { + if (!Schema.equals(runtimeSchema, pigSchema, false, true)) { + throw new FrontendException("Schema provided in store statement doesn't match with the Schema" + + "returned by Pig run-time. 
Schema provided in HCatStorer: " + pigSchema.toString() + " Schema received from Pig runtime: " + runtimeSchema.toString(), PigHCatUtil.PIG_EXCEPTION_CODE); + } + } else { + pigSchema = runtimeSchema; + } + UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema)); + } + + /** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing + * schema of the table in metastore. + */ + protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { + List fieldSchemas = new ArrayList(pigSchema.size()); + for (FieldSchema fSchema : pigSchema.getFields()) { + try { + HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); + + fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema)); + } catch (HCatException he) { + throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); + } } + return new HCatSchema(fieldSchemas); + } - @Override - public void checkSchema(ResourceSchema resourceSchema) throws IOException { - - /* Schema provided by user and the schema computed by Pig - * at the time of calling store must match. - */ - Schema runtimeSchema = Schema.getPigSchema(resourceSchema); - if (pigSchema != null) { - if (!Schema.equals(runtimeSchema, pigSchema, false, true)) { - throw new FrontendException("Schema provided in store statement doesn't match with the Schema" + - "returned by Pig run-time. Schema provided in HCatStorer: " + pigSchema.toString() + " Schema received from Pig runtime: " + runtimeSchema.toString(), PigHCatUtil.PIG_EXCEPTION_CODE); - } - } else { - pigSchema = runtimeSchema; - } - UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema)); + public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { + if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { + return true; + } + // Column was not found in table schema. Its a new column + List tupSchema = bagFieldSchema.schema.getFields(); + if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { + return true; } + return false; + } + + + private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema) throws FrontendException, HCatException { + byte type = fSchema.type; + switch (type) { + + case DataType.CHARARRAY: + case DataType.BIGCHARARRAY: + return new HCatFieldSchema(fSchema.alias, Type.STRING, null); - /** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing - * schema of the table in metastore. 
- */ - protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { - List fieldSchemas = new ArrayList(pigSchema.size()); - for (FieldSchema fSchema : pigSchema.getFields()) { - try { - HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); - - fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema)); - } catch (HCatException he) { - throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); - } + case DataType.INTEGER: + if (hcatFieldSchema != null) { + if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) { + throw new FrontendException("Unsupported type: " + type + " in Pig's schema", + PigHCatUtil.PIG_EXCEPTION_CODE); } - return new HCatSchema(fieldSchemas); + return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getType(), null); + } else { + return new HCatFieldSchema(fSchema.alias, Type.INT, null); + } + + case DataType.LONG: + return new HCatFieldSchema(fSchema.alias, Type.BIGINT, null); + + case DataType.FLOAT: + return new HCatFieldSchema(fSchema.alias, Type.FLOAT, null); + + case DataType.DOUBLE: + return new HCatFieldSchema(fSchema.alias, Type.DOUBLE, null); + + case DataType.BYTEARRAY: + return new HCatFieldSchema(fSchema.alias, Type.BINARY, null); + + case DataType.BOOLEAN: + return new HCatFieldSchema(fSchema.alias, Type.BOOLEAN, null); + + case DataType.BAG: + Schema bagSchema = fSchema.schema; + List arrFields = new ArrayList(1); + FieldSchema field; + // Find out if we need to throw away the tuple or not. + if (removeTupleFromBag(hcatFieldSchema, fSchema)) { + field = bagSchema.getField(0).schema.getField(0); + } else { + field = bagSchema.getField(0); + } + arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0))); + return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), ""); + + case DataType.TUPLE: + List fieldNames = new ArrayList(); + List hcatFSs = new ArrayList(); + HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema(); + List fields = fSchema.schema.getFields(); + for (int i = 0; i < fields.size(); i++) { + FieldSchema fieldSchema = fields.get(i); + fieldNames.add(fieldSchema.alias); + hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i))); + } + return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), ""); + + case DataType.MAP: { + // Pig's schema contain no type information about map's keys and + // values. So, if its a new column assume if its existing + // return whatever is contained in the existing column. + + HCatFieldSchema valFS; + List valFSList = new ArrayList(1); + + if (hcatFieldSchema != null) { + return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, hcatFieldSchema.getMapValueSchema(), ""); + } + + // Column not found in target table. Its a new column. Its schema is map + valFS = new HCatFieldSchema(fSchema.alias, Type.STRING, ""); + valFSList.add(valFS); + return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, new HCatSchema(valFSList), ""); } - public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { - if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { - return true; - } - // Column was not found in table schema. 
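A Pig bag always wraps its items in a tuple, while an HCatalog array only has a struct wrapper when the element really is a struct; removeTupleFromBag decides whether that wrapper tuple should be discarded when the bag is mapped to an array column. Below is a condensed, plain-Java restatement of the decision: it ignores the extra inner-schema checks the real method performs for new columns, and the String/count parameters are stand-ins for the HCatFieldSchema/FieldSchema objects.

// Plain-Java restatement of the removeTupleFromBag decision; simplified and illustrative.
public class BagWrapperSketch {
  // hcatElementType: type of the HCat array element ("struct", "int", ...), or null
  //                  when the column is new (not present in the table schema yet)
  // pigTupleFieldCount: number of fields in the tuple Pig puts inside the bag
  static boolean removeTupleFromBag(String hcatElementType, int pigTupleFieldCount) {
    if (hcatElementType != null) {
      return !"struct".equals(hcatElementType);   // existing column: keep the tuple only for array<struct<...>>
    }
    return pigTupleFieldCount == 1;               // new column: a single-field tuple is treated as a plain array
  }

  public static void main(String[] args) {
    System.out.println(removeTupleFromBag("int", 1));    // true  -> unwrap, array of primitives
    System.out.println(removeTupleFromBag("struct", 3)); // false -> keep tuple, array of structs
    System.out.println(removeTupleFromBag(null, 1));     // true  -> new column, single-field tuple
  }
}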
Its a new column - List tupSchema = bagFieldSchema.schema.getFields(); - if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { - return true; - } - return false; + default: + throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE); } + } + @Override + public void prepareToWrite(RecordWriter writer) throws IOException { + this.writer = writer; + computedSchema = (HCatSchema) ObjectSerializer.deserialize(UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).getProperty(COMPUTED_OUTPUT_SCHEMA)); + } - private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema) throws FrontendException, HCatException { - byte type = fSchema.type; - switch (type) { - - case DataType.CHARARRAY: - case DataType.BIGCHARARRAY: - return new HCatFieldSchema(fSchema.alias, Type.STRING, null); - - case DataType.INTEGER: - if (hcatFieldSchema != null) { - if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) { - throw new FrontendException("Unsupported type: " + type + " in Pig's schema", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getType(), null); - } else { - return new HCatFieldSchema(fSchema.alias, Type.INT, null); - } - - case DataType.LONG: - return new HCatFieldSchema(fSchema.alias, Type.BIGINT, null); - - case DataType.FLOAT: - return new HCatFieldSchema(fSchema.alias, Type.FLOAT, null); - - case DataType.DOUBLE: - return new HCatFieldSchema(fSchema.alias, Type.DOUBLE, null); - - case DataType.BYTEARRAY: - return new HCatFieldSchema(fSchema.alias, Type.BINARY, null); - - case DataType.BOOLEAN: - return new HCatFieldSchema(fSchema.alias, Type.BOOLEAN, null); - - case DataType.BAG: - Schema bagSchema = fSchema.schema; - List arrFields = new ArrayList(1); - FieldSchema field; - // Find out if we need to throw away the tuple or not. - if (removeTupleFromBag(hcatFieldSchema, fSchema)) { - field = bagSchema.getField(0).schema.getField(0); - } else { - field = bagSchema.getField(0); - } - arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0))); - return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), ""); - - case DataType.TUPLE: - List fieldNames = new ArrayList(); - List hcatFSs = new ArrayList(); - HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema(); - List fields = fSchema.schema.getFields(); - for (int i = 0; i < fields.size(); i++) { - FieldSchema fieldSchema = fields.get(i); - fieldNames.add(fieldSchema.alias); - hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i))); - } - return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), ""); - - case DataType.MAP: { - // Pig's schema contain no type information about map's keys and - // values. So, if its a new column assume if its existing - // return whatever is contained in the existing column. - - HCatFieldSchema valFS; - List valFSList = new ArrayList(1); - - if (hcatFieldSchema != null) { - return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, hcatFieldSchema.getMapValueSchema(), ""); - } - - // Column not found in target table. Its a new column. 
Its schema is map - valFS = new HCatFieldSchema(fSchema.alias, Type.STRING, ""); - valFSList.add(valFS); - return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, new HCatSchema(valFSList), ""); - } + @Override + public void putNext(Tuple tuple) throws IOException { - default: - throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE); - } - } + List outgoing = new ArrayList(tuple.size()); - @Override - public void prepareToWrite(RecordWriter writer) throws IOException { - this.writer = writer; - computedSchema = (HCatSchema) ObjectSerializer.deserialize(UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).getProperty(COMPUTED_OUTPUT_SCHEMA)); + int i = 0; + for (HCatFieldSchema fSchema : computedSchema.getFields()) { + outgoing.add(getJavaObj(tuple.get(i++), fSchema)); + } + try { + writer.write(null, new DefaultHCatRecord(outgoing)); + } catch (InterruptedException e) { + throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e); } + } - @Override - public void putNext(Tuple tuple) throws IOException { + private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException { + try { - List outgoing = new ArrayList(tuple.size()); + // The real work-horse. Spend time and energy in this method if there is + // need to keep HCatStorer lean and go fast. + Type type = hcatFS.getType(); + switch (type) { - int i = 0; - for (HCatFieldSchema fSchema : computedSchema.getFields()) { - outgoing.add(getJavaObj(tuple.get(i++), fSchema)); + case BINARY: + if (pigObj == null) { + return null; } - try { - writer.write(null, new DefaultHCatRecord(outgoing)); - } catch (InterruptedException e) { - throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e); + return ((DataByteArray) pigObj).get(); + + case STRUCT: + if (pigObj == null) { + return null; } - } + HCatSchema structSubSchema = hcatFS.getStructSubSchema(); + // Unwrap the tuple. + List all = ((Tuple) pigObj).getAll(); + ArrayList converted = new ArrayList(all.size()); + for (int i = 0; i < all.size(); i++) { + converted.add(getJavaObj(all.get(i), structSubSchema.get(i))); + } + return converted; - private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException { - try { - - // The real work-horse. Spend time and energy in this method if there is - // need to keep HCatStorer lean and go fast. - Type type = hcatFS.getType(); - switch (type) { - - case BINARY: - if (pigObj == null) { - return null; - } - return ((DataByteArray) pigObj).get(); - - case STRUCT: - if (pigObj == null) { - return null; - } - HCatSchema structSubSchema = hcatFS.getStructSubSchema(); - // Unwrap the tuple. - List all = ((Tuple) pigObj).getAll(); - ArrayList converted = new ArrayList(all.size()); - for (int i = 0; i < all.size(); i++) { - converted.add(getJavaObj(all.get(i), structSubSchema.get(i))); - } - return converted; - - case ARRAY: - if (pigObj == null) { - return null; - } - // Unwrap the bag. - DataBag pigBag = (DataBag) pigObj; - HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0); - boolean needTuple = tupFS.getType() == Type.STRUCT; - List bagContents = new ArrayList((int) pigBag.size()); - Iterator bagItr = pigBag.iterator(); - - while (bagItr.hasNext()) { - // If there is only one element in tuple contained in bag, we throw away the tuple. - bagContents.add(getJavaObj(needTuple ? 
bagItr.next() : bagItr.next().get(0), tupFS)); - - } - return bagContents; - case MAP: - if (pigObj == null) { - return null; - } - Map pigMap = (Map) pigObj; - Map typeMap = new HashMap(); - for (Entry entry : pigMap.entrySet()) { - // the value has a schema and not a FieldSchema - typeMap.put( - // Schema validation enforces that the Key is a String - (String) entry.getKey(), - getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0))); - } - return typeMap; - case STRING: - case INT: - case BIGINT: - case FLOAT: - case DOUBLE: - return pigObj; - case SMALLINT: - if (pigObj == null) { - return null; - } - if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) { - throw new BackendException("Value " + pigObj + " is outside the bounds of column " + - hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); - } - return ((Integer) pigObj).shortValue(); - case TINYINT: - if (pigObj == null) { - return null; - } - if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) { - throw new BackendException("Value " + pigObj + " is outside the bounds of column " + - hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); - } - return ((Integer) pigObj).byteValue(); - case BOOLEAN: - if (pigObj == null) { - LOG.debug( "HCatBaseStorer.getJavaObj(BOOLEAN): obj null, bailing early" ); - return null; - } - - if( pigObj instanceof String ) { - if( ((String)pigObj).trim().compareTo("0") == 0 ) { - return Boolean.FALSE; - } - if( ((String)pigObj).trim().compareTo("1") == 0 ) { - return Boolean.TRUE; - } - - throw new BackendException( - "Unexpected type " + type + " for value " + pigObj - + (pigObj == null ? "" : " of class " - + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); - } - - return Boolean.parseBoolean( pigObj.toString() ); - default: - throw new BackendException("Unexpected type " + type + " for value " + pigObj - + (pigObj == null ? "" : " of class " - + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); - } - } catch (BackendException e) { - // provide the path to the field in the error message - throw new BackendException( - (hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), - e.getCause() == null ? e : e.getCause()); + case ARRAY: + if (pigObj == null) { + return null; } - } + // Unwrap the bag. + DataBag pigBag = (DataBag) pigObj; + HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0); + boolean needTuple = tupFS.getType() == Type.STRUCT; + List bagContents = new ArrayList((int) pigBag.size()); + Iterator bagItr = pigBag.iterator(); - @Override - public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException { + while (bagItr.hasNext()) { + // If there is only one element in tuple contained in bag, we throw away the tuple. + bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS)); - // Need to necessarily override this method since default impl assumes HDFS - // based location string. 
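On the write path, getJavaObj narrows Pig integers into smallint/tinyint columns only when the value fits the target range, and coerces the legacy "0"/"1" string representation of booleans for Pig versions without native boolean support. Those conversions are pulled out into the standalone sketch below; the method names and the use of IllegalArgumentException in place of BackendException are editorial choices.

// Standalone sketch of the SMALLINT/TINYINT range checks and the "0"/"1" boolean
// coercion done in getJavaObj; names are illustrative.
public class NarrowingSketch {
  static short toSmallint(int v) {
    if (v < Short.MIN_VALUE || v > Short.MAX_VALUE) {
      throw new IllegalArgumentException("Value " + v + " is outside the bounds of a smallint column");
    }
    return (short) v;
  }

  static byte toTinyint(int v) {
    if (v < Byte.MIN_VALUE || v > Byte.MAX_VALUE) {
      throw new IllegalArgumentException("Value " + v + " is outside the bounds of a tinyint column");
    }
    return (byte) v;
  }

  static Boolean toBoolean(Object pigObj) {
    if (pigObj == null) {
      return null;
    }
    if (pigObj instanceof String) {          // older Pig represents booleans as "0"/"1" strings
      String s = ((String) pigObj).trim();
      if (s.equals("0")) return Boolean.FALSE;
      if (s.equals("1")) return Boolean.TRUE;
      throw new IllegalArgumentException("Unexpected boolean literal: " + pigObj);
    }
    return Boolean.parseBoolean(pigObj.toString());
  }

  public static void main(String[] args) {
    System.out.println(toSmallint(123));          // 123
    System.out.println(toBoolean("1"));           // true
    System.out.println(toBoolean(Boolean.FALSE)); // false
  }
}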
- return location; - } + } + return bagContents; + case MAP: + if (pigObj == null) { + return null; + } + Map pigMap = (Map) pigObj; + Map typeMap = new HashMap(); + for (Entry entry : pigMap.entrySet()) { + // the value has a schema and not a FieldSchema + typeMap.put( + // Schema validation enforces that the Key is a String + (String) entry.getKey(), + getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0))); + } + return typeMap; + case STRING: + case INT: + case BIGINT: + case FLOAT: + case DOUBLE: + return pigObj; + case SMALLINT: + if (pigObj == null) { + return null; + } + if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) { + throw new BackendException("Value " + pigObj + " is outside the bounds of column " + + hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); + } + return ((Integer) pigObj).shortValue(); + case TINYINT: + if (pigObj == null) { + return null; + } + if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) { + throw new BackendException("Value " + pigObj + " is outside the bounds of column " + + hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); + } + return ((Integer) pigObj).byteValue(); + case BOOLEAN: + if (pigObj == null) { + LOG.debug( "HCatBaseStorer.getJavaObj(BOOLEAN): obj null, bailing early" ); + return null; + } - @Override - public void setStoreFuncUDFContextSignature(String signature) { - sign = signature; + if( pigObj instanceof String ) { + if( ((String)pigObj).trim().compareTo("0") == 0 ) { + return Boolean.FALSE; + } + if( ((String)pigObj).trim().compareTo("1") == 0 ) { + return Boolean.TRUE; + } + + throw new BackendException( + "Unexpected type " + type + " for value " + pigObj + + (pigObj == null ? "" : " of class " + + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); + } + + return Boolean.parseBoolean( pigObj.toString() ); + default: + throw new BackendException("Unexpected type " + type + " for value " + pigObj + + (pigObj == null ? "" : " of class " + + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); + } + } catch (BackendException e) { + // provide the path to the field in the error message + throw new BackendException( + (hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), + e.getCause() == null ? e : e.getCause()); } + } + @Override + public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException { - protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException { + // Need to necessarily override this method since default impl assumes HDFS + // based location string. + return location; + } - // Iterate through all the elements in Pig Schema and do validations as - // dictated by semantics, consult HCatSchema of table when need be. 
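doSchemaValidations in this hunk walks the Pig schema field by field; validateAlias then requires every field to be named and rejects any name containing an upper-case character, matching how Hive stores column names. The same check on its own, with IllegalArgumentException standing in for the FrontendException the storer actually throws:

// Standalone version of the alias checks performed by validateAlias.
public class AliasCheckSketch {
  static void validateAlias(String alias) {
    if (alias == null) {
      throw new IllegalArgumentException(
          "Column name for a field is not specified. Provide the full schema to HCatStorer.");
    }
    if (alias.matches(".*[A-Z]+.*")) {
      throw new IllegalArgumentException(
          "Column names should all be in lowercase. Invalid name found: " + alias);
    }
  }

  public static void main(String[] args) {
    validateAlias("user_id");       // accepted
    try {
      validateAlias("userId");      // rejected: contains upper-case characters
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage());
    }
  }
}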
+ @Override + public void setStoreFuncUDFContextSignature(String signature) { + sign = signature; + } - for (FieldSchema pigField : pigSchema.getFields()) { - HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema); - validateSchema(pigField, hcatField); - } - try { - PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema); - } catch (IOException e) { - throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e); - } - } + protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException { + // Iterate through all the elements in Pig Schema and do validations as + // dictated by semantics, consult HCatSchema of table when need be. - private void validateSchema(FieldSchema pigField, HCatFieldSchema hcatField) - throws HCatException, FrontendException { - validateAlias(pigField.alias); - byte type = pigField.type; - if (DataType.isComplex(type)) { - switch (type) { - - case DataType.MAP: - if (hcatField != null) { - if (hcatField.getMapKeyType() != Type.STRING) { - throw new FrontendException("Key Type of map must be String " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); - } - // Map values can be primitive or complex - } - break; - - case DataType.BAG: - HCatSchema arrayElementSchema = hcatField == null ? null : hcatField.getArrayElementSchema(); - for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { - validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema)); - } - break; - - case DataType.TUPLE: - HCatSchema structSubSchema = hcatField == null ? null : hcatField.getStructSubSchema(); - for (FieldSchema innerField : pigField.schema.getFields()) { - validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema)); - } - break; - - default: - throw new FrontendException("Internal Error.", PigHCatUtil.PIG_EXCEPTION_CODE); - } - } + for (FieldSchema pigField : pigSchema.getFields()) { + HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema); + validateSchema(pigField, hcatField); } - private void validateAlias(String alias) throws FrontendException { - if (alias == null) { - throw new FrontendException("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.", PigHCatUtil.PIG_EXCEPTION_CODE); + try { + PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema); + } catch (IOException e) { + throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e); + } + } + + + private void validateSchema(FieldSchema pigField, HCatFieldSchema hcatField) + throws HCatException, FrontendException { + validateAlias(pigField.alias); + byte type = pigField.type; + if (DataType.isComplex(type)) { + switch (type) { + + case DataType.MAP: + if (hcatField != null) { + if (hcatField.getMapKeyType() != Type.STRING) { + throw new FrontendException("Key Type of map must be String " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); + } + // Map values can be primitive or complex } - if (alias.matches(".*[A-Z]+.*")) { - throw new FrontendException("Column names should all be in lowercase. Invalid name found: " + alias, PigHCatUtil.PIG_EXCEPTION_CODE); + break; + + case DataType.BAG: + HCatSchema arrayElementSchema = hcatField == null ? 
null : hcatField.getArrayElementSchema(); + for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { + validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema)); } - } + break; - // Finds column by name in HCatSchema, if not found returns null. - private HCatFieldSchema getColFromSchema(String alias, HCatSchema tblSchema) { - if (tblSchema != null) { - for (HCatFieldSchema hcatField : tblSchema.getFields()) { - if (hcatField != null && hcatField.getName() != null && hcatField.getName().equalsIgnoreCase(alias)) { - return hcatField; - } - } + case DataType.TUPLE: + HCatSchema structSubSchema = hcatField == null ? null : hcatField.getStructSubSchema(); + for (FieldSchema innerField : pigField.schema.getFields()) { + validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema)); } - // Its a new column - return null; - } + break; - @Override - public void cleanupOnFailure(String location, Job job) throws IOException { - // No-op. + default: + throw new FrontendException("Internal Error.", PigHCatUtil.PIG_EXCEPTION_CODE); + } } + } - @Override - public void storeStatistics(ResourceStatistics stats, String arg1, Job job) throws IOException { + private void validateAlias(String alias) throws FrontendException { + if (alias == null) { + throw new FrontendException("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.", PigHCatUtil.PIG_EXCEPTION_CODE); + } + if (alias.matches(".*[A-Z]+.*")) { + throw new FrontendException("Column names should all be in lowercase. Invalid name found: " + alias, PigHCatUtil.PIG_EXCEPTION_CODE); + } + } + + // Finds column by name in HCatSchema, if not found returns null. + private HCatFieldSchema getColFromSchema(String alias, HCatSchema tblSchema) { + if (tblSchema != null) { + for (HCatFieldSchema hcatField : tblSchema.getFields()) { + if (hcatField != null && hcatField.getName() != null && hcatField.getName().equalsIgnoreCase(alias)) { + return hcatField; + } + } } + // Its a new column + return null; + } + + @Override + public void cleanupOnFailure(String location, Job job) throws IOException { + // No-op. 
+ } + + @Override + public void storeStatistics(ResourceStatistics stats, String arg1, Job job) throws IOException { + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java index 5d01a7e..a4f93c7 100644 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java @@ -52,222 +52,222 @@ public class HCatLoader extends HCatBaseLoader { - private static final String PARTITION_FILTER = "partition.filter"; // for future use + private static final String PARTITION_FILTER = "partition.filter"; // for future use - private HCatInputFormat hcatInputFormat = null; - private String dbName; - private String tableName; - private String hcatServerUri; - private String partitionFilterString; - private final PigHCatUtil phutil = new PigHCatUtil(); + private HCatInputFormat hcatInputFormat = null; + private String dbName; + private String tableName; + private String hcatServerUri; + private String partitionFilterString; + private final PigHCatUtil phutil = new PigHCatUtil(); - // Signature for wrapped loader, see comments in LoadFuncBasedInputDriver.initialize - final public static String INNER_SIGNATURE = "hcatloader.inner.signature"; - final public static String INNER_SIGNATURE_PREFIX = "hcatloader_inner_signature"; - // A hash map which stores job credentials. The key is a signature passed by Pig, which is - //unique to the load func and input file name (table, in our case). - private static Map jobCredentials = new HashMap(); + // Signature for wrapped loader, see comments in LoadFuncBasedInputDriver.initialize + final public static String INNER_SIGNATURE = "hcatloader.inner.signature"; + final public static String INNER_SIGNATURE_PREFIX = "hcatloader_inner_signature"; + // A hash map which stores job credentials. The key is a signature passed by Pig, which is + //unique to the load func and input file name (table, in our case). 
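Both HCatLoader and HCatStorer keep delegation tokens out of the serialized (and therefore public) UDFContext and instead park them in a private static map keyed by a fixed prefix plus the UDF signature Pig hands them. The keying scheme reduces to the small sketch below, where a String stands in for Hadoop's Credentials object and the prefix value is copied from the loader in this hunk.

// Minimal sketch of the per-signature credential cache keying used by the loader/storer.
import java.util.HashMap;
import java.util.Map;

public class SignatureCacheSketch {
  static final String INNER_SIGNATURE_PREFIX = "hcatloader_inner_signature";
  private static final Map<String, String> jobCredentials = new HashMap<String, String>();

  static void store(String signature, String credentials) {
    jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, credentials);
  }

  static String fetch(String signature) {
    return jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature);
  }

  public static void main(String[] args) {
    store("sig-42", "tokens-for-sig-42");   // front end: stash credentials after setInput()
    System.out.println(fetch("sig-42"));    // later calls with the same signature reuse them
  }
}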
+ private static Map jobCredentials = new HashMap(); - @Override - public InputFormat getInputFormat() throws IOException { - if (hcatInputFormat == null) { - hcatInputFormat = new HCatInputFormat(); - } - return hcatInputFormat; - } - - @Override - public String relativeToAbsolutePath(String location, Path curDir) throws IOException { - return location; + @Override + public InputFormat getInputFormat() throws IOException { + if (hcatInputFormat == null) { + hcatInputFormat = new HCatInputFormat(); } + return hcatInputFormat; + } - @Override - public void setLocation(String location, Job job) throws IOException { - HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() - .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); + @Override + public String relativeToAbsolutePath(String location, Path curDir) throws IOException { + return location; + } - UDFContext udfContext = UDFContext.getUDFContext(); - Properties udfProps = udfContext.getUDFProperties(this.getClass(), - new String[]{signature}); - job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature); - Pair dbTablePair = PigHCatUtil.getDBTableNames(location); - dbName = dbTablePair.first; - tableName = dbTablePair.second; + @Override + public void setLocation(String location, Job job) throws IOException { + HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() + .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); - RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps - .get(PRUNE_PROJECTION_INFO); - // get partitionFilterString stored in the UDFContext - it would have - // been stored there by an earlier call to setPartitionFilter - // call setInput on HCatInputFormat only in the frontend because internally - // it makes calls to the hcat server - we don't want these to happen in - // the backend - // in the hadoop front end mapred.task.id property will not be set in - // the Configuration - if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) { - for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { - PigHCatUtil.getConfigFromUDFProperties(udfProps, - job.getConfiguration(), emr.nextElement().toString()); - } - if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { - //Combine credentials and credentials from job takes precedence for freshness - Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature); - crd.addAll(job.getCredentials()); - job.getCredentials().addAll(crd); - } - } else { - Job clone = new Job(job.getConfiguration()); - HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString()); + UDFContext udfContext = UDFContext.getUDFContext(); + Properties udfProps = udfContext.getUDFProperties(this.getClass(), + new String[]{signature}); + job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature); + Pair dbTablePair = PigHCatUtil.getDBTableNames(location); + dbName = dbTablePair.first; + tableName = dbTablePair.second; - // We will store all the new /changed properties in the job in the - // udf context, so the the HCatInputFormat.setInput method need not - //be called many times. 
- for (Entry keyValue : job.getConfiguration()) { - String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); - if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { - udfProps.put(keyValue.getKey(), keyValue.getValue()); - } - } - udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true); + RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps + .get(PRUNE_PROJECTION_INFO); + // get partitionFilterString stored in the UDFContext - it would have + // been stored there by an earlier call to setPartitionFilter + // call setInput on HCatInputFormat only in the frontend because internally + // it makes calls to the hcat server - we don't want these to happen in + // the backend + // in the hadoop front end mapred.task.id property will not be set in + // the Configuration + if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) { + for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { + PigHCatUtil.getConfigFromUDFProperties(udfProps, + job.getConfiguration(), emr.nextElement().toString()); + } + if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { + //Combine credentials and credentials from job takes precedence for freshness + Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature); + crd.addAll(job.getCredentials()); + job.getCredentials().addAll(crd); + } + } else { + Job clone = new Job(job.getConfiguration()); + HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString()); - //Store credentials in a private hash map and not the udf context to - // make sure they are not public. - Credentials crd = new Credentials(); - crd.addAll(job.getCredentials()); - jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd); + // We will store all the new /changed properties in the job in the + // udf context, so the the HCatInputFormat.setInput method need not + //be called many times. + for (Entry keyValue : job.getConfiguration()) { + String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); + if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { + udfProps.put(keyValue.getKey(), keyValue.getValue()); } + } + udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true); - // Need to also push projections by calling setOutputSchema on - // HCatInputFormat - we have to get the RequiredFields information - // from the UdfContext, translate it to an Schema and then pass it - // The reason we do this here is because setLocation() is called by - // Pig runtime at InputFormat.getSplits() and - // InputFormat.createRecordReader() time - we are not sure when - // HCatInputFormat needs to know about pruned projections - so doing it - // here will ensure we communicate to HCatInputFormat about pruned - // projections at getSplits() and createRecordReader() time + //Store credentials in a private hash map and not the udf context to + // make sure they are not public. 
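setLocation clones the job configuration before calling HCatInputFormat.setInput and afterwards copies into the UDFContext only the entries that setInput added or changed, so the metastore-backed call does not have to be repeated on the backend. The diffing step is recast below with java.util maps standing in for Hadoop's Configuration and Properties; the property name used in main is made up for illustration.

// The configuration-diffing step from setLocation, using plain maps.
import java.util.HashMap;
import java.util.Map;

public class ConfigDiffSketch {
  static Map<String, String> changedEntries(Map<String, String> before, Map<String, String> after) {
    Map<String, String> changed = new HashMap<String, String>();
    for (Map.Entry<String, String> e : after.entrySet()) {
      String oldValue = before.get(e.getKey());
      if (oldValue == null || !e.getValue().equals(oldValue)) {
        changed.put(e.getKey(), e.getValue());   // new or modified key -> remember it for the backend
      }
    }
    return changed;
  }

  public static void main(String[] args) {
    Map<String, String> before = new HashMap<String, String>();
    before.put("mapreduce.job.name", "load");
    Map<String, String> after = new HashMap<String, String>(before);
    after.put("example.property.added.by.setinput", "value"); // pretend setInput() added this
    System.out.println(changedEntries(before, after));        // only the added key survives
  }
}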
+ Credentials crd = new Credentials(); + crd.addAll(job.getCredentials()); + jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd); + } - if (requiredFieldsInfo != null) { - // convert to hcatschema and pass to HCatInputFormat - try { - outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass()); - HCatInputFormat.setOutputSchema(job, outputSchema); - } catch (Exception e) { - throw new IOException(e); - } - } else { - // else - this means pig's optimizer never invoked the pushProjection - // method - so we need all fields and hence we should not call the - // setOutputSchema on HCatInputFormat - if (HCatUtil.checkJobContextIfRunningFromBackend(job)) { - try { - HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA); - outputSchema = hcatTableSchema; - HCatInputFormat.setOutputSchema(job, outputSchema); - } catch (Exception e) { - throw new IOException(e); - } - } - } + // Need to also push projections by calling setOutputSchema on + // HCatInputFormat - we have to get the RequiredFields information + // from the UdfContext, translate it to an Schema and then pass it + // The reason we do this here is because setLocation() is called by + // Pig runtime at InputFormat.getSplits() and + // InputFormat.createRecordReader() time - we are not sure when + // HCatInputFormat needs to know about pruned projections - so doing it + // here will ensure we communicate to HCatInputFormat about pruned + // projections at getSplits() and createRecordReader() time + if (requiredFieldsInfo != null) { + // convert to hcatschema and pass to HCatInputFormat + try { + outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass()); + HCatInputFormat.setOutputSchema(job, outputSchema); + } catch (Exception e) { + throw new IOException(e); + } + } else { + // else - this means pig's optimizer never invoked the pushProjection + // method - so we need all fields and hence we should not call the + // setOutputSchema on HCatInputFormat + if (HCatUtil.checkJobContextIfRunningFromBackend(job)) { + try { + HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA); + outputSchema = hcatTableSchema; + HCatInputFormat.setOutputSchema(job, outputSchema); + } catch (Exception e) { + throw new IOException(e); + } + } } - @Override - public String[] getPartitionKeys(String location, Job job) - throws IOException { - Table table = phutil.getTable(location, - hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), - PigHCatUtil.getHCatServerPrincipal(job)); - List tablePartitionKeys = table.getPartitionKeys(); - String[] partitionKeys = new String[tablePartitionKeys.size()]; - for (int i = 0; i < tablePartitionKeys.size(); i++) { - partitionKeys[i] = tablePartitionKeys.get(i).getName(); - } - return partitionKeys; + } + + @Override + public String[] getPartitionKeys(String location, Job job) + throws IOException { + Table table = phutil.getTable(location, + hcatServerUri != null ? 
hcatServerUri : PigHCatUtil.getHCatServerUri(job), + PigHCatUtil.getHCatServerPrincipal(job)); + List tablePartitionKeys = table.getPartitionKeys(); + String[] partitionKeys = new String[tablePartitionKeys.size()]; + for (int i = 0; i < tablePartitionKeys.size(); i++) { + partitionKeys[i] = tablePartitionKeys.get(i).getName(); } + return partitionKeys; + } - @Override - public ResourceSchema getSchema(String location, Job job) throws IOException { - HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() - .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); + @Override + public ResourceSchema getSchema(String location, Job job) throws IOException { + HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() + .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); - Table table = phutil.getTable(location, - hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), - PigHCatUtil.getHCatServerPrincipal(job)); - HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table); - try { - PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema); - } catch (IOException e) { - throw new PigException( - "Table schema incompatible for reading through HCatLoader :" + e.getMessage() - + ";[Table schema was " + hcatTableSchema.toString() + "]" - , PigHCatUtil.PIG_EXCEPTION_CODE, e); - } - storeInUDFContext(signature, HCatConstants.HCAT_TABLE_SCHEMA, hcatTableSchema); - outputSchema = hcatTableSchema; - return PigHCatUtil.getResourceSchema(hcatTableSchema); + Table table = phutil.getTable(location, + hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), + PigHCatUtil.getHCatServerPrincipal(job)); + HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table); + try { + PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema); + } catch (IOException e) { + throw new PigException( + "Table schema incompatible for reading through HCatLoader :" + e.getMessage() + + ";[Table schema was " + hcatTableSchema.toString() + "]" + , PigHCatUtil.PIG_EXCEPTION_CODE, e); } + storeInUDFContext(signature, HCatConstants.HCAT_TABLE_SCHEMA, hcatTableSchema); + outputSchema = hcatTableSchema; + return PigHCatUtil.getResourceSchema(hcatTableSchema); + } - @Override - public void setPartitionFilter(Expression partitionFilter) throws IOException { - // convert the partition filter expression into a string expected by - // hcat and pass it in setLocation() + @Override + public void setPartitionFilter(Expression partitionFilter) throws IOException { + // convert the partition filter expression into a string expected by + // hcat and pass it in setLocation() - partitionFilterString = getHCatComparisonString(partitionFilter); + partitionFilterString = getHCatComparisonString(partitionFilter); - // store this in the udf context so we can get it later - storeInUDFContext(signature, - PARTITION_FILTER, partitionFilterString); - } + // store this in the udf context so we can get it later + storeInUDFContext(signature, + PARTITION_FILTER, partitionFilterString); + } - /** - * Get statistics about the data to be loaded. Only input data size is implemented at this time. 
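setPartitionFilter serializes Pig's Expression tree into the comparison string HCatalog's partition filter expects; equality is the one operator that has to be rewritten, since the expression prints it as "==" while the filter language uses "=". Below is a cut-down rendering of getHCatComparisonString over a tiny local expression type; Expr, Leaf and BinExpr are editorial stand-ins, not Pig's Expression API.

// Cut-down version of the filter-string conversion using a local expression type.
public class FilterStringSketch {
  interface Expr { }
  static class Leaf implements Expr {
    final String text;
    Leaf(String text) { this.text = text; }
    public String toString() { return text; }
  }
  static class BinExpr implements Expr {
    final Expr lhs, rhs; final String op;   // op as Pig would print it, e.g. "==" or "and"
    BinExpr(Expr lhs, String op, Expr rhs) { this.lhs = lhs; this.op = op; this.rhs = rhs; }
  }

  static String toHCatFilter(Expr e) {
    if (e instanceof BinExpr) {
      BinExpr be = (BinExpr) e;
      String op = be.op.equals("==") ? " = " : " " + be.op + " ";  // metastore filters use '=' for equality
      return "(" + toHCatFilter(be.lhs) + op + toHCatFilter(be.rhs) + ")";
    }
    return e.toString();                                           // constant or column reference
  }

  public static void main(String[] args) {
    Expr filter = new BinExpr(new BinExpr(new Leaf("ds"), "==", new Leaf("'20130401'")),
                              "and",
                              new BinExpr(new Leaf("region"), "==", new Leaf("'us'")));
    System.out.println(toHCatFilter(filter)); // ((ds = '20130401') and (region = 'us'))
  }
}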
- */ - @Override - public ResourceStatistics getStatistics(String location, Job job) throws IOException { - try { - ResourceStatistics stats = new ResourceStatistics(); - InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize( - job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); - stats.setmBytes(getSizeInBytes(inputJobInfo) / 1024 / 1024); - return stats; - } catch (Exception e) { - throw new IOException(e); - } + /** + * Get statistics about the data to be loaded. Only input data size is implemented at this time. + */ + @Override + public ResourceStatistics getStatistics(String location, Job job) throws IOException { + try { + ResourceStatistics stats = new ResourceStatistics(); + InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize( + job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); + stats.setmBytes(getSizeInBytes(inputJobInfo) / 1024 / 1024); + return stats; + } catch (Exception e) { + throw new IOException(e); } + } - private String getPartitionFilterString() { - if (partitionFilterString == null) { - Properties props = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[]{signature}); - partitionFilterString = props.getProperty(PARTITION_FILTER); - } - return partitionFilterString; + private String getPartitionFilterString() { + if (partitionFilterString == null) { + Properties props = UDFContext.getUDFContext().getUDFProperties( + this.getClass(), new String[]{signature}); + partitionFilterString = props.getProperty(PARTITION_FILTER); } + return partitionFilterString; + } - private String getHCatComparisonString(Expression expr) { - if (expr instanceof BinaryExpression) { - // call getHCatComparisonString on lhs and rhs, and and join the - // results with OpType string + private String getHCatComparisonString(Expression expr) { + if (expr instanceof BinaryExpression) { + // call getHCatComparisonString on lhs and rhs, and and join the + // results with OpType string - // we can just use OpType.toString() on all Expression types except - // Equal, NotEqualt since Equal has '==' in toString() and - // we need '=' - String opStr = null; - switch (expr.getOpType()) { - case OP_EQ: - opStr = " = "; - break; - default: - opStr = expr.getOpType().toString(); - } - BinaryExpression be = (BinaryExpression) expr; - return "(" + getHCatComparisonString(be.getLhs()) + - opStr + - getHCatComparisonString(be.getRhs()) + ")"; - } else { - // should be a constant or column - return expr.toString(); - } + // we can just use OpType.toString() on all Expression types except + // Equal, NotEqualt since Equal has '==' in toString() and + // we need '=' + String opStr = null; + switch (expr.getOpType()) { + case OP_EQ: + opStr = " = "; + break; + default: + opStr = expr.getOpType().toString(); + } + BinaryExpression be = (BinaryExpression) expr; + return "(" + getHCatComparisonString(be.getLhs()) + + opStr + + getHCatComparisonString(be.getRhs()) + ")"; + } else { + // should be a constant or column + return expr.toString(); } + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatStorer.java index 30ef8a9..062f332 100644 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatStorer.java @@ -51,117 +51,117 @@ public class HCatStorer extends HCatBaseStorer { - // Signature for wrapped 
storer, see comments in LoadFuncBasedInputDriver.initialize - final public static String INNER_SIGNATURE = "hcatstorer.inner.signature"; - final public static String INNER_SIGNATURE_PREFIX = "hcatstorer_inner_signature"; - // A hash map which stores job credentials. The key is a signature passed by Pig, which is - //unique to the store func and out file name (table, in our case). - private static Map jobCredentials = new HashMap(); - - - public HCatStorer(String partSpecs, String schema) throws Exception { - super(partSpecs, schema); - } - - public HCatStorer(String partSpecs) throws Exception { - this(partSpecs, null); - } - - public HCatStorer() throws Exception { - this(null, null); - } - - @Override - public OutputFormat getOutputFormat() throws IOException { - return new HCatOutputFormat(); - } - - @Override - public void setStoreLocation(String location, Job job) throws IOException { - HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() - .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, false); - - Configuration config = job.getConfiguration(); - config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign); - Properties udfProps = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[]{sign}); - String[] userStr = location.split("\\."); - - if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) { - for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { - PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString()); - } - Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign); - if (crd != null) { - job.getCredentials().addAll(crd); - } - } else { - Job clone = new Job(job.getConfiguration()); - OutputJobInfo outputJobInfo; - if (userStr.length == 2) { - outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions); - } else if (userStr.length == 1) { - outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions); - } else { - throw new FrontendException("location " + location - + " is invalid. 
It must be of the form [db.]table", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA)); - if (schema != null) { - pigSchema = schema; - } - if (pigSchema == null) { - throw new FrontendException( - "Schema for data cannot be determined.", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION); - if (externalLocation != null) { - outputJobInfo.setLocation(externalLocation); - } - try { - HCatOutputFormat.setOutput(job, outputJobInfo); - } catch (HCatException he) { - // pass the message to the user - essentially something about - // the table - // information passed to HCatOutputFormat was not right - throw new PigException(he.getMessage(), - PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job); - try { - doSchemaValidations(pigSchema, hcatTblSchema); - } catch (HCatException he) { - throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema); - HCatOutputFormat.setSchema(job, computedSchema); - udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema)); - - // We will store all the new /changed properties in the job in the - // udf context, so the the HCatOutputFormat.setOutput and setSchema - // methods need not be called many times. - for (Entry keyValue : job.getConfiguration()) { - String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); - if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { - udfProps.put(keyValue.getKey(), keyValue.getValue()); - } - } - //Store credentials in a private hash map and not the udf context to - // make sure they are not public. - jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials()); - udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true); + // Signature for wrapped storer, see comments in LoadFuncBasedInputDriver.initialize + final public static String INNER_SIGNATURE = "hcatstorer.inner.signature"; + final public static String INNER_SIGNATURE_PREFIX = "hcatstorer_inner_signature"; + // A hash map which stores job credentials. The key is a signature passed by Pig, which is + //unique to the store func and out file name (table, in our case). 
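setStoreLocation in this hunk treats its location argument as "[db.]table": one dot separates database and table, no dot means only a table name (the database is left null), and anything else is rejected. That rule in isolation, with IllegalArgumentException standing in for the FrontendException the storer throws:

// The "[db.]table" parsing rule from setStoreLocation in isolation.
import java.util.Arrays;

public class StoreLocationSketch {
  static String[] parse(String location) {
    String[] parts = location.split("\\.");
    if (parts.length == 2) {
      return new String[] { parts[0], parts[1] };   // db.table
    } else if (parts.length == 1) {
      return new String[] { null, parts[0] };       // table only; the storer passes db = null
    }
    throw new IllegalArgumentException(
        "location " + location + " is invalid. It must be of the form [db.]table");
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(parse("web.logs")));  // [web, logs]
    System.out.println(Arrays.toString(parse("logs")));      // [null, logs]
  }
}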
+ private static Map jobCredentials = new HashMap(); + + + public HCatStorer(String partSpecs, String schema) throws Exception { + super(partSpecs, schema); + } + + public HCatStorer(String partSpecs) throws Exception { + this(partSpecs, null); + } + + public HCatStorer() throws Exception { + this(null, null); + } + + @Override + public OutputFormat getOutputFormat() throws IOException { + return new HCatOutputFormat(); + } + + @Override + public void setStoreLocation(String location, Job job) throws IOException { + HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() + .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, false); + + Configuration config = job.getConfiguration(); + config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign); + Properties udfProps = UDFContext.getUDFContext().getUDFProperties( + this.getClass(), new String[]{sign}); + String[] userStr = location.split("\\."); + + if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) { + for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { + PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString()); + } + Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign); + if (crd != null) { + job.getCredentials().addAll(crd); + } + } else { + Job clone = new Job(job.getConfiguration()); + OutputJobInfo outputJobInfo; + if (userStr.length == 2) { + outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions); + } else if (userStr.length == 1) { + outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions); + } else { + throw new FrontendException("location " + location + + " is invalid. It must be of the form [db.]table", + PigHCatUtil.PIG_EXCEPTION_CODE); + } + Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA)); + if (schema != null) { + pigSchema = schema; + } + if (pigSchema == null) { + throw new FrontendException( + "Schema for data cannot be determined.", + PigHCatUtil.PIG_EXCEPTION_CODE); + } + String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION); + if (externalLocation != null) { + outputJobInfo.setLocation(externalLocation); + } + try { + HCatOutputFormat.setOutput(job, outputJobInfo); + } catch (HCatException he) { + // pass the message to the user - essentially something about + // the table + // information passed to HCatOutputFormat was not right + throw new PigException(he.getMessage(), + PigHCatUtil.PIG_EXCEPTION_CODE, he); + } + HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job); + try { + doSchemaValidations(pigSchema, hcatTblSchema); + } catch (HCatException he) { + throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); + } + computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema); + HCatOutputFormat.setSchema(job, computedSchema); + udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema)); + + // We will store all the new /changed properties in the job in the + // udf context, so the the HCatOutputFormat.setOutput and setSchema + // methods need not be called many times. 
+ for (Entry keyValue : job.getConfiguration()) { + String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); + if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { + udfProps.put(keyValue.getKey(), keyValue.getValue()); } + } + //Store credentials in a private hash map and not the udf context to + // make sure they are not public. + jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials()); + udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true); } + } - @Override - public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException { - ShimLoader.getHadoopShims().getHCatShim().commitJob(getOutputFormat(), job); - } + @Override + public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException { + ShimLoader.getHadoopShims().getHCatShim().commitJob(getOutputFormat(), job); + } - @Override - public void cleanupOnFailure(String location, Job job) throws IOException { - ShimLoader.getHadoopShims().getHCatShim().abortJob(getOutputFormat(), job); - } + @Override + public void cleanupOnFailure(String location, Job job) throws IOException { + ShimLoader.getHadoopShims().getHCatShim().abortJob(getOutputFormat(), job); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java index 9c849cc..ed8d7c3 100644 --- a/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java +++ b/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java @@ -61,432 +61,432 @@ class PigHCatUtil { - private static final Logger LOG = LoggerFactory.getLogger(PigHCatUtil.class); - - static final int PIG_EXCEPTION_CODE = 1115; // http://wiki.apache.org/pig/PigErrorHandlingFunctionalSpecification#Error_codes - private static final String DEFAULT_DB = MetaStoreUtils.DEFAULT_DATABASE_NAME; - - private final Map, Table> hcatTableCache = - new HashMap, Table>(); - - private static final TupleFactory tupFac = TupleFactory.getInstance(); - - private static boolean pigHasBooleanSupport = false; - - /** - * Determine if the current Pig version supports boolean columns. This works around a - * dependency conflict preventing HCatalog from requiring a version of Pig with boolean - * field support and should be removed once HCATALOG-466 has been resolved. - */ - static { - // DETAILS: - // - // PIG-1429 added support for boolean fields, which shipped in 0.10.0; - // this version of Pig depends on antlr 3.4. - // - // HCatalog depends heavily on Hive, which at this time uses antlr 3.0.1. - // - // antlr 3.0.1 and 3.4 are incompatible, so Pig 0.10.0 and Hive cannot be depended on in the - // same project. Pig 0.8.0 did not use antlr for its parser and can coexist with Hive, - // so that Pig version is depended on by HCatalog at this time. - try { - Schema schema = Utils.getSchemaFromString("myBooleanField: boolean"); - pigHasBooleanSupport = (schema.getField("myBooleanField").type == DataType.BOOLEAN); - } catch (Throwable e) { - // pass - } - - if (!pigHasBooleanSupport) { - LOG.info("This version of Pig does not support boolean fields. 
To enable " - + "boolean-to-integer conversion, set the " - + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER - + "=true configuration parameter."); - } + private static final Logger LOG = LoggerFactory.getLogger(PigHCatUtil.class); + + static final int PIG_EXCEPTION_CODE = 1115; // http://wiki.apache.org/pig/PigErrorHandlingFunctionalSpecification#Error_codes + private static final String DEFAULT_DB = MetaStoreUtils.DEFAULT_DATABASE_NAME; + + private final Map, Table> hcatTableCache = + new HashMap, Table>(); + + private static final TupleFactory tupFac = TupleFactory.getInstance(); + + private static boolean pigHasBooleanSupport = false; + + /** + * Determine if the current Pig version supports boolean columns. This works around a + * dependency conflict preventing HCatalog from requiring a version of Pig with boolean + * field support and should be removed once HCATALOG-466 has been resolved. + */ + static { + // DETAILS: + // + // PIG-1429 added support for boolean fields, which shipped in 0.10.0; + // this version of Pig depends on antlr 3.4. + // + // HCatalog depends heavily on Hive, which at this time uses antlr 3.0.1. + // + // antlr 3.0.1 and 3.4 are incompatible, so Pig 0.10.0 and Hive cannot be depended on in the + // same project. Pig 0.8.0 did not use antlr for its parser and can coexist with Hive, + // so that Pig version is depended on by HCatalog at this time. + try { + Schema schema = Utils.getSchemaFromString("myBooleanField: boolean"); + pigHasBooleanSupport = (schema.getField("myBooleanField").type == DataType.BOOLEAN); + } catch (Throwable e) { + // pass } - static public boolean pigHasBooleanSupport(){ - return pigHasBooleanSupport; + if (!pigHasBooleanSupport) { + LOG.info("This version of Pig does not support boolean fields. To enable " + + "boolean-to-integer conversion, set the " + + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER + + "=true configuration parameter."); } - - static public Pair getDBTableNames(String location) throws IOException { - // the location string will be of the form: - // .
- parse it and - // communicate the information to HCatInputFormat - - try { - return HCatUtil.getDbAndTableName(location); - } catch (IOException e) { - String locationErrMsg = "The input location in load statement " + - "should be of the form " + - ".
<tablename> or <tablename>
. Got " + location; - throw new PigException(locationErrMsg, PIG_EXCEPTION_CODE); - } + } + + static public boolean pigHasBooleanSupport(){ + return pigHasBooleanSupport; + } + + static public Pair getDBTableNames(String location) throws IOException { + // the location string will be of the form: + // .
- parse it and + // communicate the information to HCatInputFormat + + try { + return HCatUtil.getDbAndTableName(location); + } catch (IOException e) { + String locationErrMsg = "The input location in load statement " + + "should be of the form " + + ".
<tablename> or <tablename>
. Got " + location; + throw new PigException(locationErrMsg, PIG_EXCEPTION_CODE); } + } - static public String getHCatServerUri(Job job) { + static public String getHCatServerUri(Job job) { - return job.getConfiguration().get(HiveConf.ConfVars.METASTOREURIS.varname); - } + return job.getConfiguration().get(HiveConf.ConfVars.METASTOREURIS.varname); + } - static public String getHCatServerPrincipal(Job job) { + static public String getHCatServerPrincipal(Job job) { - return job.getConfiguration().get(HCatConstants.HCAT_METASTORE_PRINCIPAL); - } + return job.getConfiguration().get(HCatConstants.HCAT_METASTORE_PRINCIPAL); + } - private static HiveMetaStoreClient getHiveMetaClient(String serverUri, - String serverKerberosPrincipal, Class clazz) throws Exception { - HiveConf hiveConf = new HiveConf(clazz); + private static HiveMetaStoreClient getHiveMetaClient(String serverUri, + String serverKerberosPrincipal, Class clazz) throws Exception { + HiveConf hiveConf = new HiveConf(clazz); - if (serverUri != null) { - hiveConf.set("hive.metastore.local", "false"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim()); - } + if (serverUri != null) { + hiveConf.set("hive.metastore.local", "false"); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim()); + } - if (serverKerberosPrincipal != null) { - hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, serverKerberosPrincipal); - } + if (serverKerberosPrincipal != null) { + hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true); + hiveConf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, serverKerberosPrincipal); + } - try { - return HCatUtil.getHiveClient(hiveConf); - } catch (Exception e) { - throw new Exception("Could not instantiate a HiveMetaStoreClient connecting to server uri:[" + serverUri + "]", e); - } + try { + return HCatUtil.getHiveClient(hiveConf); + } catch (Exception e) { + throw new Exception("Could not instantiate a HiveMetaStoreClient connecting to server uri:[" + serverUri + "]", e); } + } - HCatSchema getHCatSchema(List fields, String signature, Class classForUDFCLookup) throws IOException { - if (fields == null) { - return null; - } + HCatSchema getHCatSchema(List fields, String signature, Class classForUDFCLookup) throws IOException { + if (fields == null) { + return null; + } - Properties props = UDFContext.getUDFContext().getUDFProperties( - classForUDFCLookup, new String[]{signature}); - HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA); + Properties props = UDFContext.getUDFContext().getUDFProperties( + classForUDFCLookup, new String[]{signature}); + HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA); - ArrayList fcols = new ArrayList(); - for (RequiredField rf : fields) { - fcols.add(hcatTableSchema.getFields().get(rf.getIndex())); - } - return new HCatSchema(fcols); + ArrayList fcols = new ArrayList(); + for (RequiredField rf : fields) { + fcols.add(hcatTableSchema.getFields().get(rf.getIndex())); + } + return new HCatSchema(fcols); + } + + public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal) throws IOException { + Pair loc_server = new Pair(location, hcatServerUri); + Table hcatTable = hcatTableCache.get(loc_server); + if (hcatTable != null) { + return hcatTable; } - public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal) throws IOException { - Pair 
loc_server = new Pair(location, hcatServerUri); - Table hcatTable = hcatTableCache.get(loc_server); - if (hcatTable != null) { - return hcatTable; - } - - Pair dbTablePair = PigHCatUtil.getDBTableNames(location); - String dbName = dbTablePair.first; - String tableName = dbTablePair.second; - Table table = null; - HiveMetaStoreClient client = null; - try { - client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class); - table = HCatUtil.getTable(client, dbName, tableName); - } catch (NoSuchObjectException nsoe) { - throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE); // prettier error messages to frontend - } catch (Exception e) { - throw new IOException(e); - } finally { - HCatUtil.closeHiveClientQuietly(client); - } - hcatTableCache.put(loc_server, table); - return table; + Pair dbTablePair = PigHCatUtil.getDBTableNames(location); + String dbName = dbTablePair.first; + String tableName = dbTablePair.second; + Table table = null; + HiveMetaStoreClient client = null; + try { + client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class); + table = HCatUtil.getTable(client, dbName, tableName); + } catch (NoSuchObjectException nsoe) { + throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE); // prettier error messages to frontend + } catch (Exception e) { + throw new IOException(e); + } finally { + HCatUtil.closeHiveClientQuietly(client); } + hcatTableCache.put(loc_server, table); + return table; + } - public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException { + public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException { - List rfSchemaList = new ArrayList(); - for (HCatFieldSchema hfs : hcatSchema.getFields()) { - ResourceFieldSchema rfSchema; - rfSchema = getResourceSchemaFromFieldSchema(hfs); - rfSchemaList.add(rfSchema); - } - ResourceSchema rSchema = new ResourceSchema(); - rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0])); - return rSchema; - - } - - private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) - throws IOException { - ResourceFieldSchema rfSchema; - // if we are dealing with a bag or tuple column - need to worry about subschema - if (hfs.getType() == Type.STRUCT) { - rfSchema = new ResourceFieldSchema() - .setName(hfs.getName()) - .setDescription(hfs.getComment()) - .setType(getPigType(hfs)) - .setSchema(getTupleSubSchema(hfs)); - } else if (hfs.getType() == Type.ARRAY) { - rfSchema = new ResourceFieldSchema() - .setName(hfs.getName()) - .setDescription(hfs.getComment()) - .setType(getPigType(hfs)) - .setSchema(getBagSubSchema(hfs)); - } else { - rfSchema = new ResourceFieldSchema() - .setName(hfs.getName()) - .setDescription(hfs.getComment()) - .setType(getPigType(hfs)) - .setSchema(null); // no munging inner-schemas - } - return rfSchema; + List rfSchemaList = new ArrayList(); + for (HCatFieldSchema hfs : hcatSchema.getFields()) { + ResourceFieldSchema rfSchema; + rfSchema = getResourceSchemaFromFieldSchema(hfs); + rfSchemaList.add(rfSchema); + } + ResourceSchema rSchema = new ResourceSchema(); + rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0])); + return rSchema; + + } + + private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) + throws IOException { + ResourceFieldSchema rfSchema; + // if we are dealing with a bag or tuple column - need to worry about subschema + if (hfs.getType() == Type.STRUCT) { + 
rfSchema = new ResourceFieldSchema() + .setName(hfs.getName()) + .setDescription(hfs.getComment()) + .setType(getPigType(hfs)) + .setSchema(getTupleSubSchema(hfs)); + } else if (hfs.getType() == Type.ARRAY) { + rfSchema = new ResourceFieldSchema() + .setName(hfs.getName()) + .setDescription(hfs.getComment()) + .setType(getPigType(hfs)) + .setSchema(getBagSubSchema(hfs)); + } else { + rfSchema = new ResourceFieldSchema() + .setName(hfs.getName()) + .setDescription(hfs.getComment()) + .setType(getPigType(hfs)) + .setSchema(null); // no munging inner-schemas + } + return rfSchema; + } + + protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException { + // there are two cases - array and array> + // in either case the element type of the array is represented in a + // tuple field schema in the bag's field schema - the second case (struct) + // more naturally translates to the tuple - in the first case (array) + // we simulate the tuple by putting the single field in a tuple + + Properties props = UDFContext.getUDFContext().getClientSystemProps(); + String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT; + if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) { + innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME) + .replaceAll("FIELDNAME", hfs.getName()); + } + String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT; + if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) { + innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME) + .replaceAll("FIELDNAME", hfs.getName()); } - protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException { - // there are two cases - array and array> - // in either case the element type of the array is represented in a - // tuple field schema in the bag's field schema - the second case (struct) - // more naturally translates to the tuple - in the first case (array) - // we simulate the tuple by putting the single field in a tuple - - Properties props = UDFContext.getUDFContext().getClientSystemProps(); - String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT; - if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) { - innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME) - .replaceAll("FIELDNAME", hfs.getName()); - } - String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT; - if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) { - innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME) - .replaceAll("FIELDNAME", hfs.getName()); - } + ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; + bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName) + .setDescription("The tuple in the bag") + .setType(DataType.TUPLE); + HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0); + if (arrayElementFieldSchema.getType() == Type.STRUCT) { + bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema)); + } else if (arrayElementFieldSchema.getType() == Type.ARRAY) { + ResourceSchema s = new ResourceSchema(); + List lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema)); + s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); + bagSubFieldSchemas[0].setSchema(s); + } else { + ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; + 
innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName) + .setDescription("The inner field in the tuple in the bag") + .setType(getPigType(arrayElementFieldSchema)) + .setSchema(null); // the element type is not a tuple - so no subschema + bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); + } + ResourceSchema s = new ResourceSchema().setFields(bagSubFieldSchemas); + return s; - ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; - bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName) - .setDescription("The tuple in the bag") - .setType(DataType.TUPLE); - HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0); - if (arrayElementFieldSchema.getType() == Type.STRUCT) { - bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema)); - } else if (arrayElementFieldSchema.getType() == Type.ARRAY) { - ResourceSchema s = new ResourceSchema(); - List lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema)); - s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); - bagSubFieldSchemas[0].setSchema(s); - } else { - ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; - innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName) - .setDescription("The inner field in the tuple in the bag") - .setType(getPigType(arrayElementFieldSchema)) - .setSchema(null); // the element type is not a tuple - so no subschema - bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); - } - ResourceSchema s = new ResourceSchema().setFields(bagSubFieldSchemas); - return s; + } + private static ResourceSchema getTupleSubSchema(HCatFieldSchema hfs) throws IOException { + // for each struct subfield, create equivalent ResourceFieldSchema + ResourceSchema s = new ResourceSchema(); + List lrfs = new ArrayList(); + for (HCatFieldSchema subField : hfs.getStructSubSchema().getFields()) { + lrfs.add(getResourceSchemaFromFieldSchema(subField)); } - - private static ResourceSchema getTupleSubSchema(HCatFieldSchema hfs) throws IOException { - // for each struct subfield, create equivalent ResourceFieldSchema - ResourceSchema s = new ResourceSchema(); - List lrfs = new ArrayList(); - for (HCatFieldSchema subField : hfs.getStructSubSchema().getFields()) { - lrfs.add(getResourceSchemaFromFieldSchema(subField)); - } - s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); - return s; + s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); + return s; + } + + /** + * @param hfs the field schema of the column + * @return corresponding pig type + * @throws IOException + */ + static public byte getPigType(HCatFieldSchema hfs) throws IOException { + return getPigType(hfs.getType()); + } + + static public byte getPigType(Type type) throws IOException { + if (type == Type.STRING) { + return DataType.CHARARRAY; } - /** - * @param hfs the field schema of the column - * @return corresponding pig type - * @throws IOException - */ - static public byte getPigType(HCatFieldSchema hfs) throws IOException { - return getPigType(hfs.getType()); + if ((type == Type.INT) || (type == Type.SMALLINT) || (type == Type.TINYINT)) { + return DataType.INTEGER; } - static public byte getPigType(Type type) throws IOException { - if (type == Type.STRING) { - return DataType.CHARARRAY; - } - - if ((type == Type.INT) || (type == Type.SMALLINT) || (type == Type.TINYINT)) { - return DataType.INTEGER; - } + if (type == Type.ARRAY) { + return 
DataType.BAG; + } - if (type == Type.ARRAY) { - return DataType.BAG; - } + if (type == Type.STRUCT) { + return DataType.TUPLE; + } - if (type == Type.STRUCT) { - return DataType.TUPLE; - } + if (type == Type.MAP) { + return DataType.MAP; + } - if (type == Type.MAP) { - return DataType.MAP; - } + if (type == Type.BIGINT) { + return DataType.LONG; + } - if (type == Type.BIGINT) { - return DataType.LONG; - } + if (type == Type.FLOAT) { + return DataType.FLOAT; + } - if (type == Type.FLOAT) { - return DataType.FLOAT; - } + if (type == Type.DOUBLE) { + return DataType.DOUBLE; + } - if (type == Type.DOUBLE) { - return DataType.DOUBLE; - } + if (type == Type.BINARY) { + return DataType.BYTEARRAY; + } - if (type == Type.BINARY) { - return DataType.BYTEARRAY; - } + if (type == Type.BOOLEAN && pigHasBooleanSupport) { + return DataType.BOOLEAN; + } - if (type == Type.BOOLEAN && pigHasBooleanSupport) { - return DataType.BOOLEAN; - } + throw new PigException("HCatalog column type '" + type.toString() + + "' is not supported in Pig as a column type", PIG_EXCEPTION_CODE); + } - throw new PigException("HCatalog column type '" + type.toString() - + "' is not supported in Pig as a column type", PIG_EXCEPTION_CODE); + public static Tuple transformToTuple(HCatRecord hr, HCatSchema hs) throws Exception { + if (hr == null) { + return null; } - - public static Tuple transformToTuple(HCatRecord hr, HCatSchema hs) throws Exception { - if (hr == null) { - return null; - } - return transformToTuple(hr.getAll(), hs); - } - - @SuppressWarnings("unchecked") - public static Object extractPigObject(Object o, HCatFieldSchema hfs) throws Exception { - Object result; - Type itemType = hfs.getType(); - switch (itemType) { - case BINARY: - result = (o == null) ? null : new DataByteArray((byte[]) o); - break; - case STRUCT: - result = transformToTuple((List) o, hfs); - break; - case ARRAY: - result = transformToBag((List) o, hfs); - break; - case MAP: - result = transformToPigMap((Map) o, hfs); - break; - default: - result = o; - break; - } - return result; - } - - private static Tuple transformToTuple(List objList, HCatFieldSchema hfs) throws Exception { - try { - return transformToTuple(objList, hfs.getStructSubSchema()); - } catch (Exception e) { - if (hfs.getType() != Type.STRUCT) { - throw new Exception("Expected Struct type, got " + hfs.getType(), e); - } else { - throw e; - } - } + return transformToTuple(hr.getAll(), hs); + } + + @SuppressWarnings("unchecked") + public static Object extractPigObject(Object o, HCatFieldSchema hfs) throws Exception { + Object result; + Type itemType = hfs.getType(); + switch (itemType) { + case BINARY: + result = (o == null) ? 
null : new DataByteArray((byte[]) o); + break; + case STRUCT: + result = transformToTuple((List) o, hfs); + break; + case ARRAY: + result = transformToBag((List) o, hfs); + break; + case MAP: + result = transformToPigMap((Map) o, hfs); + break; + default: + result = o; + break; + } + return result; + } + + private static Tuple transformToTuple(List objList, HCatFieldSchema hfs) throws Exception { + try { + return transformToTuple(objList, hfs.getStructSubSchema()); + } catch (Exception e) { + if (hfs.getType() != Type.STRUCT) { + throw new Exception("Expected Struct type, got " + hfs.getType(), e); + } else { + throw e; + } } + } - private static Tuple transformToTuple(List objList, HCatSchema hs) throws Exception { - if (objList == null) { - return null; - } - Tuple t = tupFac.newTuple(objList.size()); - List subFields = hs.getFields(); - for (int i = 0; i < subFields.size(); i++) { - t.set(i, extractPigObject(objList.get(i), subFields.get(i))); - } - return t; + private static Tuple transformToTuple(List objList, HCatSchema hs) throws Exception { + if (objList == null) { + return null; } + Tuple t = tupFac.newTuple(objList.size()); + List subFields = hs.getFields(); + for (int i = 0; i < subFields.size(); i++) { + t.set(i, extractPigObject(objList.get(i), subFields.get(i))); + } + return t; + } - private static Map transformToPigMap(Map map, HCatFieldSchema hfs) throws Exception { - if (map == null) { - return null; - } + private static Map transformToPigMap(Map map, HCatFieldSchema hfs) throws Exception { + if (map == null) { + return null; + } - Map result = new HashMap(); - for (Entry entry : map.entrySet()) { - // since map key for Pig has to be Strings - result.put(entry.getKey().toString(), extractPigObject(entry.getValue(), hfs.getMapValueSchema().get(0))); - } - return result; + Map result = new HashMap(); + for (Entry entry : map.entrySet()) { + // since map key for Pig has to be Strings + result.put(entry.getKey().toString(), extractPigObject(entry.getValue(), hfs.getMapValueSchema().get(0))); } + return result; + } - @SuppressWarnings("unchecked") - private static DataBag transformToBag(List list, HCatFieldSchema hfs) throws Exception { - if (list == null) { - return null; - } + @SuppressWarnings("unchecked") + private static DataBag transformToBag(List list, HCatFieldSchema hfs) throws Exception { + if (list == null) { + return null; + } - HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); - DataBag db = new DefaultDataBag(); - for (Object o : list) { - Tuple tuple; - if (elementSubFieldSchema.getType() == Type.STRUCT) { - tuple = transformToTuple((List) o, elementSubFieldSchema); - } else { - // bags always contain tuples - tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); - } - db.add(tuple); - } - return db; + HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); + DataBag db = new DefaultDataBag(); + for (Object o : list) { + Tuple tuple; + if (elementSubFieldSchema.getType() == Type.STRUCT) { + tuple = transformToTuple((List) o, elementSubFieldSchema); + } else { + // bags always contain tuples + tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); + } + db.add(tuple); } + return db; + } - private static void validateHCatSchemaFollowsPigRules(HCatSchema tblSchema) throws PigException { - for (HCatFieldSchema hcatField : tblSchema.getFields()) { - validateHcatFieldFollowsPigRules(hcatField); - } + private static void validateHCatSchemaFollowsPigRules(HCatSchema 
tblSchema) throws PigException { + for (HCatFieldSchema hcatField : tblSchema.getFields()) { + validateHcatFieldFollowsPigRules(hcatField); } + } - private static void validateHcatFieldFollowsPigRules(HCatFieldSchema hcatField) throws PigException { - try { - Type hType = hcatField.getType(); - switch (hType) { - case BOOLEAN: - if (!pigHasBooleanSupport) { - throw new PigException("Incompatible type found in HCat table schema: " - + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); - } - break; - case ARRAY: - validateHCatSchemaFollowsPigRules(hcatField.getArrayElementSchema()); - break; - case STRUCT: - validateHCatSchemaFollowsPigRules(hcatField.getStructSubSchema()); - break; - case MAP: - // key is only string - if (hcatField.getMapKeyType() != Type.STRING) { - LOG.info("Converting non-String key of map " + hcatField.getName() + " from " - + hcatField.getMapKeyType() + " to String."); - } - validateHCatSchemaFollowsPigRules(hcatField.getMapValueSchema()); - break; - } - } catch (HCatException e) { - throw new PigException("Incompatible type found in hcat table schema: " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE, e); - } + private static void validateHcatFieldFollowsPigRules(HCatFieldSchema hcatField) throws PigException { + try { + Type hType = hcatField.getType(); + switch (hType) { + case BOOLEAN: + if (!pigHasBooleanSupport) { + throw new PigException("Incompatible type found in HCat table schema: " + + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); + } + break; + case ARRAY: + validateHCatSchemaFollowsPigRules(hcatField.getArrayElementSchema()); + break; + case STRUCT: + validateHCatSchemaFollowsPigRules(hcatField.getStructSubSchema()); + break; + case MAP: + // key is only string + if (hcatField.getMapKeyType() != Type.STRING) { + LOG.info("Converting non-String key of map " + hcatField.getName() + " from " + + hcatField.getMapKeyType() + " to String."); + } + validateHCatSchemaFollowsPigRules(hcatField.getMapValueSchema()); + break; + } + } catch (HCatException e) { + throw new PigException("Incompatible type found in hcat table schema: " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE, e); } + } - public static void validateHCatTableSchemaFollowsPigRules(HCatSchema hcatTableSchema) throws IOException { - validateHCatSchemaFollowsPigRules(hcatTableSchema); - } + public static void validateHCatTableSchemaFollowsPigRules(HCatSchema hcatTableSchema) throws IOException { + validateHCatSchemaFollowsPigRules(hcatTableSchema); + } - public static void getConfigFromUDFProperties(Properties p, Configuration config, String propName) { - if (p.getProperty(propName) != null) { - config.set(propName, p.getProperty(propName)); - } + public static void getConfigFromUDFProperties(Properties p, Configuration config, String propName) { + if (p.getProperty(propName) != null) { + config.set(propName, p.getProperty(propName)); } + } - public static void saveConfigIntoUDFProperties(Properties p, Configuration config, String propName) { - if (config.get(propName) != null) { - p.setProperty(propName, config.get(propName)); - } + public static void saveConfigIntoUDFProperties(Properties p, Configuration config, String propName) { + if (config.get(propName) != null) { + p.setProperty(propName, config.get(propName)); } + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/HCatStorerWrapper.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/HCatStorerWrapper.java index 1bd9bf8..a1584bf 100644 --- 
a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/HCatStorerWrapper.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/HCatStorerWrapper.java @@ -39,35 +39,35 @@ */ public class HCatStorerWrapper extends HCatStorer { - private String sign; - private String externalDir; + private String sign; + private String externalDir; - public HCatStorerWrapper(String partSpecs, String schema, String externalDir) throws Exception { - super(partSpecs, schema); - this.externalDir = externalDir; - } + public HCatStorerWrapper(String partSpecs, String schema, String externalDir) throws Exception { + super(partSpecs, schema); + this.externalDir = externalDir; + } - public HCatStorerWrapper(String partSpecs, String externalDir) throws Exception { - super(partSpecs); - this.externalDir = externalDir; - } + public HCatStorerWrapper(String partSpecs, String externalDir) throws Exception { + super(partSpecs); + this.externalDir = externalDir; + } - public HCatStorerWrapper(String externalDir) throws Exception{ - super(); - this.externalDir = externalDir; - } + public HCatStorerWrapper(String externalDir) throws Exception{ + super(); + this.externalDir = externalDir; + } - @Override - public void setStoreLocation(String location, Job job) throws IOException { - Properties udfProps = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[] { sign }); - udfProps.setProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION, externalDir); - super.setStoreLocation(location, job); - } + @Override + public void setStoreLocation(String location, Job job) throws IOException { + Properties udfProps = UDFContext.getUDFContext().getUDFProperties( + this.getClass(), new String[] { sign }); + udfProps.setProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION, externalDir); + super.setStoreLocation(location, job); + } - @Override - public void setStoreFuncUDFContextSignature(String signature) { - sign = signature; - super.setStoreFuncUDFContextSignature(signature); - } + @Override + public void setStoreFuncUDFContextSignature(String signature) { + sign = signature; + super.setStoreFuncUDFContextSignature(signature); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MockLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MockLoader.java index 4e97b4b..873d36a 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MockLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MockLoader.java @@ -44,140 +44,140 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.pig.MockLoader} instead */ public class MockLoader extends LoadFunc { - private static final class MockRecordReader extends RecordReader { - @Override - public void close() throws IOException { - } + private static final class MockRecordReader extends RecordReader { + @Override + public void close() throws IOException { + } - @Override - public Object getCurrentKey() throws IOException, InterruptedException { - return "mockKey"; - } + @Override + public Object getCurrentKey() throws IOException, InterruptedException { + return "mockKey"; + } - @Override - public Object getCurrentValue() throws IOException, InterruptedException { - return "mockValue"; - } + @Override + public Object getCurrentValue() throws IOException, InterruptedException { + return "mockValue"; + } - @Override - public float getProgress() throws IOException, InterruptedException { - return 0.5f; - } + @Override 
+ public float getProgress() throws IOException, InterruptedException { + return 0.5f; + } - @Override - public void initialize(InputSplit split, TaskAttemptContext arg1) throws IOException, - InterruptedException { - } + @Override + public void initialize(InputSplit split, TaskAttemptContext arg1) throws IOException, + InterruptedException { + } - @Override - public boolean nextKeyValue() throws IOException, InterruptedException { - return true; - } + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { + return true; } + } - private static final class MockInputSplit extends InputSplit implements Writable { - private String location; + private static final class MockInputSplit extends InputSplit implements Writable { + private String location; - public MockInputSplit() { - } + public MockInputSplit() { + } - public MockInputSplit(String location) { - this.location = location; - } + public MockInputSplit(String location) { + this.location = location; + } - @Override - public String[] getLocations() throws IOException, InterruptedException { - return new String[]{location}; - } + @Override + public String[] getLocations() throws IOException, InterruptedException { + return new String[]{location}; + } - @Override - public long getLength() throws IOException, InterruptedException { - return 10000000; - } + @Override + public long getLength() throws IOException, InterruptedException { + return 10000000; + } - @Override - public boolean equals(Object arg0) { - return arg0 == this; - } + @Override + public boolean equals(Object arg0) { + return arg0 == this; + } - @Override - public int hashCode() { - return location.hashCode(); - } + @Override + public int hashCode() { + return location.hashCode(); + } - @Override - public void readFields(DataInput arg0) throws IOException { - location = arg0.readUTF(); - } + @Override + public void readFields(DataInput arg0) throws IOException { + location = arg0.readUTF(); + } - @Override - public void write(DataOutput arg0) throws IOException { - arg0.writeUTF(location); - } + @Override + public void write(DataOutput arg0) throws IOException { + arg0.writeUTF(location); } + } - private static final class MockInputFormat extends InputFormat { + private static final class MockInputFormat extends InputFormat { - private final String location; + private final String location; - public MockInputFormat(String location) { - this.location = location; - } + public MockInputFormat(String location) { + this.location = location; + } - @Override - public RecordReader createRecordReader(InputSplit arg0, TaskAttemptContext arg1) - throws IOException, InterruptedException { - return new MockRecordReader(); - } + @Override + public RecordReader createRecordReader(InputSplit arg0, TaskAttemptContext arg1) + throws IOException, InterruptedException { + return new MockRecordReader(); + } - @Override - public List getSplits(JobContext arg0) throws IOException, InterruptedException { - return Arrays.asList(new MockInputSplit(location)); - } + @Override + public List getSplits(JobContext arg0) throws IOException, InterruptedException { + return Arrays.asList(new MockInputSplit(location)); } + } - private static final Map> locationToData = new HashMap>(); + private static final Map> locationToData = new HashMap>(); - public static void setData(String location, Iterable data) { - locationToData.put(location, data); - } + public static void setData(String location, Iterable data) { + locationToData.put(location, data); + } - private String location; + 
private String location; - private Iterator data; + private Iterator data; - @Override - public String relativeToAbsolutePath(String location, Path curDir) throws IOException { - return location; - } + @Override + public String relativeToAbsolutePath(String location, Path curDir) throws IOException { + return location; + } - @Override - public void setLocation(String location, Job job) throws IOException { - this.location = location; - if (location == null) { - throw new IOException("null location passed to MockLoader"); - } - this.data = locationToData.get(location).iterator(); - if (this.data == null) { - throw new IOException("No data configured for location: " + location); - } + @Override + public void setLocation(String location, Job job) throws IOException { + this.location = location; + if (location == null) { + throw new IOException("null location passed to MockLoader"); } - - @Override - public Tuple getNext() throws IOException { - if (data == null) { - throw new IOException("data was not correctly initialized in MockLoader"); - } - return data.hasNext() ? data.next() : null; + this.data = locationToData.get(location).iterator(); + if (this.data == null) { + throw new IOException("No data configured for location: " + location); } + } - @Override - public InputFormat getInputFormat() throws IOException { - return new MockInputFormat(location); + @Override + public Tuple getNext() throws IOException { + if (data == null) { + throw new IOException("data was not correctly initialized in MockLoader"); } + return data.hasNext() ? data.next() : null; + } - @Override - public void prepareToRead(RecordReader arg0, PigSplit arg1) throws IOException { - } + @Override + public InputFormat getInputFormat() throws IOException { + return new MockInputFormat(location); + } + + @Override + public void prepareToRead(RecordReader arg0, PigSplit arg1) throws IOException { + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MyPigStorage.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MyPigStorage.java index d26ef5a..bbff365 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MyPigStorage.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MyPigStorage.java @@ -28,16 +28,16 @@ */ public class MyPigStorage extends PigStorage { - String arg2; + String arg2; - public MyPigStorage(String arg1, String arg2) throws IOException { - super(arg1); - this.arg2 = arg2; - } + public MyPigStorage(String arg1, String arg2) throws IOException { + super(arg1); + this.arg2 = arg2; + } - @Override - public void putNext(Tuple t) throws IOException { - t.append(arg2); - super.putNext(t); - } + @Override + public void putNext(Tuple t) throws IOException { + t.append(arg2); + super.putNext(t); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java index 6c933f9..cb9ce57 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java @@ -53,400 +53,400 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.pig.TestHCatLoader} instead */ public class TestHCatLoader extends TestCase { - private static final String TEST_DATA_DIR = System.getProperty("user.dir") + - "/build/test/data/" + TestHCatLoader.class.getCanonicalName(); - private 
static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String BASIC_FILE_NAME = TEST_DATA_DIR + "/basic.input.data"; - private static final String COMPLEX_FILE_NAME = TEST_DATA_DIR + "/complex.input.data"; - - private static final String BASIC_TABLE = "junit_unparted_basic"; - private static final String COMPLEX_TABLE = "junit_unparted_complex"; - private static final String PARTITIONED_TABLE = "junit_parted_basic"; - private static final String SPECIFIC_SIZE_TABLE = "junit_specific_size"; - private static Driver driver; - - private static int guardTestCount = 6; // ugh, instantiate using introspection in guardedSetupBeforeClass - private static boolean setupHasRun = false; - - - private static Map> basicInputData; - - protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')"; + private static final String TEST_DATA_DIR = System.getProperty("user.dir") + + "/build/test/data/" + TestHCatLoader.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String BASIC_FILE_NAME = TEST_DATA_DIR + "/basic.input.data"; + private static final String COMPLEX_FILE_NAME = TEST_DATA_DIR + "/complex.input.data"; + + private static final String BASIC_TABLE = "junit_unparted_basic"; + private static final String COMPLEX_TABLE = "junit_unparted_complex"; + private static final String PARTITIONED_TABLE = "junit_parted_basic"; + private static final String SPECIFIC_SIZE_TABLE = "junit_specific_size"; + private static Driver driver; + + private static int guardTestCount = 6; // ugh, instantiate using introspection in guardedSetupBeforeClass + private static boolean setupHasRun = false; + + + private static Map> basicInputData; + + protected String storageFormat() { + return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')"; + } + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; } - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); + createTable = createTable + "stored as " +storageFormat(); + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); } + } - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " +storageFormat(); - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + "]"); - } - } + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, null); + protected void guardedSetUpBeforeClass() throws Exception { + if (!setupHasRun) { + setupHasRun = true; + } else { + return; } - protected void guardedSetUpBeforeClass() throws Exception { - if (!setupHasRun) { - setupHasRun = true; - } else { - return; - } - - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - HiveConf hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - - cleanup(); - - createTable(BASIC_TABLE, "a int, b string"); - createTable(COMPLEX_TABLE, - "name string, studentid int, " - + "contact struct, " - + "currently_registered_courses array, " - + "current_grades map, " - + "phnos array>"); - - createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); - createTable(SPECIFIC_SIZE_TABLE, "a int, b string"); - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - basicInputData = new HashMap>(); - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - String sj = "S" + j + "S"; - input[k] = si + "\t" + sj; - basicInputData.put(k, new Pair(i, sj)); - k++; - } - } - HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input); - HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, - new String[]{ - //"Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)},[PHARMACOLOGY#A-,PSYCHIATRY#B+],{(415-253-6367,cell),(408-253-6367,landline)}", - //"Edward Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)},[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D],{(415-253-6367,cell),(408-253-6367,landline)}", - } - ); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);"); - - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - server.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - server.registerQuery("B = foreach A generate a,b;"); - server.registerQuery("B2 = filter B by a < 2;"); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=0');"); - - server.registerQuery("C = foreach A generate a,b;"); - server.registerQuery("C2 = filter C by a >= 2;"); - server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=1');"); - - server.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});"); - 
server.registerQuery("store D into '" + COMPLEX_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); } - - private void cleanup() throws IOException, CommandNeedRetryException { - dropTable(BASIC_TABLE); - dropTable(COMPLEX_TABLE); - dropTable(PARTITIONED_TABLE); - dropTable(SPECIFIC_SIZE_TABLE); + new File(TEST_WAREHOUSE_DIR).mkdirs(); + + HiveConf hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + + cleanup(); + + createTable(BASIC_TABLE, "a int, b string"); + createTable(COMPLEX_TABLE, + "name string, studentid int, " + + "contact struct, " + + "currently_registered_courses array, " + + "current_grades map, " + + "phnos array>"); + + createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); + createTable(SPECIFIC_SIZE_TABLE, "a int, b string"); + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + basicInputData = new HashMap>(); + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + String sj = "S" + j + "S"; + input[k] = si + "\t" + sj; + basicInputData.put(k, new Pair(i, sj)); + k++; + } } - - protected void guardedTearDownAfterClass() throws Exception { - guardTestCount--; - if (guardTestCount > 0) { - return; - } - cleanup(); + HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input); + HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, + new String[]{ + //"Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)},[PHARMACOLOGY#A-,PSYCHIATRY#B+],{(415-253-6367,cell),(408-253-6367,landline)}", + //"Edward Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)},[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D],{(415-253-6367,cell),(408-253-6367,landline)}", + } + ); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);"); + + server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); + server.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); + server.registerQuery("B = foreach A generate a,b;"); + server.registerQuery("B2 = filter B by a < 2;"); + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=0');"); + + server.registerQuery("C = foreach A generate a,b;"); + server.registerQuery("C2 = filter C by a >= 2;"); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=1');"); + + server.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});"); + server.registerQuery("store D into '" + COMPLEX_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); + server.executeBatch(); + + } + + private void cleanup() throws IOException, 
CommandNeedRetryException { + dropTable(BASIC_TABLE); + dropTable(COMPLEX_TABLE); + dropTable(PARTITIONED_TABLE); + dropTable(SPECIFIC_SIZE_TABLE); + } + + protected void guardedTearDownAfterClass() throws Exception { + guardTestCount--; + if (guardTestCount > 0) { + return; } - - @Override - protected void setUp() throws Exception { - guardedSetUpBeforeClass(); + cleanup(); + } + + @Override + protected void setUp() throws Exception { + guardedSetUpBeforeClass(); + } + + @Override + protected void tearDown() throws Exception { + guardedTearDownAfterClass(); + } + + public void testSchemaLoadBasic() throws IOException { + + PigServer server = new PigServer(ExecType.LOCAL); + + // test that schema was loaded correctly + server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); + Schema dumpedXSchema = server.dumpSchema("X"); + List Xfields = dumpedXSchema.getFields(); + assertEquals(2, Xfields.size()); + assertTrue(Xfields.get(0).alias.equalsIgnoreCase("a")); + assertTrue(Xfields.get(0).type == DataType.INTEGER); + assertTrue(Xfields.get(1).alias.equalsIgnoreCase("b")); + assertTrue(Xfields.get(1).type == DataType.CHARARRAY); + + } + + public void testReadDataBasic() throws IOException { + PigServer server = new PigServer(ExecType.LOCAL); + + server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("X"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(2, t.size()); + assertTrue(t.get(0).getClass() == Integer.class); + assertTrue(t.get(1).getClass() == String.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); + assertEquals(t.get(1), basicInputData.get(numTuplesRead).second); + numTuplesRead++; } - - @Override - protected void tearDown() throws Exception { - guardedTearDownAfterClass(); + assertEquals(basicInputData.size(), numTuplesRead); + } + + public void testSchemaLoadComplex() throws IOException { + + PigServer server = new PigServer(ExecType.LOCAL); + + // test that schema was loaded correctly + server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); + Schema dumpedKSchema = server.dumpSchema("K"); + List Kfields = dumpedKSchema.getFields(); + assertEquals(6, Kfields.size()); + + assertEquals(DataType.CHARARRAY, Kfields.get(0).type); + assertEquals("name", Kfields.get(0).alias.toLowerCase()); + + assertEquals(DataType.INTEGER, Kfields.get(1).type); + assertEquals("studentid", Kfields.get(1).alias.toLowerCase()); + + assertEquals(DataType.TUPLE, Kfields.get(2).type); + assertEquals("contact", Kfields.get(2).alias.toLowerCase()); + { + assertNotNull(Kfields.get(2).schema); + assertTrue(Kfields.get(2).schema.getFields().size() == 2); + assertTrue(Kfields.get(2).schema.getFields().get(0).type == DataType.CHARARRAY); + assertTrue(Kfields.get(2).schema.getFields().get(0).alias.equalsIgnoreCase("phno")); + assertTrue(Kfields.get(2).schema.getFields().get(1).type == DataType.CHARARRAY); + assertTrue(Kfields.get(2).schema.getFields().get(1).alias.equalsIgnoreCase("email")); } - - public void testSchemaLoadBasic() throws IOException { - - PigServer server = new PigServer(ExecType.LOCAL); - - // test that schema was loaded correctly - server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - Schema dumpedXSchema = server.dumpSchema("X"); - List Xfields = dumpedXSchema.getFields(); - assertEquals(2, Xfields.size()); - 
assertTrue(Xfields.get(0).alias.equalsIgnoreCase("a")); - assertTrue(Xfields.get(0).type == DataType.INTEGER); - assertTrue(Xfields.get(1).alias.equalsIgnoreCase("b")); - assertTrue(Xfields.get(1).type == DataType.CHARARRAY); - + assertEquals(DataType.BAG, Kfields.get(3).type); + assertEquals("currently_registered_courses", Kfields.get(3).alias.toLowerCase()); + { + assertNotNull(Kfields.get(3).schema); + assertEquals(1, Kfields.get(3).schema.getFields().size()); + assertEquals(DataType.TUPLE, Kfields.get(3).schema.getFields().get(0).type); + assertNotNull(Kfields.get(3).schema.getFields().get(0).schema); + assertEquals(1, Kfields.get(3).schema.getFields().get(0).schema.getFields().size()); + assertEquals(DataType.CHARARRAY, Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).type); + // assertEquals("course",Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); + // commented out, because the name becomes "innerfield" by default - we call it "course" in pig, + // but in the metadata, it'd be anonymous, so this would be autogenerated, which is fine } - - public void testReadDataBasic() throws IOException { - PigServer server = new PigServer(ExecType.LOCAL); - - server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("X"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(2, t.size()); - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); - assertEquals(t.get(1), basicInputData.get(numTuplesRead).second); - numTuplesRead++; - } - assertEquals(basicInputData.size(), numTuplesRead); + assertEquals(DataType.MAP, Kfields.get(4).type); + assertEquals("current_grades", Kfields.get(4).alias.toLowerCase()); + assertEquals(DataType.BAG, Kfields.get(5).type); + assertEquals("phnos", Kfields.get(5).alias.toLowerCase()); + { + assertNotNull(Kfields.get(5).schema); + assertEquals(1, Kfields.get(5).schema.getFields().size()); + assertEquals(DataType.TUPLE, Kfields.get(5).schema.getFields().get(0).type); + assertNotNull(Kfields.get(5).schema.getFields().get(0).schema); + assertTrue(Kfields.get(5).schema.getFields().get(0).schema.getFields().size() == 2); + assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).type); + assertEquals("phno", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); + assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).type); + assertEquals("type", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).alias.toLowerCase()); } - public void testSchemaLoadComplex() throws IOException { - - PigServer server = new PigServer(ExecType.LOCAL); - - // test that schema was loaded correctly - server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - Schema dumpedKSchema = server.dumpSchema("K"); - List Kfields = dumpedKSchema.getFields(); - assertEquals(6, Kfields.size()); - - assertEquals(DataType.CHARARRAY, Kfields.get(0).type); - assertEquals("name", Kfields.get(0).alias.toLowerCase()); - - assertEquals(DataType.INTEGER, Kfields.get(1).type); - assertEquals("studentid", Kfields.get(1).alias.toLowerCase()); - - assertEquals(DataType.TUPLE, Kfields.get(2).type); - assertEquals("contact", 
Kfields.get(2).alias.toLowerCase()); - { - assertNotNull(Kfields.get(2).schema); - assertTrue(Kfields.get(2).schema.getFields().size() == 2); - assertTrue(Kfields.get(2).schema.getFields().get(0).type == DataType.CHARARRAY); - assertTrue(Kfields.get(2).schema.getFields().get(0).alias.equalsIgnoreCase("phno")); - assertTrue(Kfields.get(2).schema.getFields().get(1).type == DataType.CHARARRAY); - assertTrue(Kfields.get(2).schema.getFields().get(1).alias.equalsIgnoreCase("email")); - } - assertEquals(DataType.BAG, Kfields.get(3).type); - assertEquals("currently_registered_courses", Kfields.get(3).alias.toLowerCase()); - { - assertNotNull(Kfields.get(3).schema); - assertEquals(1, Kfields.get(3).schema.getFields().size()); - assertEquals(DataType.TUPLE, Kfields.get(3).schema.getFields().get(0).type); - assertNotNull(Kfields.get(3).schema.getFields().get(0).schema); - assertEquals(1, Kfields.get(3).schema.getFields().get(0).schema.getFields().size()); - assertEquals(DataType.CHARARRAY, Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).type); - // assertEquals("course",Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); - // commented out, because the name becomes "innerfield" by default - we call it "course" in pig, - // but in the metadata, it'd be anonymous, so this would be autogenerated, which is fine - } - assertEquals(DataType.MAP, Kfields.get(4).type); - assertEquals("current_grades", Kfields.get(4).alias.toLowerCase()); - assertEquals(DataType.BAG, Kfields.get(5).type); - assertEquals("phnos", Kfields.get(5).alias.toLowerCase()); - { - assertNotNull(Kfields.get(5).schema); - assertEquals(1, Kfields.get(5).schema.getFields().size()); - assertEquals(DataType.TUPLE, Kfields.get(5).schema.getFields().get(0).type); - assertNotNull(Kfields.get(5).schema.getFields().get(0).schema); - assertTrue(Kfields.get(5).schema.getFields().get(0).schema.getFields().size() == 2); - assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).type); - assertEquals("phno", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); - assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).type); - assertEquals("type", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).alias.toLowerCase()); - } - + } + + public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException { + PigServer server = new PigServer(ExecType.LOCAL); + + driver.run("select * from " + PARTITIONED_TABLE); + ArrayList valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size()); + + server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); + Schema dumpedWSchema = server.dumpSchema("W"); + List Wfields = dumpedWSchema.getFields(); + assertEquals(3, Wfields.size()); + assertTrue(Wfields.get(0).alias.equalsIgnoreCase("a")); + assertTrue(Wfields.get(0).type == DataType.INTEGER); + assertTrue(Wfields.get(1).alias.equalsIgnoreCase("b")); + assertTrue(Wfields.get(1).type == DataType.CHARARRAY); + assertTrue(Wfields.get(2).alias.equalsIgnoreCase("bkt")); + assertTrue(Wfields.get(2).type == DataType.CHARARRAY); + + Iterator WIter = server.openIterator("W"); + Collection> valuesRead = new ArrayList>(); + while (WIter.hasNext()) { + Tuple t = WIter.next(); + assertTrue(t.size() == 3); + 
assertTrue(t.get(0).getClass() == Integer.class); + assertTrue(t.get(1).getClass() == String.class); + assertTrue(t.get(2).getClass() == String.class); + valuesRead.add(new Pair((Integer) t.get(0), (String) t.get(1))); + if ((Integer) t.get(0) < 2) { + assertEquals("0", t.get(2)); + } else { + assertEquals("1", t.get(2)); + } } - - public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException { - PigServer server = new PigServer(ExecType.LOCAL); - - driver.run("select * from " + PARTITIONED_TABLE); - ArrayList valuesReadFromHiveDriver = new ArrayList(); - driver.getResults(valuesReadFromHiveDriver); - assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size()); - - server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - Schema dumpedWSchema = server.dumpSchema("W"); - List Wfields = dumpedWSchema.getFields(); - assertEquals(3, Wfields.size()); - assertTrue(Wfields.get(0).alias.equalsIgnoreCase("a")); - assertTrue(Wfields.get(0).type == DataType.INTEGER); - assertTrue(Wfields.get(1).alias.equalsIgnoreCase("b")); - assertTrue(Wfields.get(1).type == DataType.CHARARRAY); - assertTrue(Wfields.get(2).alias.equalsIgnoreCase("bkt")); - assertTrue(Wfields.get(2).type == DataType.CHARARRAY); - - Iterator WIter = server.openIterator("W"); - Collection> valuesRead = new ArrayList>(); - while (WIter.hasNext()) { - Tuple t = WIter.next(); - assertTrue(t.size() == 3); - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertTrue(t.get(2).getClass() == String.class); - valuesRead.add(new Pair((Integer) t.get(0), (String) t.get(1))); - if ((Integer) t.get(0) < 2) { - assertEquals("0", t.get(2)); - } else { - assertEquals("1", t.get(2)); - } - } - assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size()); - - server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - server.registerQuery("P1filter = filter P1 by bkt == '0';"); - Iterator P1Iter = server.openIterator("P1filter"); - int count1 = 0; - while (P1Iter.hasNext()) { - Tuple t = P1Iter.next(); - - assertEquals("0", t.get(2)); - assertEquals(1, t.get(0)); - count1++; - } - assertEquals(3, count1); - - server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - server.registerQuery("P2filter = filter P2 by bkt == '1';"); - Iterator P2Iter = server.openIterator("P2filter"); - int count2 = 0; - while (P2Iter.hasNext()) { - Tuple t = P2Iter.next(); - - assertEquals("1", t.get(2)); - assertTrue(((Integer) t.get(0)) > 1); - count2++; - } - assertEquals(6, count2); + assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size()); + + server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); + server.registerQuery("P1filter = filter P1 by bkt == '0';"); + Iterator P1Iter = server.openIterator("P1filter"); + int count1 = 0; + while (P1Iter.hasNext()) { + Tuple t = P1Iter.next(); + + assertEquals("0", t.get(2)); + assertEquals(1, t.get(0)); + count1++; } - - public void testProjectionsBasic() throws IOException { - - PigServer server = new PigServer(ExecType.LOCAL); - - // projections are handled by using generate, not "as" on the Load - - server.registerQuery("Y1 = load '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - server.registerQuery("Y2 = foreach Y1 generate a;"); - server.registerQuery("Y3 = foreach Y1 generate b,a;"); - Schema 
dumpedY2Schema = server.dumpSchema("Y2"); - Schema dumpedY3Schema = server.dumpSchema("Y3"); - List Y2fields = dumpedY2Schema.getFields(); - List Y3fields = dumpedY3Schema.getFields(); - assertEquals(1, Y2fields.size()); - assertEquals("a", Y2fields.get(0).alias.toLowerCase()); - assertEquals(DataType.INTEGER, Y2fields.get(0).type); - assertEquals(2, Y3fields.size()); - assertEquals("b", Y3fields.get(0).alias.toLowerCase()); - assertEquals(DataType.CHARARRAY, Y3fields.get(0).type); - assertEquals("a", Y3fields.get(1).alias.toLowerCase()); - assertEquals(DataType.INTEGER, Y3fields.get(1).type); - - int numTuplesRead = 0; - Iterator Y2Iter = server.openIterator("Y2"); - while (Y2Iter.hasNext()) { - Tuple t = Y2Iter.next(); - assertEquals(t.size(), 1); - assertTrue(t.get(0).getClass() == Integer.class); - assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); - numTuplesRead++; - } - numTuplesRead = 0; - Iterator Y3Iter = server.openIterator("Y3"); - while (Y3Iter.hasNext()) { - Tuple t = Y3Iter.next(); - assertEquals(t.size(), 2); - assertTrue(t.get(0).getClass() == String.class); - assertEquals(t.get(0), basicInputData.get(numTuplesRead).second); - assertTrue(t.get(1).getClass() == Integer.class); - assertEquals(t.get(1), basicInputData.get(numTuplesRead).first); - numTuplesRead++; - } - assertEquals(basicInputData.size(), numTuplesRead); + assertEquals(3, count1); + + server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); + server.registerQuery("P2filter = filter P2 by bkt == '1';"); + Iterator P2Iter = server.openIterator("P2filter"); + int count2 = 0; + while (P2Iter.hasNext()) { + Tuple t = P2Iter.next(); + + assertEquals("1", t.get(2)); + assertTrue(((Integer) t.get(0)) > 1); + count2++; } - - public void testGetInputBytes() throws Exception { - File file = new File(TEST_WAREHOUSE_DIR + "/" + SPECIFIC_SIZE_TABLE + "/part-m-00000"); - file.deleteOnExit(); - RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw"); - randomAccessFile.setLength(2L * 1024 * 1024 * 1024); - - Job job = new Job(); - HCatLoader hCatLoader = new HCatLoader(); - hCatLoader.setUDFContextSignature(this.getName()); - hCatLoader.setLocation(SPECIFIC_SIZE_TABLE, job); - ResourceStatistics statistics = hCatLoader.getStatistics(file.getAbsolutePath(), job); - assertEquals(2048, (long) statistics.getmBytes()); + assertEquals(6, count2); + } + + public void testProjectionsBasic() throws IOException { + + PigServer server = new PigServer(ExecType.LOCAL); + + // projections are handled by using generate, not "as" on the Load + + server.registerQuery("Y1 = load '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); + server.registerQuery("Y2 = foreach Y1 generate a;"); + server.registerQuery("Y3 = foreach Y1 generate b,a;"); + Schema dumpedY2Schema = server.dumpSchema("Y2"); + Schema dumpedY3Schema = server.dumpSchema("Y3"); + List Y2fields = dumpedY2Schema.getFields(); + List Y3fields = dumpedY3Schema.getFields(); + assertEquals(1, Y2fields.size()); + assertEquals("a", Y2fields.get(0).alias.toLowerCase()); + assertEquals(DataType.INTEGER, Y2fields.get(0).type); + assertEquals(2, Y3fields.size()); + assertEquals("b", Y3fields.get(0).alias.toLowerCase()); + assertEquals(DataType.CHARARRAY, Y3fields.get(0).type); + assertEquals("a", Y3fields.get(1).alias.toLowerCase()); + assertEquals(DataType.INTEGER, Y3fields.get(1).type); + + int numTuplesRead = 0; + Iterator Y2Iter = server.openIterator("Y2"); + while (Y2Iter.hasNext()) { + Tuple t = 
Y2Iter.next(); + assertEquals(t.size(), 1); + assertTrue(t.get(0).getClass() == Integer.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); + numTuplesRead++; } - - public void testConvertBooleanToInt() throws Exception { - String tbl = "test_convert_boolean_to_int"; - String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt"; - File inputDataDir = new File(inputFileName).getParentFile(); - inputDataDir.mkdir(); - - String[] lines = new String[]{"llama\t1", "alpaca\t0"}; - HcatTestUtils.createTestDataFile(inputFileName, lines); - - assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode()); - assertEquals(0, driver.run("create external table " + tbl + - " (a string, b boolean) row format delimited fields terminated by '\t'" + - " stored as textfile location 'file://" + - inputDataDir.getAbsolutePath() + "'").getResponseCode()); - - Properties properties = new Properties(); - properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true"); - PigServer server = new PigServer(ExecType.LOCAL, properties); - server.registerQuery( - "data = load 'test_convert_boolean_to_int' using org.apache.hcatalog.pig.HCatLoader();"); - Schema schema = server.dumpSchema("data"); - assertEquals(2, schema.getFields().size()); - - assertEquals("a", schema.getField(0).alias); - assertEquals(DataType.CHARARRAY, schema.getField(0).type); - assertEquals("b", schema.getField(1).alias); - assertEquals(DataType.INTEGER, schema.getField(1).type); - - Iterator iterator = server.openIterator("data"); - Tuple t = iterator.next(); - assertEquals("llama", t.get(0)); - // TODO: Figure out how to load a text file into Hive with boolean columns. This next assert - // passes because data was loaded as integers, not because it was converted. 
- assertEquals(1, t.get(1)); - t = iterator.next(); - assertEquals("alpaca", t.get(0)); - assertEquals(0, t.get(1)); - assertFalse(iterator.hasNext()); + numTuplesRead = 0; + Iterator Y3Iter = server.openIterator("Y3"); + while (Y3Iter.hasNext()) { + Tuple t = Y3Iter.next(); + assertEquals(t.size(), 2); + assertTrue(t.get(0).getClass() == String.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).second); + assertTrue(t.get(1).getClass() == Integer.class); + assertEquals(t.get(1), basicInputData.get(numTuplesRead).first); + numTuplesRead++; } + assertEquals(basicInputData.size(), numTuplesRead); + } + + public void testGetInputBytes() throws Exception { + File file = new File(TEST_WAREHOUSE_DIR + "/" + SPECIFIC_SIZE_TABLE + "/part-m-00000"); + file.deleteOnExit(); + RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw"); + randomAccessFile.setLength(2L * 1024 * 1024 * 1024); + + Job job = new Job(); + HCatLoader hCatLoader = new HCatLoader(); + hCatLoader.setUDFContextSignature(this.getName()); + hCatLoader.setLocation(SPECIFIC_SIZE_TABLE, job); + ResourceStatistics statistics = hCatLoader.getStatistics(file.getAbsolutePath(), job); + assertEquals(2048, (long) statistics.getmBytes()); + } + + public void testConvertBooleanToInt() throws Exception { + String tbl = "test_convert_boolean_to_int"; + String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt"; + File inputDataDir = new File(inputFileName).getParentFile(); + inputDataDir.mkdir(); + + String[] lines = new String[]{"llama\t1", "alpaca\t0"}; + HcatTestUtils.createTestDataFile(inputFileName, lines); + + assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode()); + assertEquals(0, driver.run("create external table " + tbl + + " (a string, b boolean) row format delimited fields terminated by '\t'" + + " stored as textfile location 'file://" + + inputDataDir.getAbsolutePath() + "'").getResponseCode()); + + Properties properties = new Properties(); + properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true"); + PigServer server = new PigServer(ExecType.LOCAL, properties); + server.registerQuery( + "data = load 'test_convert_boolean_to_int' using org.apache.hcatalog.pig.HCatLoader();"); + Schema schema = server.dumpSchema("data"); + assertEquals(2, schema.getFields().size()); + + assertEquals("a", schema.getField(0).alias); + assertEquals(DataType.CHARARRAY, schema.getField(0).type); + assertEquals("b", schema.getField(1).alias); + assertEquals(DataType.INTEGER, schema.getField(1).type); + + Iterator iterator = server.openIterator("data"); + Tuple t = iterator.next(); + assertEquals("llama", t.get(0)); + // TODO: Figure out how to load a text file into Hive with boolean columns. This next assert + // passes because data was loaded as integers, not because it was converted. 
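        // For reference, a minimal sketch of the conversion switch this test exercises; the
        // table name 'some_hive_table' below is a placeholder assumption, not a table created here.
        //   Properties props = new Properties();
        //   props.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true");
        //   PigServer pig = new PigServer(ExecType.LOCAL, props);
        //   pig.registerQuery("rows = load 'some_hive_table' using org.apache.hcatalog.pig.HCatLoader();");
        //   // With the flag set, a Hive BOOLEAN column is reported to Pig as DataType.INTEGER (0/1),
        //   // which is exactly what the schema and value assertions around this point verify.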
+ assertEquals(1, t.get(1)); + t = iterator.next(); + assertEquals("alpaca", t.get(0)); + assertEquals(0, t.get(1)); + assertFalse(iterator.hasNext()); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderComplexSchema.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderComplexSchema.java index 64fc05c..2085cf5 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderComplexSchema.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderComplexSchema.java @@ -54,250 +54,250 @@ */ public class TestHCatLoaderComplexSchema { - //private static MiniCluster cluster = MiniCluster.buildCluster(); - private static Driver driver; - //private static Properties props; - private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoaderComplexSchema.class); - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); - } - - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; - LOG.info("Creating table:\n {}", createTable); - CommandProcessorResponse result = driver.run(createTable); - int retCode = result.getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + " " + result.getErrorMessage() + "]"); - } - } - - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, null); + //private static MiniCluster cluster = MiniCluster.buildCluster(); + private static Driver driver; + //private static Properties props; + private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoaderComplexSchema.class); + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; } - - @BeforeClass - public static void setUpBeforeClass() throws Exception { - - HiveConf hiveConf = new HiveConf(TestHCatLoaderComplexSchema.class); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - //props = new Properties(); - //props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - + createTable = createTable + "stored as RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; + LOG.info("Creating table:\n {}", createTable); + CommandProcessorResponse result = driver.run(createTable); + int retCode = result.getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + " " + result.getErrorMessage() + "]"); } - - private static final TupleFactory tf = TupleFactory.getInstance(); - private static final BagFactory bf = BagFactory.getInstance(); - - private Tuple t(Object... objects) { - return tf.newTuple(Arrays.asList(objects)); + } + + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + + HiveConf hiveConf = new HiveConf(TestHCatLoaderComplexSchema.class); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + //props = new Properties(); + //props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); + + } + + private static final TupleFactory tf = TupleFactory.getInstance(); + private static final BagFactory bf = BagFactory.getInstance(); + + private Tuple t(Object... objects) { + return tf.newTuple(Arrays.asList(objects)); + } + + private DataBag b(Tuple... 
objects) { + return bf.newDefaultBag(Arrays.asList(objects)); + } + + /** + * artificially complex nested schema to test nested schema conversion + * @throws Exception + */ + @Test + public void testSyntheticComplexSchema() throws Exception { + String pigSchema = + "a: " + + "(" + + "aa: chararray, " + + "ab: long, " + + "ac: map[], " + + "ad: { t: (ada: long) }, " + + "ae: { t: (aea:long, aeb: ( aeba: chararray, aebb: long)) }," + + "af: (afa: chararray, afb: long) " + + ")," + + "b: chararray, " + + "c: long, " + + "d: { t: (da:long, db: ( dba: chararray, dbb: long), dc: { t: (dca: long) } ) } "; + + // with extra structs + String tableSchema = + "a struct<" + + "aa: string, " + + "ab: bigint, " + + "ac: map, " + + "ad: array>, " + + "ae: array>>," + + "af: struct " + + ">, " + + "b string, " + + "c bigint, " + + "d array, dc: array>>>"; + + // without extra structs + String tableSchema2 = + "a struct<" + + "aa: string, " + + "ab: bigint, " + + "ac: map, " + + "ad: array, " + + "ae: array>>," + + "af: struct " + + ">, " + + "b string, " + + "c bigint, " + + "d array, dc: array>>"; + + List data = new ArrayList(); + for (int i = 0; i < 10; i++) { + Tuple t = t( + t( + "aa test", + 2l, + new HashMap() { + { + put("ac test1", "test 1"); + put("ac test2", "test 2"); + } + }, + b(t(3l), t(4l)), + b(t(5l, t("aeba test", 6l))), + t("afa test", 7l) + ), + "b test", + (long) i, + b(t(8l, t("dba test", 9l), b(t(10l))))); + + data.add(t); } - - private DataBag b(Tuple... objects) { - return bf.newDefaultBag(Arrays.asList(objects)); + verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, true); + verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, false); + verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, true); + verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, false); + + } + + private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List data, boolean provideSchemaToStorer) + throws IOException, CommandNeedRetryException, ExecException, FrontendException { + MockLoader.setData(tablename + "Input", data); + try { + createTable(tablename, tableSchema); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + tablename + "Input' using org.apache.hcatalog.pig.MockLoader() AS (" + pigSchema + ");"); + Schema dumpedASchema = server.dumpSchema("A"); + server.registerQuery("STORE A into '" + tablename + "' using org.apache.hcatalog.pig.HCatStorer(" + + (provideSchemaToStorer ? 
"'', '" + pigSchema + "'" : "") + + ");"); + + ExecJob execJob = server.executeBatch().get(0); + if (!execJob.getStatistics().isSuccessful()) { + throw new RuntimeException("Import failed", execJob.getException()); + } + // test that schema was loaded correctly + server.registerQuery("X = load '" + tablename + "' using org.apache.hcatalog.pig.HCatLoader();"); + server.dumpSchema("X"); + Iterator it = server.openIterator("X"); + int i = 0; + while (it.hasNext()) { + Tuple input = data.get(i++); + Tuple output = it.next(); + Assert.assertEquals(input.toString(), output.toString()); + LOG.info("tuple : {} ", output); + } + Schema dumpedXSchema = server.dumpSchema("X"); + + Assert.assertEquals( + "expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", + "", + compareIgnoreFiledNames(dumpedASchema, dumpedXSchema)); + + } finally { + dropTable(tablename); } - - /** - * artificially complex nested schema to test nested schema conversion - * @throws Exception - */ - @Test - public void testSyntheticComplexSchema() throws Exception { - String pigSchema = - "a: " + - "(" + - "aa: chararray, " + - "ab: long, " + - "ac: map[], " + - "ad: { t: (ada: long) }, " + - "ae: { t: (aea:long, aeb: ( aeba: chararray, aebb: long)) }," + - "af: (afa: chararray, afb: long) " + - ")," + - "b: chararray, " + - "c: long, " + - "d: { t: (da:long, db: ( dba: chararray, dbb: long), dc: { t: (dca: long) } ) } "; - - // with extra structs - String tableSchema = - "a struct<" + - "aa: string, " + - "ab: bigint, " + - "ac: map, " + - "ad: array>, " + - "ae: array>>," + - "af: struct " + - ">, " + - "b string, " + - "c bigint, " + - "d array, dc: array>>>"; - - // without extra structs - String tableSchema2 = - "a struct<" + - "aa: string, " + - "ab: bigint, " + - "ac: map, " + - "ad: array, " + - "ae: array>>," + - "af: struct " + - ">, " + - "b string, " + - "c bigint, " + - "d array, dc: array>>"; - - List data = new ArrayList(); - for (int i = 0; i < 10; i++) { - Tuple t = t( - t( - "aa test", - 2l, - new HashMap() { - { - put("ac test1", "test 1"); - put("ac test2", "test 2"); - } - }, - b(t(3l), t(4l)), - b(t(5l, t("aeba test", 6l))), - t("afa test", 7l) - ), - "b test", - (long) i, - b(t(8l, t("dba test", 9l), b(t(10l))))); - - data.add(t); - } - verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, true); - verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, false); - verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, true); - verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, false); - + } + + private String compareIgnoreFiledNames(Schema expected, Schema got) throws FrontendException { + if (expected == null || got == null) { + if (expected == got) { + return ""; + } else { + return "\nexpected " + expected + " got " + got; + } } - - private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List data, boolean provideSchemaToStorer) - throws IOException, CommandNeedRetryException, ExecException, FrontendException { - MockLoader.setData(tablename + "Input", data); - try { - createTable(tablename, tableSchema); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + tablename + "Input' using org.apache.hcatalog.pig.MockLoader() AS (" + pigSchema + ");"); - Schema dumpedASchema = server.dumpSchema("A"); - server.registerQuery("STORE A into '" + tablename + "' using org.apache.hcatalog.pig.HCatStorer(" - + 
(provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") - + ");"); - - ExecJob execJob = server.executeBatch().get(0); - if (!execJob.getStatistics().isSuccessful()) { - throw new RuntimeException("Import failed", execJob.getException()); - } - // test that schema was loaded correctly - server.registerQuery("X = load '" + tablename + "' using org.apache.hcatalog.pig.HCatLoader();"); - server.dumpSchema("X"); - Iterator it = server.openIterator("X"); - int i = 0; - while (it.hasNext()) { - Tuple input = data.get(i++); - Tuple output = it.next(); - Assert.assertEquals(input.toString(), output.toString()); - LOG.info("tuple : {} ", output); - } - Schema dumpedXSchema = server.dumpSchema("X"); - - Assert.assertEquals( - "expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", - "", - compareIgnoreFiledNames(dumpedASchema, dumpedXSchema)); - - } finally { - dropTable(tablename); - } + if (expected.size() != got.size()) { + return "\nsize expected " + expected.size() + " (" + expected + ") got " + got.size() + " (" + got + ")"; } - - private String compareIgnoreFiledNames(Schema expected, Schema got) throws FrontendException { - if (expected == null || got == null) { - if (expected == got) { - return ""; - } else { - return "\nexpected " + expected + " got " + got; - } - } - if (expected.size() != got.size()) { - return "\nsize expected " + expected.size() + " (" + expected + ") got " + got.size() + " (" + got + ")"; - } - String message = ""; - for (int i = 0; i < expected.size(); i++) { - FieldSchema expectedField = expected.getField(i); - FieldSchema gotField = got.getField(i); - if (expectedField.type != gotField.type) { - message += "\ntype expected " + expectedField.type + " (" + expectedField + ") got " + gotField.type + " (" + gotField + ")"; - } else { - message += compareIgnoreFiledNames(expectedField.schema, gotField.schema); - } - } - return message; + String message = ""; + for (int i = 0; i < expected.size(); i++) { + FieldSchema expectedField = expected.getField(i); + FieldSchema gotField = got.getField(i); + if (expectedField.type != gotField.type) { + message += "\ntype expected " + expectedField.type + " (" + expectedField + ") got " + gotField.type + " (" + gotField + ")"; + } else { + message += compareIgnoreFiledNames(expectedField.schema, gotField.schema); + } } - - /** - * tests that unnecessary tuples are drop while converting schema - * (Pig requires Tuples in Bags) - * @throws Exception - */ - @Test - public void testTupleInBagInTupleInBag() throws Exception { - String pigSchema = "a: { b : ( c: { d: (i : long) } ) }"; - - String tableSchema = "a array< array< bigint > >"; - - List data = new ArrayList(); - data.add(t(b(t(b(t(100l), t(101l))), t(b(t(110l)))))); - data.add(t(b(t(b(t(200l))), t(b(t(210l))), t(b(t(220l)))))); - data.add(t(b(t(b(t(300l), t(301l)))))); - data.add(t(b(t(b(t(400l))), t(b(t(410l), t(411l), t(412l)))))); - - - verifyWriteRead("TupleInBagInTupleInBag1", pigSchema, tableSchema, data, true); - verifyWriteRead("TupleInBagInTupleInBag2", pigSchema, tableSchema, data, false); - - // test that we don't drop the unnecessary tuple if the table has the corresponding Struct - String tableSchema2 = "a array< struct< c: array< struct< i: bigint > > > >"; - - verifyWriteRead("TupleInBagInTupleInBag3", pigSchema, tableSchema2, data, true); - verifyWriteRead("TupleInBagInTupleInBag4", pigSchema, tableSchema2, data, false); - + return message; + } + + /** + * tests that unnecessary tuples are drop while converting schema + * (Pig 
requires Tuples in Bags) + * @throws Exception + */ + @Test + public void testTupleInBagInTupleInBag() throws Exception { + String pigSchema = "a: { b : ( c: { d: (i : long) } ) }"; + + String tableSchema = "a array< array< bigint > >"; + + List data = new ArrayList(); + data.add(t(b(t(b(t(100l), t(101l))), t(b(t(110l)))))); + data.add(t(b(t(b(t(200l))), t(b(t(210l))), t(b(t(220l)))))); + data.add(t(b(t(b(t(300l), t(301l)))))); + data.add(t(b(t(b(t(400l))), t(b(t(410l), t(411l), t(412l)))))); + + + verifyWriteRead("TupleInBagInTupleInBag1", pigSchema, tableSchema, data, true); + verifyWriteRead("TupleInBagInTupleInBag2", pigSchema, tableSchema, data, false); + + // test that we don't drop the unnecessary tuple if the table has the corresponding Struct + String tableSchema2 = "a array< struct< c: array< struct< i: bigint > > > >"; + + verifyWriteRead("TupleInBagInTupleInBag3", pigSchema, tableSchema2, data, true); + verifyWriteRead("TupleInBagInTupleInBag4", pigSchema, tableSchema2, data, false); + + } + + @Test + public void testMapWithComplexData() throws Exception { + String pigSchema = "a: long, b: map[]"; + String tableSchema = "a bigint, b map>"; + + List data = new ArrayList(); + for (int i = 0; i < 10; i++) { + Tuple t = t( + (long) i, + new HashMap() { + { + put("b test 1", t(1l, "test 1")); + put("b test 2", t(2l, "test 2")); + } + }); + + data.add(t); } + verifyWriteRead("testMapWithComplexData", pigSchema, tableSchema, data, true); + verifyWriteRead("testMapWithComplexData2", pigSchema, tableSchema, data, false); - @Test - public void testMapWithComplexData() throws Exception { - String pigSchema = "a: long, b: map[]"; - String tableSchema = "a bigint, b map>"; - - List data = new ArrayList(); - for (int i = 0; i < 10; i++) { - Tuple t = t( - (long) i, - new HashMap() { - { - put("b test 1", t(1l, "test 1")); - put("b test 2", t(2l, "test 2")); - } - }); - - data.add(t); - } - verifyWriteRead("testMapWithComplexData", pigSchema, tableSchema, data, true); - verifyWriteRead("testMapWithComplexData2", pigSchema, tableSchema, data, false); - - } + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderStorer.java index 0cab16c..d2f8956 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderStorer.java @@ -42,101 +42,101 @@ */ public class TestHCatLoaderStorer extends HCatBaseTest { - /** - * Ensure Pig can read/write tinyint/smallint columns. - */ - @Test - public void testSmallTinyInt() throws Exception { - - String readTblName = "test_small_tiny_int"; - File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData"); - File dataFile = new File(dataDir, "testSmallTinyInt.tsv"); - - String writeTblName = "test_small_tiny_int_write"; - File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv"); - - FileUtil.fullyDelete(dataDir); // Might not exist - Assert.assertTrue(dataDir.mkdir()); - - HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[]{ - String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), - String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) - }); - - // Create a table with smallint/tinyint columns, load data, and query from Hive. 
- Assert.assertEquals(0, driver.run("drop table if exists " + readTblName).getResponseCode()); - Assert.assertEquals(0, driver.run("create external table " + readTblName + - " (my_small_int smallint, my_tiny_int tinyint)" + - " row format delimited fields terminated by '\t' stored as textfile").getResponseCode()); - Assert.assertEquals(0, driver.run("load data local inpath '" + - dataDir.getAbsolutePath() + "' into table " + readTblName).getResponseCode()); - - PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery( - "data = load '" + readTblName + "' using org.apache.hcatalog.pig.HCatLoader();"); - - // Ensure Pig schema is correct. - Schema schema = server.dumpSchema("data"); - Assert.assertEquals(2, schema.getFields().size()); - Assert.assertEquals("my_small_int", schema.getField(0).alias); - Assert.assertEquals(DataType.INTEGER, schema.getField(0).type); - Assert.assertEquals("my_tiny_int", schema.getField(1).alias); - Assert.assertEquals(DataType.INTEGER, schema.getField(1).type); - - // Ensure Pig can read data correctly. - Iterator it = server.openIterator("data"); - Tuple t = it.next(); - Assert.assertEquals(new Integer(Short.MIN_VALUE), t.get(0)); - Assert.assertEquals(new Integer(Byte.MIN_VALUE), t.get(1)); - t = it.next(); - Assert.assertEquals(new Integer(Short.MAX_VALUE), t.get(0)); - Assert.assertEquals(new Integer(Byte.MAX_VALUE), t.get(1)); - Assert.assertFalse(it.hasNext()); - - // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the - // bounds of the column type are written, and values outside throw an exception. - Assert.assertEquals(0, driver.run("drop table if exists " + writeTblName).getResponseCode()); - Assert.assertEquals(0, driver.run("create table " + writeTblName + - " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); - - // Values within the column type bounds. - HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[]{ - String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), - String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) - }); - smallTinyIntBoundsCheckHelper(writeDataFile.getAbsolutePath(), ExecJob.JOB_STATUS.COMPLETED); - - // Values outside the column type bounds will fail at runtime. 
- HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[]{ - String.format("%d\t%d", Short.MIN_VALUE - 1, 0)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); - - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[]{ - String.format("%d\t%d", Short.MAX_VALUE + 1, 0)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED); - - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[]{ - String.format("%d\t%d", 0, Byte.MIN_VALUE - 1)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); - - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[]{ - String.format("%d\t%d", 0, Byte.MAX_VALUE + 1)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED); - } - - private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) - throws Exception { - Assert.assertEquals(0, driver.run("drop table if exists test_tbl").getResponseCode()); - Assert.assertEquals(0, driver.run("create table test_tbl" + - " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("data = load '" + data + - "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);"); - server.registerQuery( - "store data into 'test_tbl' using org.apache.hcatalog.pig.HCatStorer();"); - List jobs = server.executeBatch(); - Assert.assertEquals(expectedStatus, jobs.get(0).getStatus()); - } + /** + * Ensure Pig can read/write tinyint/smallint columns. + */ + @Test + public void testSmallTinyInt() throws Exception { + + String readTblName = "test_small_tiny_int"; + File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData"); + File dataFile = new File(dataDir, "testSmallTinyInt.tsv"); + + String writeTblName = "test_small_tiny_int_write"; + File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv"); + + FileUtil.fullyDelete(dataDir); // Might not exist + Assert.assertTrue(dataDir.mkdir()); + + HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[]{ + String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), + String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) + }); + + // Create a table with smallint/tinyint columns, load data, and query from Hive. + Assert.assertEquals(0, driver.run("drop table if exists " + readTblName).getResponseCode()); + Assert.assertEquals(0, driver.run("create external table " + readTblName + + " (my_small_int smallint, my_tiny_int tinyint)" + + " row format delimited fields terminated by '\t' stored as textfile").getResponseCode()); + Assert.assertEquals(0, driver.run("load data local inpath '" + + dataDir.getAbsolutePath() + "' into table " + readTblName).getResponseCode()); + + PigServer server = new PigServer(ExecType.LOCAL); + server.registerQuery( + "data = load '" + readTblName + "' using org.apache.hcatalog.pig.HCatLoader();"); + + // Ensure Pig schema is correct. 
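      // Quick reference for the mapping verified by the assertions below (sketch; nothing beyond
      // what this test asserts is claimed): Hive SMALLINT and TINYINT are both surfaced to Pig as
      // DataType.INTEGER, and on the write path values outside the Short/Byte ranges fail at runtime.
      // A hypothetical pre-store guard, assuming a caller wants to fail fast on the client side:
      //   static void checkSmallIntRange(int v) {
      //     if (v < Short.MIN_VALUE || v > Short.MAX_VALUE) {
      //       throw new IllegalArgumentException("value out of smallint range: " + v);
      //     }
      //   }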
+ Schema schema = server.dumpSchema("data"); + Assert.assertEquals(2, schema.getFields().size()); + Assert.assertEquals("my_small_int", schema.getField(0).alias); + Assert.assertEquals(DataType.INTEGER, schema.getField(0).type); + Assert.assertEquals("my_tiny_int", schema.getField(1).alias); + Assert.assertEquals(DataType.INTEGER, schema.getField(1).type); + + // Ensure Pig can read data correctly. + Iterator it = server.openIterator("data"); + Tuple t = it.next(); + Assert.assertEquals(new Integer(Short.MIN_VALUE), t.get(0)); + Assert.assertEquals(new Integer(Byte.MIN_VALUE), t.get(1)); + t = it.next(); + Assert.assertEquals(new Integer(Short.MAX_VALUE), t.get(0)); + Assert.assertEquals(new Integer(Byte.MAX_VALUE), t.get(1)); + Assert.assertFalse(it.hasNext()); + + // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the + // bounds of the column type are written, and values outside throw an exception. + Assert.assertEquals(0, driver.run("drop table if exists " + writeTblName).getResponseCode()); + Assert.assertEquals(0, driver.run("create table " + writeTblName + + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); + + // Values within the column type bounds. + HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[]{ + String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), + String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) + }); + smallTinyIntBoundsCheckHelper(writeDataFile.getAbsolutePath(), ExecJob.JOB_STATUS.COMPLETED); + + // Values outside the column type bounds will fail at runtime. + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[]{ + String.format("%d\t%d", Short.MIN_VALUE - 1, 0)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); + + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[]{ + String.format("%d\t%d", Short.MAX_VALUE + 1, 0)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED); + + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[]{ + String.format("%d\t%d", 0, Byte.MIN_VALUE - 1)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); + + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[]{ + String.format("%d\t%d", 0, Byte.MAX_VALUE + 1)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED); + } + + private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) + throws Exception { + Assert.assertEquals(0, driver.run("drop table if exists test_tbl").getResponseCode()); + Assert.assertEquals(0, driver.run("create table test_tbl" + + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("data = load '" + data + + "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);"); + server.registerQuery( + "store data into 'test_tbl' using org.apache.hcatalog.pig.HCatStorer();"); + List jobs = server.executeBatch(); + Assert.assertEquals(expectedStatus, jobs.get(0).getStatus()); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java index 0bf898b..3d8d3bb 100644 --- 
a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java @@ -43,618 +43,618 @@ */ public class TestHCatStorer extends HCatBaseTest { - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - @Test - public void testPartColsInData() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - int LOOP_SIZE = 11; - String[] input = new String[LOOP_SIZE]; - for (int i = 0; i < LOOP_SIZE; i++) { - input[i] = i + "\t1"; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); - server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); - Iterator itr = server.openIterator("B"); - - int i = 0; - - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(2, t.size()); - Assert.assertEquals(t.get(0), i); - Assert.assertEquals(t.get(1), "1"); - i++; - } - - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); - } - - @Test - public void testMultiPartColsInData() throws IOException, CommandNeedRetryException { - - driver.run("drop table employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", - "111238\tKalpana\t01/01/2000\tF\tIN\tKA", - "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("TN = FILTER A BY emp_state == 'TN';"); - pig.registerQuery("KA = FILTER A BY emp_state == 'KA';"); - pig.registerQuery("KL = FILTER A BY emp_state == 'KL';"); - pig.registerQuery("AP = FILTER A BY emp_state == 'AP';"); - pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=TN');"); - pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KA');"); - pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KL');"); - pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=AP');"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(4, results.size()); - Collections.sort(results); - 
Assert.assertEquals(inputData[0], results.get(0)); - Assert.assertEquals(inputData[1], results.get(1)); - Assert.assertEquals(inputData[2], results.get(2)); - Assert.assertEquals(inputData[3], results.get(3)); - driver.run("drop table employee"); - } - - @Test - public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - int LOOP_SIZE = 11; - String[] input = new String[LOOP_SIZE]; - for (int i = 0; i < LOOP_SIZE; i++) { - input[i] = i + ""; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); - server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); - Iterator itr = server.openIterator("B"); - - int i = 0; - - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(2, t.size()); - Assert.assertEquals(t.get(0), i); - Assert.assertEquals(t.get(1), "1"); - i++; - } - - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); - } - - @Test - public void testNoAlias() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_parted"); - String createTable = "create table junit_parted(a int, b string) partitioned by (ds string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - PigServer server = new PigServer(ExecType.LOCAL); - boolean errCaught = false; - try { - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("B = foreach A generate a+10, b;"); - server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); - server.executeBatch(); - } catch (PigException fe) { - PigException pe = LogUtils.getPigException(fe); - Assert.assertTrue(pe instanceof FrontendException); - Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); - Assert.assertTrue(pe.getMessage().contains("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.")); - errCaught = true; - } - Assert.assertTrue(errCaught); - errCaught = false; - try { - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);"); - server.registerQuery("B = foreach A generate a, B;"); - server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); - server.executeBatch(); - } catch (PigException fe) { - PigException pe = LogUtils.getPigException(fe); - Assert.assertTrue(pe instanceof FrontendException); - Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); - Assert.assertTrue(pe.getMessage().contains("Column names should all be in lowercase. 
Invalid name found: B")); - errCaught = true; - } - driver.run("drop table junit_parted"); - Assert.assertTrue(errCaught); - } - - @Test - public void testStoreMultiTables() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - driver.run("drop table junit_unparted2"); - createTable = "create table junit_unparted2(a int, b string) stored as RCFILE"; - retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("B = filter A by a < 2;"); - server.registerQuery("store B into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); - server.registerQuery("C = filter A by a >= 2;"); - server.registerQuery("store C into 'junit_unparted2' using " + HCatStorer.class.getName() + "();"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("select * from junit_unparted2"); - ArrayList res2 = new ArrayList(); - driver.getResults(res2); - - res.addAll(res2); - driver.run("drop table junit_unparted"); - driver.run("drop table junit_unparted2"); - - Iterator itr = res.iterator(); - for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { - Assert.assertEquals(input[i], itr.next()); - } - - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testStoreWithNoSchema() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { - Assert.assertEquals(input[i], itr.next()); - } - - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as 
RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { - Assert.assertEquals(input[i], itr.next()); - } - - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testEmptyStore() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("B = filter A by a > 100;"); - server.registerQuery("store B into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testBagNStruct() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(b string,a struct, arr_of_struct array, " + - "arr_of_struct2 array>, arr_of_struct3 array>) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = new String[]{"zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}", - "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (b:chararray, a:tuple(a1:int), arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)});"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','b:chararray, a:tuple(a1:int)," + - " arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, 
arr_of_struct3:bag{t3:tuple(s3:chararray)}');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - Assert.assertEquals("zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]", itr.next()); - Assert.assertEquals("chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]", itr.next()); - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b float, c double, d bigint, e string, f binary, g binary) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int i = 0; - String[] input = new String[3]; - input[i++] = "0\t\t\t\t\t\t"; //Empty values except first column - input[i++] = "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\tbinary-data"; //First column empty - input[i++] = i + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\tbinary-data"; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:float, c:double, d:long, e:chararray, f:bytearray);"); - //null gets stored into column g which is a binary field. - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int, b:float, c:double, d:long, e:chararray,f:bytearray');"); - server.executeBatch(); - - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - - Iterator itr = res.iterator(); - Assert.assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", itr.next()); - Assert.assertEquals("NULL\t4.2\t2.2\t4\tlets hcat\tbinary-data\tNULL", itr.next()); - Assert.assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tbinary-data\tNULL", itr.next()); - Assert.assertFalse(itr.hasNext()); - - server.registerQuery("B = load 'junit_unparted' using " + HCatLoader.class.getName() + ";"); - Iterator iter = server.openIterator("B"); - int count = 0; - int num5nulls = 0; - while (iter.hasNext()) { - Tuple t = iter.next(); - if (t.get(5) == null) { - num5nulls++; - } else { - Assert.assertTrue(t.get(5) instanceof DataByteArray); - } - Assert.assertNull(t.get(6)); - count++; - } - Assert.assertEquals(3, count); - Assert.assertEquals(1, num5nulls); - driver.run("drop table junit_unparted"); - } - - @Test - public void testStoreFuncSimple() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] inputData = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - inputData[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer server = new 
PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - Assert.assertEquals(si + "\t" + j, itr.next()); - } - } - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", - "111238\tKalpana\t01/01/2000\tF\tIN\tKA", - "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN');"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(4, results.size()); - Collections.sort(results); - Assert.assertEquals(inputData[0], results.get(0)); - Assert.assertEquals(inputData[1], results.get(1)); - Assert.assertEquals(inputData[2], results.get(2)); - Assert.assertEquals(inputData[3], results.get(3)); - driver.run("drop table employee"); - } - - @Test - public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", - "111238\tKalpana\t01/01/2000\tF\tIN\tKA", - "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + 
HCatStorer.class.getName() + "();"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(4, results.size()); - Collections.sort(results); - Assert.assertEquals(inputData[0], results.get(0)); - Assert.assertEquals(inputData[1], results.get(1)); - Assert.assertEquals(inputData[2], results.get(2)); - Assert.assertEquals(inputData[3], results.get(3)); - driver.run("drop table employee"); - } - - @Test - public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {}; - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(0, results.size()); - driver.run("drop table employee"); - } - - public void testPartitionPublish() - throws IOException, CommandNeedRetryException { - - driver.run("drop table ptn_fail"); - String createTable = "create table ptn_fail(a int, c string) partitioned by (b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - int LOOP_SIZE = 11; - String[] input = new String[LOOP_SIZE]; - - for (int i = 0; i < LOOP_SIZE; i++) { - input[i] = i + "\tmath"; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME - + "' as (a:int, c:chararray);"); - server.registerQuery("B = filter A by " + FailEvalFunc.class.getName() - + "($0);"); - server.registerQuery("store B into 'ptn_fail' using " - + HCatStorer.class.getName() + "('b=math');"); - server.executeBatch(); - - String query = "show partitions ptn_fail"; - retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new IOException("Error " + retCode + " running query " - + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - Assert.assertEquals(0, res.size()); - - // Make sure the partitions directory is not in hdfs. 
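      // Sketch of the post-failure check that follows (assumption: TEST_WAREHOUSE_DIR is a
      // local-filesystem warehouse in these tests, so java.io.File is sufficient): the table
      // directory must still exist, but the failed partition must never have been published.
      //   static boolean partitionPublished(String warehouse, String table, String partSpec) {
      //     return new java.io.File(warehouse + "/" + table + "/" + partSpec).exists();
      //   }
      //   // Expected here: partitionPublished(TEST_WAREHOUSE_DIR, "ptn_fail", "b=math") == false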
- Assert.assertTrue((new File(TEST_WAREHOUSE_DIR + "/ptn_fail")).exists()); - Assert.assertFalse((new File(TEST_WAREHOUSE_DIR + "/ptn_fail/b=math")) - .exists()); - } - - static public class FailEvalFunc extends EvalFunc { - - /* - * @param Tuple /* @return null /* @throws IOException - * - * @see org.apache.pig.EvalFunc#exec(org.apache.pig.data.Tuple) - */ - @Override - public Boolean exec(Tuple tuple) throws IOException { - throw new IOException("Eval Func to mimic Failure."); - } + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + @Test + public void testPartColsInData() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + int LOOP_SIZE = 11; + String[] input = new String[LOOP_SIZE]; + for (int i = 0; i < LOOP_SIZE; i++) { + input[i] = i + "\t1"; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); + server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); + Iterator itr = server.openIterator("B"); + + int i = 0; + + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(2, t.size()); + Assert.assertEquals(t.get(0), i); + Assert.assertEquals(t.get(1), "1"); + i++; + } + + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); + } + + @Test + public void testMultiPartColsInData() throws IOException, CommandNeedRetryException { + + driver.run("drop table employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + "111238\tKalpana\t01/01/2000\tF\tIN\tKA", + "111239\tSatya\t01/01/2001\tM\tIN\tKL", + "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("TN = FILTER A BY emp_state == 'TN';"); + pig.registerQuery("KA = FILTER A BY emp_state == 'KA';"); + pig.registerQuery("KL = FILTER A BY emp_state == 'KL';"); + pig.registerQuery("AP = FILTER A BY emp_state == 'AP';"); + pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=TN');"); + pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KA');"); + pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KL');"); + pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=AP');"); + 
pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(4, results.size()); + Collections.sort(results); + Assert.assertEquals(inputData[0], results.get(0)); + Assert.assertEquals(inputData[1], results.get(1)); + Assert.assertEquals(inputData[2], results.get(2)); + Assert.assertEquals(inputData[3], results.get(3)); + driver.run("drop table employee"); + } + + @Test + public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + int LOOP_SIZE = 11; + String[] input = new String[LOOP_SIZE]; + for (int i = 0; i < LOOP_SIZE; i++) { + input[i] = i + ""; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); + server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); + Iterator itr = server.openIterator("B"); + + int i = 0; + + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(2, t.size()); + Assert.assertEquals(t.get(0), i); + Assert.assertEquals(t.get(1), "1"); + i++; + } + + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); + } + + @Test + public void testNoAlias() throws IOException, CommandNeedRetryException { + driver.run("drop table junit_parted"); + String createTable = "create table junit_parted(a int, b string) partitioned by (ds string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + PigServer server = new PigServer(ExecType.LOCAL); + boolean errCaught = false; + try { + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B = foreach A generate a+10, b;"); + server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); + server.executeBatch(); + } catch (PigException fe) { + PigException pe = LogUtils.getPigException(fe); + Assert.assertTrue(pe instanceof FrontendException); + Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); + Assert.assertTrue(pe.getMessage().contains("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.")); + errCaught = true; + } + Assert.assertTrue(errCaught); + errCaught = false; + try { + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);"); + server.registerQuery("B = foreach A generate a, B;"); + server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); + server.executeBatch(); + } catch (PigException fe) { + PigException pe = LogUtils.getPigException(fe); + Assert.assertTrue(pe instanceof FrontendException); + Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); + Assert.assertTrue(pe.getMessage().contains("Column names should all be in lowercase. 
Invalid name found: B")); + errCaught = true; + } + driver.run("drop table junit_parted"); + Assert.assertTrue(errCaught); + } + + @Test + public void testStoreMultiTables() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + driver.run("drop table junit_unparted2"); + createTable = "create table junit_unparted2(a int, b string) stored as RCFILE"; + retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B = filter A by a < 2;"); + server.registerQuery("store B into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); + server.registerQuery("C = filter A by a >= 2;"); + server.registerQuery("store C into 'junit_unparted2' using " + HCatStorer.class.getName() + "();"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("select * from junit_unparted2"); + ArrayList res2 = new ArrayList(); + driver.getResults(res2); + + res.addAll(res2); + driver.run("drop table junit_unparted"); + driver.run("drop table junit_unparted2"); + + Iterator itr = res.iterator(); + for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { + Assert.assertEquals(input[i], itr.next()); + } + + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testStoreWithNoSchema() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { + Assert.assertEquals(input[i], itr.next()); + } + + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as 
RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { + Assert.assertEquals(input[i], itr.next()); + } + + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testEmptyStore() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B = filter A by a > 100;"); + server.registerQuery("store B into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testBagNStruct() throws IOException, CommandNeedRetryException { + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(b string,a struct, arr_of_struct array, " + + "arr_of_struct2 array>, arr_of_struct3 array>) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = new String[]{"zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}", + "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (b:chararray, a:tuple(a1:int), arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)});"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','b:chararray, a:tuple(a1:int)," + + " arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, 
arr_of_struct3:bag{t3:tuple(s3:chararray)}');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + Assert.assertEquals("zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]", itr.next()); + Assert.assertEquals("chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]", itr.next()); + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b float, c double, d bigint, e string, f binary, g binary) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int i = 0; + String[] input = new String[3]; + input[i++] = "0\t\t\t\t\t\t"; //Empty values except first column + input[i++] = "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\tbinary-data"; //First column empty + input[i++] = i + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\tbinary-data"; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:float, c:double, d:long, e:chararray, f:bytearray);"); + //null gets stored into column g which is a binary field. + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int, b:float, c:double, d:long, e:chararray,f:bytearray');"); + server.executeBatch(); + + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + + Iterator itr = res.iterator(); + Assert.assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", itr.next()); + Assert.assertEquals("NULL\t4.2\t2.2\t4\tlets hcat\tbinary-data\tNULL", itr.next()); + Assert.assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tbinary-data\tNULL", itr.next()); + Assert.assertFalse(itr.hasNext()); + + server.registerQuery("B = load 'junit_unparted' using " + HCatLoader.class.getName() + ";"); + Iterator iter = server.openIterator("B"); + int count = 0; + int num5nulls = 0; + while (iter.hasNext()) { + Tuple t = iter.next(); + if (t.get(5) == null) { + num5nulls++; + } else { + Assert.assertTrue(t.get(5) instanceof DataByteArray); + } + Assert.assertNull(t.get(6)); + count++; + } + Assert.assertEquals(3, count); + Assert.assertEquals(1, num5nulls); + driver.run("drop table junit_unparted"); + } + + @Test + public void testStoreFuncSimple() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] inputData = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + inputData[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer server = new 
PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + Assert.assertEquals(si + "\t" + j, itr.next()); + } } + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, CommandNeedRetryException { + + driver.run("drop table if exists employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + "111238\tKalpana\t01/01/2000\tF\tIN\tKA", + "111239\tSatya\t01/01/2001\tM\tIN\tKL", + "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); + pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN');"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(4, results.size()); + Collections.sort(results); + Assert.assertEquals(inputData[0], results.get(0)); + Assert.assertEquals(inputData[1], results.get(1)); + Assert.assertEquals(inputData[2], results.get(2)); + Assert.assertEquals(inputData[3], results.get(3)); + driver.run("drop table employee"); + } + + @Test + public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, CommandNeedRetryException { + + driver.run("drop table if exists employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + "111238\tKalpana\t01/01/2000\tF\tIN\tKA", + "111239\tSatya\t01/01/2001\tM\tIN\tKL", + "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); + pig.registerQuery("STORE IN INTO 'employee' USING " + 
HCatStorer.class.getName() + "();"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(4, results.size()); + Collections.sort(results); + Assert.assertEquals(inputData[0], results.get(0)); + Assert.assertEquals(inputData[1], results.get(1)); + Assert.assertEquals(inputData[2], results.get(2)); + Assert.assertEquals(inputData[3], results.get(3)); + driver.run("drop table employee"); + } + + @Test + public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException { + + driver.run("drop table if exists employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {}; + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); + pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(0, results.size()); + driver.run("drop table employee"); + } + + public void testPartitionPublish() + throws IOException, CommandNeedRetryException { + + driver.run("drop table ptn_fail"); + String createTable = "create table ptn_fail(a int, c string) partitioned by (b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + int LOOP_SIZE = 11; + String[] input = new String[LOOP_SIZE]; + + for (int i = 0; i < LOOP_SIZE; i++) { + input[i] = i + "\tmath"; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + + "' as (a:int, c:chararray);"); + server.registerQuery("B = filter A by " + FailEvalFunc.class.getName() + + "($0);"); + server.registerQuery("store B into 'ptn_fail' using " + + HCatStorer.class.getName() + "('b=math');"); + server.executeBatch(); + + String query = "show partitions ptn_fail"; + retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new IOException("Error " + retCode + " running query " + + query); + } + + ArrayList res = new ArrayList(); + driver.getResults(res); + Assert.assertEquals(0, res.size()); + + // Make sure the partitions directory is not in hdfs. 
+ Assert.assertTrue((new File(TEST_WAREHOUSE_DIR + "/ptn_fail")).exists()); + Assert.assertFalse((new File(TEST_WAREHOUSE_DIR + "/ptn_fail/b=math")) + .exists()); + } + + static public class FailEvalFunc extends EvalFunc { + + /* + * @param Tuple /* @return null /* @throws IOException + * + * @see org.apache.pig.EvalFunc#exec(org.apache.pig.data.Tuple) + */ + @Override + public Boolean exec(Tuple tuple) throws IOException { + throw new IOException("Eval Func to mimic Failure."); + } + + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java index a90e9c3..8a4901b 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java @@ -41,164 +41,164 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.pig.TestHCatStorerMulti} instead */ public class TestHCatStorerMulti extends TestCase { - private static final String TEST_DATA_DIR = System.getProperty("user.dir") + - "/build/test/data/" + TestHCatStorerMulti.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - private static final String BASIC_TABLE = "junit_unparted_basic"; - private static final String PARTITIONED_TABLE = "junit_parted_basic"; - private static Driver driver; - - private static Map> basicInputData; - - protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')"; + private static final String TEST_DATA_DIR = System.getProperty("user.dir") + + "/build/test/data/" + TestHCatStorerMulti.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + private static final String BASIC_TABLE = "junit_unparted_basic"; + private static final String PARTITIONED_TABLE = "junit_parted_basic"; + private static Driver driver; + + private static Map> basicInputData; + + protected String storageFormat() { + return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')"; + } + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; } - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); + createTable = createTable + "stored as " + storageFormat(); + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + "]"); } - - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " + storageFormat(); - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); - } + } + + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } + + @Override + protected void setUp() throws Exception { + if (driver == null) { + HiveConf hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); } - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, null); - } + cleanup(); + } - @Override - protected void setUp() throws Exception { - if (driver == null) { - HiveConf hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - } - - cleanup(); - } + @Override + protected void tearDown() throws Exception { + cleanup(); + } - @Override - protected void tearDown() throws Exception { - cleanup(); - } + public void testStoreBasicTable() throws Exception { - public void testStoreBasicTable() throws Exception { + createTable(BASIC_TABLE, "a int, b string"); - createTable(BASIC_TABLE, "a int, b string"); + populateBasicFile(); - populateBasicFile(); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); + server.executeBatch(); - server.executeBatch(); + driver.run("select * from " + BASIC_TABLE); + ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(unpartitionedTableValuesReadFromHiveDriver); + assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); + } - driver.run("select * from " + BASIC_TABLE); - ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(unpartitionedTableValuesReadFromHiveDriver); - 
assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); - } + public void testStorePartitionedTable() throws Exception { + createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); - public void testStorePartitionedTable() throws Exception { - createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); + populateBasicFile(); - populateBasicFile(); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B2 = filter A by a < 2;"); + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=0');"); + server.registerQuery("C2 = filter A by a >= 2;"); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=1');"); - server.registerQuery("B2 = filter A by a < 2;"); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=0');"); - server.registerQuery("C2 = filter A by a >= 2;"); - server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=1');"); + server.executeBatch(); - server.executeBatch(); + driver.run("select * from " + PARTITIONED_TABLE); + ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(partitionedTableValuesReadFromHiveDriver); + assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); + } - driver.run("select * from " + PARTITIONED_TABLE); - ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(partitionedTableValuesReadFromHiveDriver); - assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); - } + public void testStoreTableMulti() throws Exception { - public void testStoreTableMulti() throws Exception { + createTable(BASIC_TABLE, "a int, b string"); + createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); - createTable(BASIC_TABLE, "a int, b string"); - createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); + populateBasicFile(); - populateBasicFile(); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); + server.registerQuery("B2 = filter A by a < 2;"); + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=0');"); + server.registerQuery("C2 = filter A by a >= 2;"); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=1');"); - server.registerQuery("B2 = filter A by a < 2;"); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=0');"); - server.registerQuery("C2 = filter A by a >= 2;"); - server.registerQuery("store C2 into '" + 
PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=1');"); + server.executeBatch(); - server.executeBatch(); + driver.run("select * from " + BASIC_TABLE); + ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(unpartitionedTableValuesReadFromHiveDriver); + driver.run("select * from " + PARTITIONED_TABLE); + ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(partitionedTableValuesReadFromHiveDriver); + assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); + assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); + } - driver.run("select * from " + BASIC_TABLE); - ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(unpartitionedTableValuesReadFromHiveDriver); - driver.run("select * from " + PARTITIONED_TABLE); - ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(partitionedTableValuesReadFromHiveDriver); - assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); - assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); + private void populateBasicFile() throws IOException { + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + basicInputData = new HashMap>(); + int k = 0; + File file = new File(INPUT_FILE_NAME); + file.deleteOnExit(); + FileWriter writer = new FileWriter(file); + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + String sj = "S" + j + "S"; + input[k] = si + "\t" + sj; + basicInputData.put(k, new Pair(i, sj)); + writer.write(input[k] + "\n"); + k++; + } } + writer.close(); + } - private void populateBasicFile() throws IOException { - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - basicInputData = new HashMap>(); - int k = 0; - File file = new File(INPUT_FILE_NAME); - file.deleteOnExit(); - FileWriter writer = new FileWriter(file); - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - String sj = "S" + j + "S"; - input[k] = si + "\t" + sj; - basicInputData.put(k, new Pair(i, sj)); - writer.write(input[k] + "\n"); - k++; - } - } - writer.close(); + private void cleanup() throws IOException, CommandNeedRetryException { + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); } + new File(TEST_WAREHOUSE_DIR).mkdirs(); - private void cleanup() throws IOException, CommandNeedRetryException { - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - dropTable(BASIC_TABLE); - dropTable(PARTITIONED_TABLE); - } + dropTable(BASIC_TABLE); + dropTable(PARTITIONED_TABLE); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerWrapper.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerWrapper.java index 755d437..976fa3d 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerWrapper.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerWrapper.java @@ -43,52 +43,52 @@ */ public class TestHCatStorerWrapper extends HCatBaseTest { - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + 
"/input.data"; - @Test - public void testStoreExternalTableWithExternalDir() throws IOException, CommandNeedRetryException{ + @Test + public void testStoreExternalTableWithExternalDir() throws IOException, CommandNeedRetryException{ - File tmpExternalDir = new File(SystemUtils.getJavaIoTmpDir(), UUID.randomUUID().toString()); - tmpExternalDir.deleteOnExit(); + File tmpExternalDir = new File(SystemUtils.getJavaIoTmpDir(), UUID.randomUUID().toString()); + tmpExternalDir.deleteOnExit(); - String part_val = "100"; + String part_val = "100"; - driver.run("drop table junit_external"); - String createTable = "create external table junit_external(a int, b string) partitioned by (c string) stored as RCFILE"; - Assert.assertEquals(0, driver.run(createTable).getResponseCode()); + driver.run("drop table junit_external"); + String createTable = "create external table junit_external(a int, b string) partitioned by (c string) stored as RCFILE"; + Assert.assertEquals(0, driver.run(createTable).getResponseCode()); - int LOOP_SIZE = 3; - String[] inputData = new String[LOOP_SIZE*LOOP_SIZE]; - int k = 0; - for(int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for(int j=1;j<=LOOP_SIZE;j++) { - inputData[k++] = si + "\t"+j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - logAndRegister(server, "A = load '"+INPUT_FILE_NAME+"' as (a:int, b:chararray);"); - logAndRegister(server, "store A into 'default.junit_external' using " + HCatStorerWrapper.class.getName() - + "('c=" + part_val + "','" + tmpExternalDir.getAbsolutePath() + "');"); - server.executeBatch(); - - Assert.assertTrue(tmpExternalDir.exists()); - Assert.assertTrue(new File(tmpExternalDir.getAbsoluteFile() + "/" + "part-m-00000").exists()); + int LOOP_SIZE = 3; + String[] inputData = new String[LOOP_SIZE*LOOP_SIZE]; + int k = 0; + for(int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for(int j=1;j<=LOOP_SIZE;j++) { + inputData[k++] = si + "\t"+j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + logAndRegister(server, "A = load '"+INPUT_FILE_NAME+"' as (a:int, b:chararray);"); + logAndRegister(server, "store A into 'default.junit_external' using " + HCatStorerWrapper.class.getName() + + "('c=" + part_val + "','" + tmpExternalDir.getAbsolutePath() + "');"); + server.executeBatch(); - driver.run("select * from junit_external"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_external"); - Iterator itr = res.iterator(); - for(int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for(int j=1;j<=LOOP_SIZE;j++) { - Assert.assertEquals( si + "\t" + j + "\t" + part_val,itr.next()); - } - } - Assert.assertFalse(itr.hasNext()); + Assert.assertTrue(tmpExternalDir.exists()); + Assert.assertTrue(new File(tmpExternalDir.getAbsoluteFile() + "/" + "part-m-00000").exists()); + driver.run("select * from junit_external"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_external"); + Iterator itr = res.iterator(); + for(int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for(int j=1;j<=LOOP_SIZE;j++) { + Assert.assertEquals( si + "\t" + j + "\t" + part_val,itr.next()); + } } + Assert.assertFalse(itr.hasNext()); + + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java 
b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java index 7f3e5d3..53be63b 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java @@ -23,10 +23,10 @@ */ public class TestOrcHCatLoader extends TestHCatLoader { - @Override - protected String storageFormat() { - return "orc"; - } + @Override + protected String storageFormat() { + return "orc"; + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java index 4afbaa0..74e5711 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java @@ -23,9 +23,9 @@ */ public class TestOrcHCatStorer extends TestHCatStorerMulti { - @Override - protected String storageFormat() { - return "orc"; - } + @Override + protected String storageFormat() { + return "orc"; + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java index 7ba7179..971296a 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java @@ -35,60 +35,60 @@ */ public class TestPigHCatUtil { - @Test - public void testGetBagSubSchema() throws Exception { - - // Define the expected schema. - ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; - bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple") - .setDescription("The tuple in the bag").setType(DataType.TUPLE); - - ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; - innerTupleFieldSchemas[0] = - new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY); - - bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); - ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); - - // Get the actual converted schema. - HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList( - new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); - HCatFieldSchema hCatFieldSchema = - new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null); - ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema); - - Assert.assertEquals(expected.toString(), actual.toString()); - } - - @Test - public void testGetBagSubSchemaConfigured() throws Exception { - - // NOTE: pig-0.8 sets client system properties by actually getting the client - // system properties. Starting in pig-0.9 you must pass the properties in. - // When updating our pig dependency this will need updated. - System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t"); - System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple"); - UDFContext.getUDFContext().setClientSystemProps(System.getProperties()); - - // Define the expected schema. 
- ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; - bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t") - .setDescription("The tuple in the bag").setType(DataType.TUPLE); - - ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; - innerTupleFieldSchemas[0] = - new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY); - - bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); - ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); - - // Get the actual converted schema. - HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList( - new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); - HCatFieldSchema actualHCatFieldSchema = - new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null); - ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema); - - Assert.assertEquals(expected.toString(), actual.toString()); - } + @Test + public void testGetBagSubSchema() throws Exception { + + // Define the expected schema. + ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; + bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple") + .setDescription("The tuple in the bag").setType(DataType.TUPLE); + + ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; + innerTupleFieldSchemas[0] = + new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY); + + bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); + ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); + + // Get the actual converted schema. + HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList( + new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); + HCatFieldSchema hCatFieldSchema = + new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null); + ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema); + + Assert.assertEquals(expected.toString(), actual.toString()); + } + + @Test + public void testGetBagSubSchemaConfigured() throws Exception { + + // NOTE: pig-0.8 sets client system properties by actually getting the client + // system properties. Starting in pig-0.9 you must pass the properties in. + // When updating our pig dependency this will need updated. + System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t"); + System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple"); + UDFContext.getUDFContext().setClientSystemProps(System.getProperties()); + + // Define the expected schema. + ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; + bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t") + .setDescription("The tuple in the bag").setType(DataType.TUPLE); + + ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; + innerTupleFieldSchemas[0] = + new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY); + + bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); + ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); + + // Get the actual converted schema. 
+ HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList( + new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); + HCatFieldSchema actualHCatFieldSchema = + new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null); + ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema); + + Assert.assertEquals(expected.toString(), actual.toString()); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java index a7dd185..b06e9b4 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java @@ -38,35 +38,35 @@ */ public class HCatStorerWrapper extends HCatStorer { - private String sign; - private String externalDir; + private String sign; + private String externalDir; - public HCatStorerWrapper(String partSpecs, String schema, String externalDir) throws Exception { - super(partSpecs, schema); - this.externalDir = externalDir; - } + public HCatStorerWrapper(String partSpecs, String schema, String externalDir) throws Exception { + super(partSpecs, schema); + this.externalDir = externalDir; + } - public HCatStorerWrapper(String partSpecs, String externalDir) throws Exception { - super(partSpecs); - this.externalDir = externalDir; - } + public HCatStorerWrapper(String partSpecs, String externalDir) throws Exception { + super(partSpecs); + this.externalDir = externalDir; + } - public HCatStorerWrapper(String externalDir) throws Exception{ - super(); - this.externalDir = externalDir; - } + public HCatStorerWrapper(String externalDir) throws Exception{ + super(); + this.externalDir = externalDir; + } - @Override - public void setStoreLocation(String location, Job job) throws IOException { - Properties udfProps = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[] { sign }); - udfProps.setProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION, externalDir); - super.setStoreLocation(location, job); - } + @Override + public void setStoreLocation(String location, Job job) throws IOException { + Properties udfProps = UDFContext.getUDFContext().getUDFProperties( + this.getClass(), new String[] { sign }); + udfProps.setProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION, externalDir); + super.setStoreLocation(location, job); + } - @Override - public void setStoreFuncUDFContextSignature(String signature) { - sign = signature; - super.setStoreFuncUDFContextSignature(signature); - } + @Override + public void setStoreFuncUDFContextSignature(String signature) { + sign = signature; + super.setStoreFuncUDFContextSignature(signature); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java index a368b01..c87b95a 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java @@ -41,140 +41,140 @@ import org.apache.pig.data.Tuple; public class MockLoader extends LoadFunc { - private static final class MockRecordReader extends RecordReader { - @Override - public void close() throws IOException { - } + private static final class MockRecordReader extends 
RecordReader { + @Override + public void close() throws IOException { + } - @Override - public Object getCurrentKey() throws IOException, InterruptedException { - return "mockKey"; - } + @Override + public Object getCurrentKey() throws IOException, InterruptedException { + return "mockKey"; + } - @Override - public Object getCurrentValue() throws IOException, InterruptedException { - return "mockValue"; - } + @Override + public Object getCurrentValue() throws IOException, InterruptedException { + return "mockValue"; + } - @Override - public float getProgress() throws IOException, InterruptedException { - return 0.5f; - } + @Override + public float getProgress() throws IOException, InterruptedException { + return 0.5f; + } - @Override - public void initialize(InputSplit split, TaskAttemptContext arg1) throws IOException, - InterruptedException { - } + @Override + public void initialize(InputSplit split, TaskAttemptContext arg1) throws IOException, + InterruptedException { + } - @Override - public boolean nextKeyValue() throws IOException, InterruptedException { - return true; - } + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { + return true; } + } - private static final class MockInputSplit extends InputSplit implements Writable { - private String location; + private static final class MockInputSplit extends InputSplit implements Writable { + private String location; - public MockInputSplit() { - } + public MockInputSplit() { + } - public MockInputSplit(String location) { - this.location = location; - } + public MockInputSplit(String location) { + this.location = location; + } - @Override - public String[] getLocations() throws IOException, InterruptedException { - return new String[]{location}; - } + @Override + public String[] getLocations() throws IOException, InterruptedException { + return new String[]{location}; + } - @Override - public long getLength() throws IOException, InterruptedException { - return 10000000; - } + @Override + public long getLength() throws IOException, InterruptedException { + return 10000000; + } - @Override - public boolean equals(Object arg0) { - return arg0 == this; - } + @Override + public boolean equals(Object arg0) { + return arg0 == this; + } - @Override - public int hashCode() { - return location.hashCode(); - } + @Override + public int hashCode() { + return location.hashCode(); + } - @Override - public void readFields(DataInput arg0) throws IOException { - location = arg0.readUTF(); - } + @Override + public void readFields(DataInput arg0) throws IOException { + location = arg0.readUTF(); + } - @Override - public void write(DataOutput arg0) throws IOException { - arg0.writeUTF(location); - } + @Override + public void write(DataOutput arg0) throws IOException { + arg0.writeUTF(location); } + } - private static final class MockInputFormat extends InputFormat { + private static final class MockInputFormat extends InputFormat { - private final String location; + private final String location; - public MockInputFormat(String location) { - this.location = location; - } + public MockInputFormat(String location) { + this.location = location; + } - @Override - public RecordReader createRecordReader(InputSplit arg0, TaskAttemptContext arg1) - throws IOException, InterruptedException { - return new MockRecordReader(); - } + @Override + public RecordReader createRecordReader(InputSplit arg0, TaskAttemptContext arg1) + throws IOException, InterruptedException { + return new MockRecordReader(); + } - @Override - public List 
getSplits(JobContext arg0) throws IOException, InterruptedException { - return Arrays.asList(new MockInputSplit(location)); - } + @Override + public List getSplits(JobContext arg0) throws IOException, InterruptedException { + return Arrays.asList(new MockInputSplit(location)); } + } - private static final Map> locationToData = new HashMap>(); + private static final Map> locationToData = new HashMap>(); - public static void setData(String location, Iterable data) { - locationToData.put(location, data); - } + public static void setData(String location, Iterable data) { + locationToData.put(location, data); + } - private String location; + private String location; - private Iterator data; + private Iterator data; - @Override - public String relativeToAbsolutePath(String location, Path curDir) throws IOException { - return location; - } + @Override + public String relativeToAbsolutePath(String location, Path curDir) throws IOException { + return location; + } - @Override - public void setLocation(String location, Job job) throws IOException { - this.location = location; - if (location == null) { - throw new IOException("null location passed to MockLoader"); - } - this.data = locationToData.get(location).iterator(); - if (this.data == null) { - throw new IOException("No data configured for location: " + location); - } + @Override + public void setLocation(String location, Job job) throws IOException { + this.location = location; + if (location == null) { + throw new IOException("null location passed to MockLoader"); } - - @Override - public Tuple getNext() throws IOException { - if (data == null) { - throw new IOException("data was not correctly initialized in MockLoader"); - } - return data.hasNext() ? data.next() : null; + this.data = locationToData.get(location).iterator(); + if (this.data == null) { + throw new IOException("No data configured for location: " + location); } + } - @Override - public InputFormat getInputFormat() throws IOException { - return new MockInputFormat(location); + @Override + public Tuple getNext() throws IOException { + if (data == null) { + throw new IOException("data was not correctly initialized in MockLoader"); } + return data.hasNext() ? 
data.next() : null; + } - @Override - public void prepareToRead(RecordReader arg0, PigSplit arg1) throws IOException { - } + @Override + public InputFormat getInputFormat() throws IOException { + return new MockInputFormat(location); + } + + @Override + public void prepareToRead(RecordReader arg0, PigSplit arg1) throws IOException { + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java index 76d7561..d056910 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java @@ -25,16 +25,16 @@ public class MyPigStorage extends PigStorage { - String arg2; + String arg2; - public MyPigStorage(String arg1, String arg2) throws IOException { - super(arg1); - this.arg2 = arg2; - } + public MyPigStorage(String arg1, String arg2) throws IOException { + super(arg1); + this.arg2 = arg2; + } - @Override - public void putNext(Tuple t) throws IOException { - t.append(arg2); - super.putNext(t); - } + @Override + public void putNext(Tuple t) throws IOException { + t.append(arg2); + super.putNext(t); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java index 2c7487c..bf437d2 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java @@ -55,176 +55,176 @@ public class TestE2EScenarios extends TestCase { - private static final String TEST_DATA_DIR = System.getProperty("user.dir") + - "/build/test/data/" + TestHCatLoader.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String TEST_DATA_DIR = System.getProperty("user.dir") + + "/build/test/data/" + TestHCatLoader.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String TEXTFILE_LOCN = TEST_DATA_DIR + "/textfile"; + private static final String TEXTFILE_LOCN = TEST_DATA_DIR + "/textfile"; - private static Driver driver; + private static Driver driver; - protected String storageFormat() { - return "orc"; - } - - @Override - protected void setUp() throws Exception { - - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - new File(TEST_WAREHOUSE_DIR).mkdirs(); + protected String storageFormat() { + return "orc"; + } - HiveConf hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); + @Override + protected void setUp() throws Exception { + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); } - - @Override - protected void tearDown() throws Exception { - dropTable("inpy"); - dropTable("rc5318"); - dropTable("orc5318"); + new File(TEST_WAREHOUSE_DIR).mkdirs(); + + HiveConf hiveConf = new 
HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + + } + + @Override + protected void tearDown() throws Exception { + dropTable("inpy"); + dropTable("rc5318"); + dropTable("orc5318"); + } + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + private void createTable(String tablename, String schema, String partitionedBy, String storageFormat) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; } - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); + if (storageFormat != null){ + createTable = createTable + "stored as " +storageFormat; } - - private void createTable(String tablename, String schema, String partitionedBy, String storageFormat) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - if (storageFormat != null){ - createTable = createTable + "stored as " +storageFormat; - } - driverRun(createTable); + driverRun(createTable); + } + + private void driverRun(String cmd) throws IOException, CommandNeedRetryException { + int retCode = driver.run(cmd).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to run [" + + cmd + "], return code from hive driver : [" + retCode + "]"); } - - private void driverRun(String cmd) throws IOException, CommandNeedRetryException { - int retCode = driver.run(cmd).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to run [" - + cmd + "], return code from hive driver : [" + retCode + "]"); - } + } + + private void pigDump(String tableName) throws IOException { + PigServer server = new PigServer(ExecType.LOCAL); + + System.err.println("==="); + System.err.println(tableName+":"); + server.registerQuery("X = load '" + tableName + + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("X"); + while (XIter.hasNext()) { + Tuple t = XIter.next(); + for (Object o : t.getAll()){ + System.err.print( + "\t(" + o.getClass().getName() + ":" + + o.toString() + ")" + ); + } + System.err.println(""); } - - private void pigDump(String tableName) throws IOException { - PigServer server = new PigServer(ExecType.LOCAL); - - System.err.println("==="); - System.err.println(tableName+":"); - server.registerQuery("X = load '" + tableName - + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("X"); - while (XIter.hasNext()) { - Tuple t = XIter.next(); - for (Object o : t.getAll()){ - System.err.print( - "\t(" + o.getClass().getName() + ":" - + o.toString() + ")" - ); - } - System.err.println(""); - } - System.err.println("==="); + System.err.println("==="); + } + + + private void copyTable(String in, 
String out) throws IOException, InterruptedException { + Job ijob = new Job(); + Job ojob = new Job(); + HCatInputFormat inpy = new HCatInputFormat(); + inpy.setInput(ijob , null, in); + HCatOutputFormat oupy = new HCatOutputFormat(); + oupy.setOutput(ojob, + OutputJobInfo.create(null, out, new HashMap() + )); + + // Test HCatContext + + System.err.println("HCatContext INSTANCE is present : " +HCatContext.INSTANCE.getConf().isPresent()); + if (HCatContext.INSTANCE.getConf().isPresent()){ + System.err.println("HCatContext tinyint->int promotion says " + + HCatContext.INSTANCE.getConf().get().getBoolean( + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)); } + HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration()); + System.err.println("Copying from ["+in+"] to ["+out+"] with schema : "+ tableSchema.toString()); + oupy.setSchema(ojob, tableSchema); + oupy.checkOutputSpecs(ojob); + OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration())); + oc.setupJob(ojob); - private void copyTable(String in, String out) throws IOException, InterruptedException { - Job ijob = new Job(); - Job ojob = new Job(); - HCatInputFormat inpy = new HCatInputFormat(); - inpy.setInput(ijob , null, in); - HCatOutputFormat oupy = new HCatOutputFormat(); - oupy.setOutput(ojob, - OutputJobInfo.create(null, out, new HashMap() - )); - - // Test HCatContext - - System.err.println("HCatContext INSTANCE is present : " +HCatContext.INSTANCE.getConf().isPresent()); - if (HCatContext.INSTANCE.getConf().isPresent()){ - System.err.println("HCatContext tinyint->int promotion says " + - HCatContext.INSTANCE.getConf().get().getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)); - } - - HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration()); - System.err.println("Copying from ["+in+"] to ["+out+"] with schema : "+ tableSchema.toString()); - oupy.setSchema(ojob, tableSchema); - oupy.checkOutputSpecs(ojob); - OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration())); - oc.setupJob(ojob); - - for (InputSplit split : inpy.getSplits(ijob)){ - - TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration()); - TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration()); - - RecordReader rr = inpy.createRecordReader(split, rtaskContext); - rr.initialize(split, rtaskContext); - - OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext); - taskOc.setupTask(wtaskContext); - RecordWriter, HCatRecord> rw = oupy.getRecordWriter(wtaskContext); - - while(rr.nextKeyValue()){ - rw.write(rr.getCurrentKey(), rr.getCurrentValue()); - } - rw.close(wtaskContext); - taskOc.commitTask(wtaskContext); - rr.close(); - } - - oc.commitJob(ojob); - } + for (InputSplit split : inpy.getSplits(ijob)){ + + TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration()); + TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration()); + + RecordReader rr = inpy.createRecordReader(split, rtaskContext); + rr.initialize(split, rtaskContext); + + OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext); + taskOc.setupTask(wtaskContext); + RecordWriter, HCatRecord> rw = oupy.getRecordWriter(wtaskContext); - private TaskAttemptContext createTaskAttemptContext(Configuration tconf) { - Configuration conf = (tconf == null) ? 
(new Configuration()) : tconf; - TaskAttemptID taskId = new TaskAttemptID(); - conf.setInt("mapred.task.partition", taskId.getId()); - conf.set("mapred.task.id", "attempt__0000_r_000000_" + taskId.getId()); - TaskAttemptContext rtaskContext = HCatMapRedUtil.createTaskAttemptContext(conf , taskId); - return rtaskContext; + while(rr.nextKeyValue()){ + rw.write(rr.getCurrentKey(), rr.getCurrentValue()); + } + rw.close(wtaskContext); + taskOc.commitTask(wtaskContext); + rr.close(); } + oc.commitJob(ojob); + } - public void testReadOrcAndRCFromPig() throws Exception { - String tableSchema = "ti tinyint, si smallint,i int, bi bigint, f float, d double, b boolean"; + private TaskAttemptContext createTaskAttemptContext(Configuration tconf) { + Configuration conf = (tconf == null) ? (new Configuration()) : tconf; + TaskAttemptID taskId = new TaskAttemptID(); + conf.setInt("mapred.task.partition", taskId.getId()); + conf.set("mapred.task.id", "attempt__0000_r_000000_" + taskId.getId()); + TaskAttemptContext rtaskContext = HCatMapRedUtil.createTaskAttemptContext(conf , taskId); + return rtaskContext; + } - HcatTestUtils.createTestDataFile(TEXTFILE_LOCN, - new String[]{ - "-3\0019001\00186400\0014294967297\00134.532\0012184239842983489.1231231234\001true" - ,"0\0010\0010\0010\0010\0010\001false" - } - ); - // write this out to a file, and import it into hive - createTable("inpy",tableSchema,null,"textfile"); - createTable("rc5318",tableSchema,null,"rcfile"); - createTable("orc5318",tableSchema,null,"orc"); - driverRun("LOAD DATA LOCAL INPATH '"+TEXTFILE_LOCN+"' OVERWRITE INTO TABLE inpy"); + public void testReadOrcAndRCFromPig() throws Exception { + String tableSchema = "ti tinyint, si smallint,i int, bi bigint, f float, d double, b boolean"; + + HcatTestUtils.createTestDataFile(TEXTFILE_LOCN, + new String[]{ + "-3\0019001\00186400\0014294967297\00134.532\0012184239842983489.1231231234\001true" + ,"0\0010\0010\0010\0010\0010\001false" + } + ); - // write it out from hive to an rcfile table, and to an orc table + // write this out to a file, and import it into hive + createTable("inpy",tableSchema,null,"textfile"); + createTable("rc5318",tableSchema,null,"rcfile"); + createTable("orc5318",tableSchema,null,"orc"); + driverRun("LOAD DATA LOCAL INPATH '"+TEXTFILE_LOCN+"' OVERWRITE INTO TABLE inpy"); + + // write it out from hive to an rcfile table, and to an orc table // driverRun("insert overwrite table rc5318 select * from inpy"); - copyTable("inpy","rc5318"); + copyTable("inpy","rc5318"); // driverRun("insert overwrite table orc5318 select * from inpy"); - copyTable("inpy","orc5318"); + copyTable("inpy","orc5318"); - pigDump("inpy"); - pigDump("rc5318"); - pigDump("orc5318"); + pigDump("inpy"); + pigDump("rc5318"); + pigDump("orc5318"); - } + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java index 4faba3e..bccd33d 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java @@ -50,402 +50,402 @@ import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; public class TestHCatLoader extends TestCase { - private static final String TEST_DATA_DIR = - "/tmp/build/test/data/" + TestHCatLoader.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private 
static final String BASIC_FILE_NAME = TEST_DATA_DIR + "/basic.input.data"; - private static final String COMPLEX_FILE_NAME = TEST_DATA_DIR + "/complex.input.data"; - - private static final String BASIC_TABLE = "junit_unparted_basic"; - private static final String COMPLEX_TABLE = "junit_unparted_complex"; - private static final String PARTITIONED_TABLE = "junit_parted_basic"; - private static final String SPECIFIC_SIZE_TABLE = "junit_specific_size"; - private static Driver driver; - - private static int guardTestCount = 6; // ugh, instantiate using introspection in guardedSetupBeforeClass - private static boolean setupHasRun = false; - - - private static Map> basicInputData; - - protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + private static final String TEST_DATA_DIR = + "/tmp/build/test/data/" + TestHCatLoader.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String BASIC_FILE_NAME = TEST_DATA_DIR + "/basic.input.data"; + private static final String COMPLEX_FILE_NAME = TEST_DATA_DIR + "/complex.input.data"; + + private static final String BASIC_TABLE = "junit_unparted_basic"; + private static final String COMPLEX_TABLE = "junit_unparted_complex"; + private static final String PARTITIONED_TABLE = "junit_parted_basic"; + private static final String SPECIFIC_SIZE_TABLE = "junit_specific_size"; + private static Driver driver; + + private static int guardTestCount = 6; // ugh, instantiate using introspection in guardedSetupBeforeClass + private static boolean setupHasRun = false; + + + private static Map> basicInputData; + + protected String storageFormat() { + return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + } + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; } - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); + createTable = createTable + "stored as " +storageFormat(); + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); } + } - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " +storageFormat(); - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + "]"); - } - } + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, null); + protected void guardedSetUpBeforeClass() throws Exception { + if (!setupHasRun) { + setupHasRun = true; + } else { + return; } - protected void guardedSetUpBeforeClass() throws Exception { - if (!setupHasRun) { - setupHasRun = true; - } else { - return; - } - - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - HiveConf hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - - cleanup(); - - createTable(BASIC_TABLE, "a int, b string"); - createTable(COMPLEX_TABLE, - "name string, studentid int, " - + "contact struct, " - + "currently_registered_courses array, " - + "current_grades map, " - + "phnos array>"); - - createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); - createTable(SPECIFIC_SIZE_TABLE, "a int, b string"); - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - basicInputData = new HashMap>(); - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - String sj = "S" + j + "S"; - input[k] = si + "\t" + sj; - basicInputData.put(k, new Pair(i, sj)); - k++; - } - } - HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input); - HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, - new String[]{ - //"Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)},[PHARMACOLOGY#A-,PSYCHIATRY#B+],{(415-253-6367,cell),(408-253-6367,landline)}", - //"Edward Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)},[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D],{(415-253-6367,cell),(408-253-6367,landline)}", - } - ); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);"); - - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); - server.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); - server.registerQuery("B = foreach A generate a,b;"); - server.registerQuery("B2 = filter B by a < 2;"); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');"); - - server.registerQuery("C = foreach A generate a,b;"); - server.registerQuery("C2 = filter C by a >= 2;"); - server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');"); - - server.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos 
:bag{innertup:tuple(phno:chararray,type:chararray)});"); - server.registerQuery("store D into '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); } - - private void cleanup() throws IOException, CommandNeedRetryException { - dropTable(BASIC_TABLE); - dropTable(COMPLEX_TABLE); - dropTable(PARTITIONED_TABLE); - dropTable(SPECIFIC_SIZE_TABLE); + new File(TEST_WAREHOUSE_DIR).mkdirs(); + + HiveConf hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + + cleanup(); + + createTable(BASIC_TABLE, "a int, b string"); + createTable(COMPLEX_TABLE, + "name string, studentid int, " + + "contact struct, " + + "currently_registered_courses array, " + + "current_grades map, " + + "phnos array>"); + + createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); + createTable(SPECIFIC_SIZE_TABLE, "a int, b string"); + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + basicInputData = new HashMap>(); + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + String sj = "S" + j + "S"; + input[k] = si + "\t" + sj; + basicInputData.put(k, new Pair(i, sj)); + k++; + } } - - protected void guardedTearDownAfterClass() throws Exception { - guardTestCount--; - if (guardTestCount > 0) { - return; - } - cleanup(); + HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input); + HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, + new String[]{ + //"Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)},[PHARMACOLOGY#A-,PSYCHIATRY#B+],{(415-253-6367,cell),(408-253-6367,landline)}", + //"Edward Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)},[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D],{(415-253-6367,cell),(408-253-6367,landline)}", + } + ); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);"); + + server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.registerQuery("B = foreach A generate a,b;"); + server.registerQuery("B2 = filter B by a < 2;"); + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');"); + + server.registerQuery("C = foreach A generate a,b;"); + server.registerQuery("C2 = filter C by a >= 2;"); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');"); + + server.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});"); + server.registerQuery("store D into '" + COMPLEX_TABLE + "' using 
org.apache.hive.hcatalog.pig.HCatStorer();"); + server.executeBatch(); + + } + + private void cleanup() throws IOException, CommandNeedRetryException { + dropTable(BASIC_TABLE); + dropTable(COMPLEX_TABLE); + dropTable(PARTITIONED_TABLE); + dropTable(SPECIFIC_SIZE_TABLE); + } + + protected void guardedTearDownAfterClass() throws Exception { + guardTestCount--; + if (guardTestCount > 0) { + return; } - - @Override - protected void setUp() throws Exception { - guardedSetUpBeforeClass(); + cleanup(); + } + + @Override + protected void setUp() throws Exception { + guardedSetUpBeforeClass(); + } + + @Override + protected void tearDown() throws Exception { + guardedTearDownAfterClass(); + } + + public void testSchemaLoadBasic() throws IOException { + + PigServer server = new PigServer(ExecType.LOCAL); + + // test that schema was loaded correctly + server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Schema dumpedXSchema = server.dumpSchema("X"); + List Xfields = dumpedXSchema.getFields(); + assertEquals(2, Xfields.size()); + assertTrue(Xfields.get(0).alias.equalsIgnoreCase("a")); + assertTrue(Xfields.get(0).type == DataType.INTEGER); + assertTrue(Xfields.get(1).alias.equalsIgnoreCase("b")); + assertTrue(Xfields.get(1).type == DataType.CHARARRAY); + + } + + public void testReadDataBasic() throws IOException { + PigServer server = new PigServer(ExecType.LOCAL); + + server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("X"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(2, t.size()); + assertTrue(t.get(0).getClass() == Integer.class); + assertTrue(t.get(1).getClass() == String.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); + assertEquals(t.get(1), basicInputData.get(numTuplesRead).second); + numTuplesRead++; } - - @Override - protected void tearDown() throws Exception { - guardedTearDownAfterClass(); + assertEquals(basicInputData.size(), numTuplesRead); + } + + public void testSchemaLoadComplex() throws IOException { + + PigServer server = new PigServer(ExecType.LOCAL); + + // test that schema was loaded correctly + server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Schema dumpedKSchema = server.dumpSchema("K"); + List Kfields = dumpedKSchema.getFields(); + assertEquals(6, Kfields.size()); + + assertEquals(DataType.CHARARRAY, Kfields.get(0).type); + assertEquals("name", Kfields.get(0).alias.toLowerCase()); + + assertEquals(DataType.INTEGER, Kfields.get(1).type); + assertEquals("studentid", Kfields.get(1).alias.toLowerCase()); + + assertEquals(DataType.TUPLE, Kfields.get(2).type); + assertEquals("contact", Kfields.get(2).alias.toLowerCase()); + { + assertNotNull(Kfields.get(2).schema); + assertTrue(Kfields.get(2).schema.getFields().size() == 2); + assertTrue(Kfields.get(2).schema.getFields().get(0).type == DataType.CHARARRAY); + assertTrue(Kfields.get(2).schema.getFields().get(0).alias.equalsIgnoreCase("phno")); + assertTrue(Kfields.get(2).schema.getFields().get(1).type == DataType.CHARARRAY); + assertTrue(Kfields.get(2).schema.getFields().get(1).alias.equalsIgnoreCase("email")); } - - public void testSchemaLoadBasic() throws IOException { - - PigServer server = new PigServer(ExecType.LOCAL); - - // test that schema was loaded correctly - server.registerQuery("X = load '" + BASIC_TABLE + "' using 
org.apache.hive.hcatalog.pig.HCatLoader();"); - Schema dumpedXSchema = server.dumpSchema("X"); - List Xfields = dumpedXSchema.getFields(); - assertEquals(2, Xfields.size()); - assertTrue(Xfields.get(0).alias.equalsIgnoreCase("a")); - assertTrue(Xfields.get(0).type == DataType.INTEGER); - assertTrue(Xfields.get(1).alias.equalsIgnoreCase("b")); - assertTrue(Xfields.get(1).type == DataType.CHARARRAY); - + assertEquals(DataType.BAG, Kfields.get(3).type); + assertEquals("currently_registered_courses", Kfields.get(3).alias.toLowerCase()); + { + assertNotNull(Kfields.get(3).schema); + assertEquals(1, Kfields.get(3).schema.getFields().size()); + assertEquals(DataType.TUPLE, Kfields.get(3).schema.getFields().get(0).type); + assertNotNull(Kfields.get(3).schema.getFields().get(0).schema); + assertEquals(1, Kfields.get(3).schema.getFields().get(0).schema.getFields().size()); + assertEquals(DataType.CHARARRAY, Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).type); + // assertEquals("course",Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); + // commented out, because the name becomes "innerfield" by default - we call it "course" in pig, + // but in the metadata, it'd be anonymous, so this would be autogenerated, which is fine } - - public void testReadDataBasic() throws IOException { - PigServer server = new PigServer(ExecType.LOCAL); - - server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("X"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(2, t.size()); - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); - assertEquals(t.get(1), basicInputData.get(numTuplesRead).second); - numTuplesRead++; - } - assertEquals(basicInputData.size(), numTuplesRead); + assertEquals(DataType.MAP, Kfields.get(4).type); + assertEquals("current_grades", Kfields.get(4).alias.toLowerCase()); + assertEquals(DataType.BAG, Kfields.get(5).type); + assertEquals("phnos", Kfields.get(5).alias.toLowerCase()); + { + assertNotNull(Kfields.get(5).schema); + assertEquals(1, Kfields.get(5).schema.getFields().size()); + assertEquals(DataType.TUPLE, Kfields.get(5).schema.getFields().get(0).type); + assertNotNull(Kfields.get(5).schema.getFields().get(0).schema); + assertTrue(Kfields.get(5).schema.getFields().get(0).schema.getFields().size() == 2); + assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).type); + assertEquals("phno", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); + assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).type); + assertEquals("type", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).alias.toLowerCase()); } - public void testSchemaLoadComplex() throws IOException { - - PigServer server = new PigServer(ExecType.LOCAL); - - // test that schema was loaded correctly - server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); - Schema dumpedKSchema = server.dumpSchema("K"); - List Kfields = dumpedKSchema.getFields(); - assertEquals(6, Kfields.size()); - - assertEquals(DataType.CHARARRAY, Kfields.get(0).type); - assertEquals("name", Kfields.get(0).alias.toLowerCase()); - - assertEquals(DataType.INTEGER, 
Kfields.get(1).type); - assertEquals("studentid", Kfields.get(1).alias.toLowerCase()); - - assertEquals(DataType.TUPLE, Kfields.get(2).type); - assertEquals("contact", Kfields.get(2).alias.toLowerCase()); - { - assertNotNull(Kfields.get(2).schema); - assertTrue(Kfields.get(2).schema.getFields().size() == 2); - assertTrue(Kfields.get(2).schema.getFields().get(0).type == DataType.CHARARRAY); - assertTrue(Kfields.get(2).schema.getFields().get(0).alias.equalsIgnoreCase("phno")); - assertTrue(Kfields.get(2).schema.getFields().get(1).type == DataType.CHARARRAY); - assertTrue(Kfields.get(2).schema.getFields().get(1).alias.equalsIgnoreCase("email")); - } - assertEquals(DataType.BAG, Kfields.get(3).type); - assertEquals("currently_registered_courses", Kfields.get(3).alias.toLowerCase()); - { - assertNotNull(Kfields.get(3).schema); - assertEquals(1, Kfields.get(3).schema.getFields().size()); - assertEquals(DataType.TUPLE, Kfields.get(3).schema.getFields().get(0).type); - assertNotNull(Kfields.get(3).schema.getFields().get(0).schema); - assertEquals(1, Kfields.get(3).schema.getFields().get(0).schema.getFields().size()); - assertEquals(DataType.CHARARRAY, Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).type); - // assertEquals("course",Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); - // commented out, because the name becomes "innerfield" by default - we call it "course" in pig, - // but in the metadata, it'd be anonymous, so this would be autogenerated, which is fine - } - assertEquals(DataType.MAP, Kfields.get(4).type); - assertEquals("current_grades", Kfields.get(4).alias.toLowerCase()); - assertEquals(DataType.BAG, Kfields.get(5).type); - assertEquals("phnos", Kfields.get(5).alias.toLowerCase()); - { - assertNotNull(Kfields.get(5).schema); - assertEquals(1, Kfields.get(5).schema.getFields().size()); - assertEquals(DataType.TUPLE, Kfields.get(5).schema.getFields().get(0).type); - assertNotNull(Kfields.get(5).schema.getFields().get(0).schema); - assertTrue(Kfields.get(5).schema.getFields().get(0).schema.getFields().size() == 2); - assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).type); - assertEquals("phno", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); - assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).type); - assertEquals("type", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).alias.toLowerCase()); - } - + } + + public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException { + PigServer server = new PigServer(ExecType.LOCAL); + + driver.run("select * from " + PARTITIONED_TABLE); + ArrayList valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size()); + + server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Schema dumpedWSchema = server.dumpSchema("W"); + List Wfields = dumpedWSchema.getFields(); + assertEquals(3, Wfields.size()); + assertTrue(Wfields.get(0).alias.equalsIgnoreCase("a")); + assertTrue(Wfields.get(0).type == DataType.INTEGER); + assertTrue(Wfields.get(1).alias.equalsIgnoreCase("b")); + assertTrue(Wfields.get(1).type == DataType.CHARARRAY); + assertTrue(Wfields.get(2).alias.equalsIgnoreCase("bkt")); + assertTrue(Wfields.get(2).type == DataType.CHARARRAY); + + Iterator WIter = 
server.openIterator("W"); + Collection> valuesRead = new ArrayList>(); + while (WIter.hasNext()) { + Tuple t = WIter.next(); + assertTrue(t.size() == 3); + assertTrue(t.get(0).getClass() == Integer.class); + assertTrue(t.get(1).getClass() == String.class); + assertTrue(t.get(2).getClass() == String.class); + valuesRead.add(new Pair((Integer) t.get(0), (String) t.get(1))); + if ((Integer) t.get(0) < 2) { + assertEquals("0", t.get(2)); + } else { + assertEquals("1", t.get(2)); + } } - - public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException { - PigServer server = new PigServer(ExecType.LOCAL); - - driver.run("select * from " + PARTITIONED_TABLE); - ArrayList valuesReadFromHiveDriver = new ArrayList(); - driver.getResults(valuesReadFromHiveDriver); - assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size()); - - server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); - Schema dumpedWSchema = server.dumpSchema("W"); - List Wfields = dumpedWSchema.getFields(); - assertEquals(3, Wfields.size()); - assertTrue(Wfields.get(0).alias.equalsIgnoreCase("a")); - assertTrue(Wfields.get(0).type == DataType.INTEGER); - assertTrue(Wfields.get(1).alias.equalsIgnoreCase("b")); - assertTrue(Wfields.get(1).type == DataType.CHARARRAY); - assertTrue(Wfields.get(2).alias.equalsIgnoreCase("bkt")); - assertTrue(Wfields.get(2).type == DataType.CHARARRAY); - - Iterator WIter = server.openIterator("W"); - Collection> valuesRead = new ArrayList>(); - while (WIter.hasNext()) { - Tuple t = WIter.next(); - assertTrue(t.size() == 3); - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertTrue(t.get(2).getClass() == String.class); - valuesRead.add(new Pair((Integer) t.get(0), (String) t.get(1))); - if ((Integer) t.get(0) < 2) { - assertEquals("0", t.get(2)); - } else { - assertEquals("1", t.get(2)); - } - } - assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size()); - - server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); - server.registerQuery("P1filter = filter P1 by bkt == '0';"); - Iterator P1Iter = server.openIterator("P1filter"); - int count1 = 0; - while (P1Iter.hasNext()) { - Tuple t = P1Iter.next(); - - assertEquals("0", t.get(2)); - assertEquals(1, t.get(0)); - count1++; - } - assertEquals(3, count1); - - server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); - server.registerQuery("P2filter = filter P2 by bkt == '1';"); - Iterator P2Iter = server.openIterator("P2filter"); - int count2 = 0; - while (P2Iter.hasNext()) { - Tuple t = P2Iter.next(); - - assertEquals("1", t.get(2)); - assertTrue(((Integer) t.get(0)) > 1); - count2++; - } - assertEquals(6, count2); + assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size()); + + server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("P1filter = filter P1 by bkt == '0';"); + Iterator P1Iter = server.openIterator("P1filter"); + int count1 = 0; + while (P1Iter.hasNext()) { + Tuple t = P1Iter.next(); + + assertEquals("0", t.get(2)); + assertEquals(1, t.get(0)); + count1++; } - - public void testProjectionsBasic() throws IOException { - - PigServer server = new PigServer(ExecType.LOCAL); - - // projections are handled by using generate, not "as" on the Load - - server.registerQuery("Y1 = load '" + BASIC_TABLE + "' using 
org.apache.hive.hcatalog.pig.HCatLoader();"); - server.registerQuery("Y2 = foreach Y1 generate a;"); - server.registerQuery("Y3 = foreach Y1 generate b,a;"); - Schema dumpedY2Schema = server.dumpSchema("Y2"); - Schema dumpedY3Schema = server.dumpSchema("Y3"); - List Y2fields = dumpedY2Schema.getFields(); - List Y3fields = dumpedY3Schema.getFields(); - assertEquals(1, Y2fields.size()); - assertEquals("a", Y2fields.get(0).alias.toLowerCase()); - assertEquals(DataType.INTEGER, Y2fields.get(0).type); - assertEquals(2, Y3fields.size()); - assertEquals("b", Y3fields.get(0).alias.toLowerCase()); - assertEquals(DataType.CHARARRAY, Y3fields.get(0).type); - assertEquals("a", Y3fields.get(1).alias.toLowerCase()); - assertEquals(DataType.INTEGER, Y3fields.get(1).type); - - int numTuplesRead = 0; - Iterator Y2Iter = server.openIterator("Y2"); - while (Y2Iter.hasNext()) { - Tuple t = Y2Iter.next(); - assertEquals(t.size(), 1); - assertTrue(t.get(0).getClass() == Integer.class); - assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); - numTuplesRead++; - } - numTuplesRead = 0; - Iterator Y3Iter = server.openIterator("Y3"); - while (Y3Iter.hasNext()) { - Tuple t = Y3Iter.next(); - assertEquals(t.size(), 2); - assertTrue(t.get(0).getClass() == String.class); - assertEquals(t.get(0), basicInputData.get(numTuplesRead).second); - assertTrue(t.get(1).getClass() == Integer.class); - assertEquals(t.get(1), basicInputData.get(numTuplesRead).first); - numTuplesRead++; - } - assertEquals(basicInputData.size(), numTuplesRead); + assertEquals(3, count1); + + server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("P2filter = filter P2 by bkt == '1';"); + Iterator P2Iter = server.openIterator("P2filter"); + int count2 = 0; + while (P2Iter.hasNext()) { + Tuple t = P2Iter.next(); + + assertEquals("1", t.get(2)); + assertTrue(((Integer) t.get(0)) > 1); + count2++; } - - public void testGetInputBytes() throws Exception { - File file = new File(TEST_WAREHOUSE_DIR + "/" + SPECIFIC_SIZE_TABLE + "/part-m-00000"); - file.deleteOnExit(); - RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw"); - randomAccessFile.setLength(2L * 1024 * 1024 * 1024); - - Job job = new Job(); - HCatLoader hCatLoader = new HCatLoader(); - hCatLoader.setUDFContextSignature(this.getName()); - hCatLoader.setLocation(SPECIFIC_SIZE_TABLE, job); - ResourceStatistics statistics = hCatLoader.getStatistics(file.getAbsolutePath(), job); - assertEquals(2048, (long) statistics.getmBytes()); + assertEquals(6, count2); + } + + public void testProjectionsBasic() throws IOException { + + PigServer server = new PigServer(ExecType.LOCAL); + + // projections are handled by using generate, not "as" on the Load + + server.registerQuery("Y1 = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("Y2 = foreach Y1 generate a;"); + server.registerQuery("Y3 = foreach Y1 generate b,a;"); + Schema dumpedY2Schema = server.dumpSchema("Y2"); + Schema dumpedY3Schema = server.dumpSchema("Y3"); + List Y2fields = dumpedY2Schema.getFields(); + List Y3fields = dumpedY3Schema.getFields(); + assertEquals(1, Y2fields.size()); + assertEquals("a", Y2fields.get(0).alias.toLowerCase()); + assertEquals(DataType.INTEGER, Y2fields.get(0).type); + assertEquals(2, Y3fields.size()); + assertEquals("b", Y3fields.get(0).alias.toLowerCase()); + assertEquals(DataType.CHARARRAY, Y3fields.get(0).type); + assertEquals("a", 
Y3fields.get(1).alias.toLowerCase()); + assertEquals(DataType.INTEGER, Y3fields.get(1).type); + + int numTuplesRead = 0; + Iterator Y2Iter = server.openIterator("Y2"); + while (Y2Iter.hasNext()) { + Tuple t = Y2Iter.next(); + assertEquals(t.size(), 1); + assertTrue(t.get(0).getClass() == Integer.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); + numTuplesRead++; } - - public void testConvertBooleanToInt() throws Exception { - String tbl = "test_convert_boolean_to_int"; - String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt"; - File inputDataDir = new File(inputFileName).getParentFile(); - inputDataDir.mkdir(); - - String[] lines = new String[]{"llama\ttrue", "alpaca\tfalse"}; - HcatTestUtils.createTestDataFile(inputFileName, lines); - - assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode()); - assertEquals(0, driver.run("create external table " + tbl + - " (a string, b boolean) row format delimited fields terminated by '\t'" + - " stored as textfile location 'file://" + - inputDataDir.getPath().replaceAll("\\\\", "/") + "'").getResponseCode()); - - Properties properties = new Properties(); - properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true"); - PigServer server = new PigServer(ExecType.LOCAL, properties); - server.registerQuery( - "data = load 'test_convert_boolean_to_int' using org.apache.hive.hcatalog.pig.HCatLoader();"); - Schema schema = server.dumpSchema("data"); - assertEquals(2, schema.getFields().size()); - - assertEquals("a", schema.getField(0).alias); - assertEquals(DataType.CHARARRAY, schema.getField(0).type); - assertEquals("b", schema.getField(1).alias); - if (PigHCatUtil.pigHasBooleanSupport()){ - assertEquals(DataType.BOOLEAN, schema.getField(1).type); - } else { - assertEquals(DataType.INTEGER, schema.getField(1).type); - } - - Iterator iterator = server.openIterator("data"); - Tuple t = iterator.next(); - assertEquals("llama", t.get(0)); - assertEquals(1, t.get(1)); - t = iterator.next(); - assertEquals("alpaca", t.get(0)); - assertEquals(0, t.get(1)); - assertFalse(iterator.hasNext()); + numTuplesRead = 0; + Iterator Y3Iter = server.openIterator("Y3"); + while (Y3Iter.hasNext()) { + Tuple t = Y3Iter.next(); + assertEquals(t.size(), 2); + assertTrue(t.get(0).getClass() == String.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).second); + assertTrue(t.get(1).getClass() == Integer.class); + assertEquals(t.get(1), basicInputData.get(numTuplesRead).first); + numTuplesRead++; + } + assertEquals(basicInputData.size(), numTuplesRead); + } + + public void testGetInputBytes() throws Exception { + File file = new File(TEST_WAREHOUSE_DIR + "/" + SPECIFIC_SIZE_TABLE + "/part-m-00000"); + file.deleteOnExit(); + RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw"); + randomAccessFile.setLength(2L * 1024 * 1024 * 1024); + + Job job = new Job(); + HCatLoader hCatLoader = new HCatLoader(); + hCatLoader.setUDFContextSignature(this.getName()); + hCatLoader.setLocation(SPECIFIC_SIZE_TABLE, job); + ResourceStatistics statistics = hCatLoader.getStatistics(file.getAbsolutePath(), job); + assertEquals(2048, (long) statistics.getmBytes()); + } + + public void testConvertBooleanToInt() throws Exception { + String tbl = "test_convert_boolean_to_int"; + String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt"; + File inputDataDir = new File(inputFileName).getParentFile(); + inputDataDir.mkdir(); + + String[] lines = new String[]{"llama\ttrue", 
"alpaca\tfalse"}; + HcatTestUtils.createTestDataFile(inputFileName, lines); + + assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode()); + assertEquals(0, driver.run("create external table " + tbl + + " (a string, b boolean) row format delimited fields terminated by '\t'" + + " stored as textfile location 'file://" + + inputDataDir.getPath().replaceAll("\\\\", "/") + "'").getResponseCode()); + + Properties properties = new Properties(); + properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true"); + PigServer server = new PigServer(ExecType.LOCAL, properties); + server.registerQuery( + "data = load 'test_convert_boolean_to_int' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Schema schema = server.dumpSchema("data"); + assertEquals(2, schema.getFields().size()); + + assertEquals("a", schema.getField(0).alias); + assertEquals(DataType.CHARARRAY, schema.getField(0).type); + assertEquals("b", schema.getField(1).alias); + if (PigHCatUtil.pigHasBooleanSupport()){ + assertEquals(DataType.BOOLEAN, schema.getField(1).type); + } else { + assertEquals(DataType.INTEGER, schema.getField(1).type); } + + Iterator iterator = server.openIterator("data"); + Tuple t = iterator.next(); + assertEquals("llama", t.get(0)); + assertEquals(1, t.get(1)); + t = iterator.next(); + assertEquals("alpaca", t.get(0)); + assertEquals(0, t.get(1)); + assertFalse(iterator.hasNext()); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java index 0f7182f..fa7764b 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java @@ -51,254 +51,254 @@ public class TestHCatLoaderComplexSchema { - //private static MiniCluster cluster = MiniCluster.buildCluster(); - private static Driver driver; - //private static Properties props; - private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoaderComplexSchema.class); - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); + //private static MiniCluster cluster = MiniCluster.buildCluster(); + private static Driver driver; + //private static Properties props; + private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoaderComplexSchema.class); + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + protected String storageFormat() { + return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + } + + private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; } - - protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + createTable = createTable + "stored as " + 
storageFormat(); + LOG.info("Creating table:\n {}", createTable); + CommandProcessorResponse result = driver.run(createTable); + int retCode = result.getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + " " + result.getErrorMessage() + "]"); } - - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " + storageFormat(); - LOG.info("Creating table:\n {}", createTable); - CommandProcessorResponse result = driver.run(createTable); - int retCode = result.getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + " " + result.getErrorMessage() + "]"); - } - } - - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, null); - } - - @BeforeClass - public static void setUpBeforeClass() throws Exception { - - HiveConf hiveConf = new HiveConf(TestHCatLoaderComplexSchema.class); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - //props = new Properties(); - //props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - - } - - private static final TupleFactory tf = TupleFactory.getInstance(); - private static final BagFactory bf = BagFactory.getInstance(); - - private Tuple t(Object... objects) { - return tf.newTuple(Arrays.asList(objects)); + } + + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + + HiveConf hiveConf = new HiveConf(TestHCatLoaderComplexSchema.class); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + //props = new Properties(); + //props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); + + } + + private static final TupleFactory tf = TupleFactory.getInstance(); + private static final BagFactory bf = BagFactory.getInstance(); + + private Tuple t(Object... objects) { + return tf.newTuple(Arrays.asList(objects)); + } + + private DataBag b(Tuple... 
objects) { + return bf.newDefaultBag(Arrays.asList(objects)); + } + + /** + * artificially complex nested schema to test nested schema conversion + * @throws Exception + */ + @Test + public void testSyntheticComplexSchema() throws Exception { + String pigSchema = + "a: " + + "(" + + "aa: chararray, " + + "ab: long, " + + "ac: map[], " + + "ad: { t: (ada: long) }, " + + "ae: { t: (aea:long, aeb: ( aeba: chararray, aebb: long)) }," + + "af: (afa: chararray, afb: long) " + + ")," + + "b: chararray, " + + "c: long, " + + "d: { t: (da:long, db: ( dba: chararray, dbb: long), dc: { t: (dca: long) } ) } "; + + // with extra structs + String tableSchema = + "a struct<" + + "aa: string, " + + "ab: bigint, " + + "ac: map, " + + "ad: array>, " + + "ae: array>>," + + "af: struct " + + ">, " + + "b string, " + + "c bigint, " + + "d array, dc: array>>>"; + + // without extra structs + String tableSchema2 = + "a struct<" + + "aa: string, " + + "ab: bigint, " + + "ac: map, " + + "ad: array, " + + "ae: array>>," + + "af: struct " + + ">, " + + "b string, " + + "c bigint, " + + "d array, dc: array>>"; + + List data = new ArrayList(); + for (int i = 0; i < 10; i++) { + Tuple t = t( + t( + "aa test", + 2l, + new HashMap() { + { + put("ac test1", "test 1"); + put("ac test2", "test 2"); + } + }, + b(t(3l), t(4l)), + b(t(5l, t("aeba test", 6l))), + t("afa test", 7l) + ), + "b test", + (long) i, + b(t(8l, t("dba test", 9l), b(t(10l))))); + + data.add(t); } - - private DataBag b(Tuple... objects) { - return bf.newDefaultBag(Arrays.asList(objects)); + verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, true); + verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, false); + verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, true); + verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, false); + + } + + private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List data, boolean provideSchemaToStorer) + throws IOException, CommandNeedRetryException, ExecException, FrontendException { + MockLoader.setData(tablename + "Input", data); + try { + createTable(tablename, tableSchema); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + tablename + "Input' using org.apache.hive.hcatalog.pig.MockLoader() AS (" + pigSchema + ");"); + Schema dumpedASchema = server.dumpSchema("A"); + server.registerQuery("STORE A into '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatStorer(" + + (provideSchemaToStorer ? 
"'', '" + pigSchema + "'" : "") + + ");"); + + ExecJob execJob = server.executeBatch().get(0); + if (!execJob.getStatistics().isSuccessful()) { + throw new RuntimeException("Import failed", execJob.getException()); + } + // test that schema was loaded correctly + server.registerQuery("X = load '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.dumpSchema("X"); + Iterator it = server.openIterator("X"); + int i = 0; + while (it.hasNext()) { + Tuple input = data.get(i++); + Tuple output = it.next(); + Assert.assertEquals(input.toString(), output.toString()); + LOG.info("tuple : {} ", output); + } + Schema dumpedXSchema = server.dumpSchema("X"); + + Assert.assertEquals( + "expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", + "", + compareIgnoreFiledNames(dumpedASchema, dumpedXSchema)); + + } finally { + dropTable(tablename); } - - /** - * artificially complex nested schema to test nested schema conversion - * @throws Exception - */ - @Test - public void testSyntheticComplexSchema() throws Exception { - String pigSchema = - "a: " + - "(" + - "aa: chararray, " + - "ab: long, " + - "ac: map[], " + - "ad: { t: (ada: long) }, " + - "ae: { t: (aea:long, aeb: ( aeba: chararray, aebb: long)) }," + - "af: (afa: chararray, afb: long) " + - ")," + - "b: chararray, " + - "c: long, " + - "d: { t: (da:long, db: ( dba: chararray, dbb: long), dc: { t: (dca: long) } ) } "; - - // with extra structs - String tableSchema = - "a struct<" + - "aa: string, " + - "ab: bigint, " + - "ac: map, " + - "ad: array>, " + - "ae: array>>," + - "af: struct " + - ">, " + - "b string, " + - "c bigint, " + - "d array, dc: array>>>"; - - // without extra structs - String tableSchema2 = - "a struct<" + - "aa: string, " + - "ab: bigint, " + - "ac: map, " + - "ad: array, " + - "ae: array>>," + - "af: struct " + - ">, " + - "b string, " + - "c bigint, " + - "d array, dc: array>>"; - - List data = new ArrayList(); - for (int i = 0; i < 10; i++) { - Tuple t = t( - t( - "aa test", - 2l, - new HashMap() { - { - put("ac test1", "test 1"); - put("ac test2", "test 2"); - } - }, - b(t(3l), t(4l)), - b(t(5l, t("aeba test", 6l))), - t("afa test", 7l) - ), - "b test", - (long) i, - b(t(8l, t("dba test", 9l), b(t(10l))))); - - data.add(t); - } - verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, true); - verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, false); - verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, true); - verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, false); - + } + + private String compareIgnoreFiledNames(Schema expected, Schema got) throws FrontendException { + if (expected == null || got == null) { + if (expected == got) { + return ""; + } else { + return "\nexpected " + expected + " got " + got; + } } - - private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List data, boolean provideSchemaToStorer) - throws IOException, CommandNeedRetryException, ExecException, FrontendException { - MockLoader.setData(tablename + "Input", data); - try { - createTable(tablename, tableSchema); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + tablename + "Input' using org.apache.hive.hcatalog.pig.MockLoader() AS (" + pigSchema + ");"); - Schema dumpedASchema = server.dumpSchema("A"); - server.registerQuery("STORE A into '" + tablename + "' using 
org.apache.hive.hcatalog.pig.HCatStorer(" - + (provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") - + ");"); - - ExecJob execJob = server.executeBatch().get(0); - if (!execJob.getStatistics().isSuccessful()) { - throw new RuntimeException("Import failed", execJob.getException()); - } - // test that schema was loaded correctly - server.registerQuery("X = load '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); - server.dumpSchema("X"); - Iterator it = server.openIterator("X"); - int i = 0; - while (it.hasNext()) { - Tuple input = data.get(i++); - Tuple output = it.next(); - Assert.assertEquals(input.toString(), output.toString()); - LOG.info("tuple : {} ", output); - } - Schema dumpedXSchema = server.dumpSchema("X"); - - Assert.assertEquals( - "expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", - "", - compareIgnoreFiledNames(dumpedASchema, dumpedXSchema)); - - } finally { - dropTable(tablename); - } + if (expected.size() != got.size()) { + return "\nsize expected " + expected.size() + " (" + expected + ") got " + got.size() + " (" + got + ")"; } - - private String compareIgnoreFiledNames(Schema expected, Schema got) throws FrontendException { - if (expected == null || got == null) { - if (expected == got) { - return ""; - } else { - return "\nexpected " + expected + " got " + got; - } - } - if (expected.size() != got.size()) { - return "\nsize expected " + expected.size() + " (" + expected + ") got " + got.size() + " (" + got + ")"; - } - String message = ""; - for (int i = 0; i < expected.size(); i++) { - FieldSchema expectedField = expected.getField(i); - FieldSchema gotField = got.getField(i); - if (expectedField.type != gotField.type) { - message += "\ntype expected " + expectedField.type + " (" + expectedField + ") got " + gotField.type + " (" + gotField + ")"; - } else { - message += compareIgnoreFiledNames(expectedField.schema, gotField.schema); - } - } - return message; + String message = ""; + for (int i = 0; i < expected.size(); i++) { + FieldSchema expectedField = expected.getField(i); + FieldSchema gotField = got.getField(i); + if (expectedField.type != gotField.type) { + message += "\ntype expected " + expectedField.type + " (" + expectedField + ") got " + gotField.type + " (" + gotField + ")"; + } else { + message += compareIgnoreFiledNames(expectedField.schema, gotField.schema); + } } - - /** - * tests that unnecessary tuples are drop while converting schema - * (Pig requires Tuples in Bags) - * @throws Exception - */ - @Test - public void testTupleInBagInTupleInBag() throws Exception { - String pigSchema = "a: { b : ( c: { d: (i : long) } ) }"; - - String tableSchema = "a array< array< bigint > >"; - - List data = new ArrayList(); - data.add(t(b(t(b(t(100l), t(101l))), t(b(t(110l)))))); - data.add(t(b(t(b(t(200l))), t(b(t(210l))), t(b(t(220l)))))); - data.add(t(b(t(b(t(300l), t(301l)))))); - data.add(t(b(t(b(t(400l))), t(b(t(410l), t(411l), t(412l)))))); - - - verifyWriteRead("TupleInBagInTupleInBag1", pigSchema, tableSchema, data, true); - verifyWriteRead("TupleInBagInTupleInBag2", pigSchema, tableSchema, data, false); - - // test that we don't drop the unnecessary tuple if the table has the corresponding Struct - String tableSchema2 = "a array< struct< c: array< struct< i: bigint > > > >"; - - verifyWriteRead("TupleInBagInTupleInBag3", pigSchema, tableSchema2, data, true); - verifyWriteRead("TupleInBagInTupleInBag4", pigSchema, tableSchema2, data, false); - + return message; + } + + /** + * tests that unnecessary 
tuples are drop while converting schema + * (Pig requires Tuples in Bags) + * @throws Exception + */ + @Test + public void testTupleInBagInTupleInBag() throws Exception { + String pigSchema = "a: { b : ( c: { d: (i : long) } ) }"; + + String tableSchema = "a array< array< bigint > >"; + + List data = new ArrayList(); + data.add(t(b(t(b(t(100l), t(101l))), t(b(t(110l)))))); + data.add(t(b(t(b(t(200l))), t(b(t(210l))), t(b(t(220l)))))); + data.add(t(b(t(b(t(300l), t(301l)))))); + data.add(t(b(t(b(t(400l))), t(b(t(410l), t(411l), t(412l)))))); + + + verifyWriteRead("TupleInBagInTupleInBag1", pigSchema, tableSchema, data, true); + verifyWriteRead("TupleInBagInTupleInBag2", pigSchema, tableSchema, data, false); + + // test that we don't drop the unnecessary tuple if the table has the corresponding Struct + String tableSchema2 = "a array< struct< c: array< struct< i: bigint > > > >"; + + verifyWriteRead("TupleInBagInTupleInBag3", pigSchema, tableSchema2, data, true); + verifyWriteRead("TupleInBagInTupleInBag4", pigSchema, tableSchema2, data, false); + + } + + @Test + public void testMapWithComplexData() throws Exception { + String pigSchema = "a: long, b: map[]"; + String tableSchema = "a bigint, b map>"; + + List data = new ArrayList(); + for (int i = 0; i < 10; i++) { + Tuple t = t( + (long) i, + new HashMap() { + { + put("b test 1", t(1l, "test 1")); + put("b test 2", t(2l, "test 2")); + } + }); + + data.add(t); } + verifyWriteRead("testMapWithComplexData", pigSchema, tableSchema, data, true); + verifyWriteRead("testMapWithComplexData2", pigSchema, tableSchema, data, false); - @Test - public void testMapWithComplexData() throws Exception { - String pigSchema = "a: long, b: map[]"; - String tableSchema = "a bigint, b map>"; - - List data = new ArrayList(); - for (int i = 0; i < 10; i++) { - Tuple t = t( - (long) i, - new HashMap() { - { - put("b test 1", t(1l, "test 1")); - put("b test 2", t(2l, "test 2")); - } - }); - - data.add(t); - } - verifyWriteRead("testMapWithComplexData", pigSchema, tableSchema, data, true); - verifyWriteRead("testMapWithComplexData2", pigSchema, tableSchema, data, false); - - } + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java index 43be7ab..b9568f8 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java @@ -41,101 +41,101 @@ */ public class TestHCatLoaderStorer extends HCatBaseTest { - /** - * Ensure Pig can read/write tinyint/smallint columns. - */ - @Test - public void testSmallTinyInt() throws Exception { - - String readTblName = "test_small_tiny_int"; - File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData"); - File dataFile = new File(dataDir, "testSmallTinyInt.tsv"); - - String writeTblName = "test_small_tiny_int_write"; - File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv"); - - FileUtil.fullyDelete(dataDir); // Might not exist - Assert.assertTrue(dataDir.mkdir()); - - HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[]{ - String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), - String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) - }); - - // Create a table with smallint/tinyint columns, load data, and query from Hive. 
- Assert.assertEquals(0, driver.run("drop table if exists " + readTblName).getResponseCode()); - Assert.assertEquals(0, driver.run("create external table " + readTblName + - " (my_small_int smallint, my_tiny_int tinyint)" + - " row format delimited fields terminated by '\t' stored as textfile").getResponseCode()); - Assert.assertEquals(0, driver.run("load data local inpath '" + - dataDir.getPath().replaceAll("\\\\", "/") + "' into table " + readTblName).getResponseCode()); - - PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery( - "data = load '" + readTblName + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); - - // Ensure Pig schema is correct. - Schema schema = server.dumpSchema("data"); - Assert.assertEquals(2, schema.getFields().size()); - Assert.assertEquals("my_small_int", schema.getField(0).alias); - Assert.assertEquals(DataType.INTEGER, schema.getField(0).type); - Assert.assertEquals("my_tiny_int", schema.getField(1).alias); - Assert.assertEquals(DataType.INTEGER, schema.getField(1).type); - - // Ensure Pig can read data correctly. - Iterator it = server.openIterator("data"); - Tuple t = it.next(); - Assert.assertEquals(new Integer(Short.MIN_VALUE), t.get(0)); - Assert.assertEquals(new Integer(Byte.MIN_VALUE), t.get(1)); - t = it.next(); - Assert.assertEquals(new Integer(Short.MAX_VALUE), t.get(0)); - Assert.assertEquals(new Integer(Byte.MAX_VALUE), t.get(1)); - Assert.assertFalse(it.hasNext()); - - // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the - // bounds of the column type are written, and values outside throw an exception. - Assert.assertEquals(0, driver.run("drop table if exists " + writeTblName).getResponseCode()); - Assert.assertEquals(0, driver.run("create table " + writeTblName + - " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); - - // Values within the column type bounds. - HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[]{ - String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), - String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) - }); - smallTinyIntBoundsCheckHelper(writeDataFile.getPath().replaceAll("\\\\", "/"), ExecJob.JOB_STATUS.COMPLETED); - - // Values outside the column type bounds will fail at runtime. 
- HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[]{ - String.format("%d\t%d", Short.MIN_VALUE - 1, 0)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); - - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[]{ - String.format("%d\t%d", Short.MAX_VALUE + 1, 0)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED); - - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[]{ - String.format("%d\t%d", 0, Byte.MIN_VALUE - 1)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); - - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[]{ - String.format("%d\t%d", 0, Byte.MAX_VALUE + 1)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED); - } - - private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) - throws Exception { - Assert.assertEquals(0, driver.run("drop table if exists test_tbl").getResponseCode()); - Assert.assertEquals(0, driver.run("create table test_tbl" + - " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("data = load '" + data + - "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);"); - server.registerQuery( - "store data into 'test_tbl' using org.apache.hive.hcatalog.pig.HCatStorer();"); - List jobs = server.executeBatch(); - Assert.assertEquals(expectedStatus, jobs.get(0).getStatus()); - } + /** + * Ensure Pig can read/write tinyint/smallint columns. + */ + @Test + public void testSmallTinyInt() throws Exception { + + String readTblName = "test_small_tiny_int"; + File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData"); + File dataFile = new File(dataDir, "testSmallTinyInt.tsv"); + + String writeTblName = "test_small_tiny_int_write"; + File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv"); + + FileUtil.fullyDelete(dataDir); // Might not exist + Assert.assertTrue(dataDir.mkdir()); + + HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[]{ + String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), + String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) + }); + + // Create a table with smallint/tinyint columns, load data, and query from Hive. + Assert.assertEquals(0, driver.run("drop table if exists " + readTblName).getResponseCode()); + Assert.assertEquals(0, driver.run("create external table " + readTblName + + " (my_small_int smallint, my_tiny_int tinyint)" + + " row format delimited fields terminated by '\t' stored as textfile").getResponseCode()); + Assert.assertEquals(0, driver.run("load data local inpath '" + + dataDir.getPath().replaceAll("\\\\", "/") + "' into table " + readTblName).getResponseCode()); + + PigServer server = new PigServer(ExecType.LOCAL); + server.registerQuery( + "data = load '" + readTblName + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + + // Ensure Pig schema is correct. 
+ Schema schema = server.dumpSchema("data"); + Assert.assertEquals(2, schema.getFields().size()); + Assert.assertEquals("my_small_int", schema.getField(0).alias); + Assert.assertEquals(DataType.INTEGER, schema.getField(0).type); + Assert.assertEquals("my_tiny_int", schema.getField(1).alias); + Assert.assertEquals(DataType.INTEGER, schema.getField(1).type); + + // Ensure Pig can read data correctly. + Iterator it = server.openIterator("data"); + Tuple t = it.next(); + Assert.assertEquals(new Integer(Short.MIN_VALUE), t.get(0)); + Assert.assertEquals(new Integer(Byte.MIN_VALUE), t.get(1)); + t = it.next(); + Assert.assertEquals(new Integer(Short.MAX_VALUE), t.get(0)); + Assert.assertEquals(new Integer(Byte.MAX_VALUE), t.get(1)); + Assert.assertFalse(it.hasNext()); + + // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the + // bounds of the column type are written, and values outside throw an exception. + Assert.assertEquals(0, driver.run("drop table if exists " + writeTblName).getResponseCode()); + Assert.assertEquals(0, driver.run("create table " + writeTblName + + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); + + // Values within the column type bounds. + HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[]{ + String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), + String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) + }); + smallTinyIntBoundsCheckHelper(writeDataFile.getPath().replaceAll("\\\\", "/"), ExecJob.JOB_STATUS.COMPLETED); + + // Values outside the column type bounds will fail at runtime. + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[]{ + String.format("%d\t%d", Short.MIN_VALUE - 1, 0)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); + + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[]{ + String.format("%d\t%d", Short.MAX_VALUE + 1, 0)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED); + + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[]{ + String.format("%d\t%d", 0, Byte.MIN_VALUE - 1)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); + + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[]{ + String.format("%d\t%d", 0, Byte.MAX_VALUE + 1)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED); + } + + private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) + throws Exception { + Assert.assertEquals(0, driver.run("drop table if exists test_tbl").getResponseCode()); + Assert.assertEquals(0, driver.run("create table test_tbl" + + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("data = load '" + data + + "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);"); + server.registerQuery( + "store data into 'test_tbl' using org.apache.hive.hcatalog.pig.HCatStorer();"); + List jobs = server.executeBatch(); + Assert.assertEquals(expectedStatus, jobs.get(0).getStatus()); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java index 
12a5d28..2674708 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java @@ -40,619 +40,619 @@ public class TestHCatStorer extends HCatBaseTest { - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - @Test - public void testPartColsInData() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - int LOOP_SIZE = 11; - String[] input = new String[LOOP_SIZE]; - for (int i = 0; i < LOOP_SIZE; i++) { - input[i] = i + "\t1"; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); - server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); - Iterator itr = server.openIterator("B"); - - int i = 0; - - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(2, t.size()); - Assert.assertEquals(t.get(0), i); - Assert.assertEquals(t.get(1), "1"); - i++; - } - - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); - } - - @Test - public void testMultiPartColsInData() throws IOException, CommandNeedRetryException { - - driver.run("drop table employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", - "111238\tKalpana\t01/01/2000\tF\tIN\tKA", - "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("TN = FILTER A BY emp_state == 'TN';"); - pig.registerQuery("KA = FILTER A BY emp_state == 'KA';"); - pig.registerQuery("KL = FILTER A BY emp_state == 'KL';"); - pig.registerQuery("AP = FILTER A BY emp_state == 'AP';"); - pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=TN');"); - pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KA');"); - pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KL');"); - pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=AP');"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(4, results.size()); - 
Collections.sort(results); - Assert.assertEquals(inputData[0], results.get(0)); - Assert.assertEquals(inputData[1], results.get(1)); - Assert.assertEquals(inputData[2], results.get(2)); - Assert.assertEquals(inputData[3], results.get(3)); - driver.run("drop table employee"); - } - - @Test - public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - int LOOP_SIZE = 11; - String[] input = new String[LOOP_SIZE]; - for (int i = 0; i < LOOP_SIZE; i++) { - input[i] = i + ""; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); - server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); - Iterator itr = server.openIterator("B"); - - int i = 0; - - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(2, t.size()); - Assert.assertEquals(t.get(0), i); - Assert.assertEquals(t.get(1), "1"); - i++; - } - - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); - } - - @Test - public void testNoAlias() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_parted"); - String createTable = "create table junit_parted(a int, b string) partitioned by (ds string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - PigServer server = new PigServer(ExecType.LOCAL); - boolean errCaught = false; - try { - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("B = foreach A generate a+10, b;"); - server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); - server.executeBatch(); - } catch (PigException fe) { - PigException pe = LogUtils.getPigException(fe); - Assert.assertTrue(pe instanceof FrontendException); - Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); - Assert.assertTrue(pe.getMessage().contains("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.")); - errCaught = true; - } - Assert.assertTrue(errCaught); - errCaught = false; - try { - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);"); - server.registerQuery("B = foreach A generate a, B;"); - server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); - server.executeBatch(); - } catch (PigException fe) { - PigException pe = LogUtils.getPigException(fe); - Assert.assertTrue(pe instanceof FrontendException); - Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); - Assert.assertTrue(pe.getMessage().contains("Column names should all be in lowercase. 
Invalid name found: B")); - errCaught = true; - } - driver.run("drop table junit_parted"); - Assert.assertTrue(errCaught); - } - - @Test - public void testStoreMultiTables() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - driver.run("drop table junit_unparted2"); - createTable = "create table junit_unparted2(a int, b string) stored as RCFILE"; - retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("B = filter A by a < 2;"); - server.registerQuery("store B into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); - server.registerQuery("C = filter A by a >= 2;"); - server.registerQuery("store C into 'junit_unparted2' using " + HCatStorer.class.getName() + "();"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("select * from junit_unparted2"); - ArrayList res2 = new ArrayList(); - driver.getResults(res2); - - res.addAll(res2); - driver.run("drop table junit_unparted"); - driver.run("drop table junit_unparted2"); - - Iterator itr = res.iterator(); - for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { - Assert.assertEquals(input[i], itr.next()); - } - - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testStoreWithNoSchema() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { - Assert.assertEquals(input[i], itr.next()); - } - - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as 
RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { - Assert.assertEquals(input[i], itr.next()); - } - - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testEmptyStore() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("B = filter A by a > 100;"); - server.registerQuery("store B into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testBagNStruct() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(b string,a struct, arr_of_struct array, " + - "arr_of_struct2 array>, arr_of_struct3 array>) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = new String[]{"zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}", - "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (b:chararray, a:tuple(a1:int), arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)});"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','b:chararray, a:tuple(a1:int)," + - " arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, 
arr_of_struct3:bag{t3:tuple(s3:chararray)}');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - Assert.assertEquals("zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]", itr.next()); - Assert.assertEquals("chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]", itr.next()); - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b float, c double, d bigint, e string, h boolean, f binary, g binary) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int i = 0; - String[] input = new String[3]; - input[i++] = "0\t\t\t\t\t\t\t"; //Empty values except first column - input[i++] = "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "true" + "\tbinary-data"; //First column empty - input[i++] = i + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "false" + "\tbinary-data"; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray);"); - //null gets stored into column g which is a binary field. - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray');"); - server.executeBatch(); - - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - - Iterator itr = res.iterator(); - String next = itr.next(); - Assert.assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", next ); - Assert.assertEquals("NULL\t4.2\t2.2\t4\tlets hcat\ttrue\tbinary-data\tNULL", itr.next()); - Assert.assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tfalse\tbinary-data\tNULL", itr.next()); - Assert.assertFalse(itr.hasNext()); - - server.registerQuery("B = load 'junit_unparted' using " + HCatLoader.class.getName() + ";"); - Iterator iter = server.openIterator("B"); - int count = 0; - int num5nulls = 0; - while (iter.hasNext()) { - Tuple t = iter.next(); - if (t.get(6) == null) { - num5nulls++; - } else { - Assert.assertTrue(t.get(6) instanceof DataByteArray); - } - Assert.assertNull(t.get(7)); - count++; - } - Assert.assertEquals(3, count); - Assert.assertEquals(1, num5nulls); - driver.run("drop table junit_unparted"); - } - - @Test - public void testStoreFuncSimple() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] inputData = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - inputData[k++] = si + 
"\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - Assert.assertEquals(si + "\t" + j, itr.next()); - } - } - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", - "111238\tKalpana\t01/01/2000\tF\tIN\tKA", - "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN');"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(4, results.size()); - Collections.sort(results); - Assert.assertEquals(inputData[0], results.get(0)); - Assert.assertEquals(inputData[1], results.get(1)); - Assert.assertEquals(inputData[2], results.get(2)); - Assert.assertEquals(inputData[3], results.get(3)); - driver.run("drop table employee"); - } - - @Test - public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", - "111238\tKalpana\t01/01/2000\tF\tIN\tKA", - "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("IN 
= FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(4, results.size()); - Collections.sort(results); - Assert.assertEquals(inputData[0], results.get(0)); - Assert.assertEquals(inputData[1], results.get(1)); - Assert.assertEquals(inputData[2], results.get(2)); - Assert.assertEquals(inputData[3], results.get(3)); - driver.run("drop table employee"); - } - - @Test - public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {}; - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(0, results.size()); - driver.run("drop table employee"); - } - - public void testPartitionPublish() - throws IOException, CommandNeedRetryException { - - driver.run("drop table ptn_fail"); - String createTable = "create table ptn_fail(a int, c string) partitioned by (b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - int LOOP_SIZE = 11; - String[] input = new String[LOOP_SIZE]; - - for (int i = 0; i < LOOP_SIZE; i++) { - input[i] = i + "\tmath"; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME - + "' as (a:int, c:chararray);"); - server.registerQuery("B = filter A by " + FailEvalFunc.class.getName() - + "($0);"); - server.registerQuery("store B into 'ptn_fail' using " - + HCatStorer.class.getName() + "('b=math');"); - server.executeBatch(); - - String query = "show partitions ptn_fail"; - retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new IOException("Error " + retCode + " running query " - + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - Assert.assertEquals(0, res.size()); - - // Make sure the partitions directory is not in hdfs. 
- Assert.assertTrue((new File(TEST_WAREHOUSE_DIR + "/ptn_fail")).exists()); - Assert.assertFalse((new File(TEST_WAREHOUSE_DIR + "/ptn_fail/b=math")) - .exists()); - } - - static public class FailEvalFunc extends EvalFunc { - - /* - * @param Tuple /* @return null /* @throws IOException - * - * @see org.apache.pig.EvalFunc#exec(org.apache.pig.data.Tuple) - */ - @Override - public Boolean exec(Tuple tuple) throws IOException { - throw new IOException("Eval Func to mimic Failure."); - } + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + @Test + public void testPartColsInData() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + int LOOP_SIZE = 11; + String[] input = new String[LOOP_SIZE]; + for (int i = 0; i < LOOP_SIZE; i++) { + input[i] = i + "\t1"; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); + server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); + Iterator itr = server.openIterator("B"); + + int i = 0; + + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(2, t.size()); + Assert.assertEquals(t.get(0), i); + Assert.assertEquals(t.get(1), "1"); + i++; + } + + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); + } + + @Test + public void testMultiPartColsInData() throws IOException, CommandNeedRetryException { + + driver.run("drop table employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + "111238\tKalpana\t01/01/2000\tF\tIN\tKA", + "111239\tSatya\t01/01/2001\tM\tIN\tKL", + "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("TN = FILTER A BY emp_state == 'TN';"); + pig.registerQuery("KA = FILTER A BY emp_state == 'KA';"); + pig.registerQuery("KL = FILTER A BY emp_state == 'KL';"); + pig.registerQuery("AP = FILTER A BY emp_state == 'AP';"); + pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=TN');"); + pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KA');"); + pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KL');"); + pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=AP');"); + 
pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(4, results.size()); + Collections.sort(results); + Assert.assertEquals(inputData[0], results.get(0)); + Assert.assertEquals(inputData[1], results.get(1)); + Assert.assertEquals(inputData[2], results.get(2)); + Assert.assertEquals(inputData[3], results.get(3)); + driver.run("drop table employee"); + } + + @Test + public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + int LOOP_SIZE = 11; + String[] input = new String[LOOP_SIZE]; + for (int i = 0; i < LOOP_SIZE; i++) { + input[i] = i + ""; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); + server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); + Iterator itr = server.openIterator("B"); + + int i = 0; + + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(2, t.size()); + Assert.assertEquals(t.get(0), i); + Assert.assertEquals(t.get(1), "1"); + i++; + } + + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); + } + + @Test + public void testNoAlias() throws IOException, CommandNeedRetryException { + driver.run("drop table junit_parted"); + String createTable = "create table junit_parted(a int, b string) partitioned by (ds string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + PigServer server = new PigServer(ExecType.LOCAL); + boolean errCaught = false; + try { + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B = foreach A generate a+10, b;"); + server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); + server.executeBatch(); + } catch (PigException fe) { + PigException pe = LogUtils.getPigException(fe); + Assert.assertTrue(pe instanceof FrontendException); + Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); + Assert.assertTrue(pe.getMessage().contains("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.")); + errCaught = true; + } + Assert.assertTrue(errCaught); + errCaught = false; + try { + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);"); + server.registerQuery("B = foreach A generate a, B;"); + server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); + server.executeBatch(); + } catch (PigException fe) { + PigException pe = LogUtils.getPigException(fe); + Assert.assertTrue(pe instanceof FrontendException); + Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); + Assert.assertTrue(pe.getMessage().contains("Column names should all be in lowercase. 
Invalid name found: B")); + errCaught = true; + } + driver.run("drop table junit_parted"); + Assert.assertTrue(errCaught); + } + + @Test + public void testStoreMultiTables() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + driver.run("drop table junit_unparted2"); + createTable = "create table junit_unparted2(a int, b string) stored as RCFILE"; + retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B = filter A by a < 2;"); + server.registerQuery("store B into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); + server.registerQuery("C = filter A by a >= 2;"); + server.registerQuery("store C into 'junit_unparted2' using " + HCatStorer.class.getName() + "();"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("select * from junit_unparted2"); + ArrayList res2 = new ArrayList(); + driver.getResults(res2); + + res.addAll(res2); + driver.run("drop table junit_unparted"); + driver.run("drop table junit_unparted2"); + + Iterator itr = res.iterator(); + for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { + Assert.assertEquals(input[i], itr.next()); + } + + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testStoreWithNoSchema() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { + Assert.assertEquals(input[i], itr.next()); + } + + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as 
RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { + Assert.assertEquals(input[i], itr.next()); + } + + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testEmptyStore() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B = filter A by a > 100;"); + server.registerQuery("store B into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testBagNStruct() throws IOException, CommandNeedRetryException { + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(b string,a struct, arr_of_struct array, " + + "arr_of_struct2 array>, arr_of_struct3 array>) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = new String[]{"zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}", + "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (b:chararray, a:tuple(a1:int), arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)});"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','b:chararray, a:tuple(a1:int)," + + " arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, 
arr_of_struct3:bag{t3:tuple(s3:chararray)}');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + Assert.assertEquals("zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]", itr.next()); + Assert.assertEquals("chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]", itr.next()); + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b float, c double, d bigint, e string, h boolean, f binary, g binary) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int i = 0; + String[] input = new String[3]; + input[i++] = "0\t\t\t\t\t\t\t"; //Empty values except first column + input[i++] = "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "true" + "\tbinary-data"; //First column empty + input[i++] = i + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "false" + "\tbinary-data"; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray);"); + //null gets stored into column g which is a binary field. + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray');"); + server.executeBatch(); + + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + + Iterator itr = res.iterator(); + String next = itr.next(); + Assert.assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", next ); + Assert.assertEquals("NULL\t4.2\t2.2\t4\tlets hcat\ttrue\tbinary-data\tNULL", itr.next()); + Assert.assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tfalse\tbinary-data\tNULL", itr.next()); + Assert.assertFalse(itr.hasNext()); + + server.registerQuery("B = load 'junit_unparted' using " + HCatLoader.class.getName() + ";"); + Iterator iter = server.openIterator("B"); + int count = 0; + int num5nulls = 0; + while (iter.hasNext()) { + Tuple t = iter.next(); + if (t.get(6) == null) { + num5nulls++; + } else { + Assert.assertTrue(t.get(6) instanceof DataByteArray); + } + Assert.assertNull(t.get(7)); + count++; + } + Assert.assertEquals(3, count); + Assert.assertEquals(1, num5nulls); + driver.run("drop table junit_unparted"); + } + + @Test + public void testStoreFuncSimple() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] inputData = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + inputData[k++] = si + 
"\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + Assert.assertEquals(si + "\t" + j, itr.next()); + } } + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, CommandNeedRetryException { + + driver.run("drop table if exists employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + "111238\tKalpana\t01/01/2000\tF\tIN\tKA", + "111239\tSatya\t01/01/2001\tM\tIN\tKL", + "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); + pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN');"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(4, results.size()); + Collections.sort(results); + Assert.assertEquals(inputData[0], results.get(0)); + Assert.assertEquals(inputData[1], results.get(1)); + Assert.assertEquals(inputData[2], results.get(2)); + Assert.assertEquals(inputData[3], results.get(3)); + driver.run("drop table employee"); + } + + @Test + public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, CommandNeedRetryException { + + driver.run("drop table if exists employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + "111238\tKalpana\t01/01/2000\tF\tIN\tKA", + "111239\tSatya\t01/01/2001\tM\tIN\tKL", + "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("IN = 
FILTER A BY emp_country == 'IN';"); + pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(4, results.size()); + Collections.sort(results); + Assert.assertEquals(inputData[0], results.get(0)); + Assert.assertEquals(inputData[1], results.get(1)); + Assert.assertEquals(inputData[2], results.get(2)); + Assert.assertEquals(inputData[3], results.get(3)); + driver.run("drop table employee"); + } + + @Test + public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException { + + driver.run("drop table if exists employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {}; + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); + pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(0, results.size()); + driver.run("drop table employee"); + } + + public void testPartitionPublish() + throws IOException, CommandNeedRetryException { + + driver.run("drop table ptn_fail"); + String createTable = "create table ptn_fail(a int, c string) partitioned by (b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + int LOOP_SIZE = 11; + String[] input = new String[LOOP_SIZE]; + + for (int i = 0; i < LOOP_SIZE; i++) { + input[i] = i + "\tmath"; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + + "' as (a:int, c:chararray);"); + server.registerQuery("B = filter A by " + FailEvalFunc.class.getName() + + "($0);"); + server.registerQuery("store B into 'ptn_fail' using " + + HCatStorer.class.getName() + "('b=math');"); + server.executeBatch(); + + String query = "show partitions ptn_fail"; + retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new IOException("Error " + retCode + " running query " + + query); + } + + ArrayList res = new ArrayList(); + driver.getResults(res); + Assert.assertEquals(0, res.size()); + + // Make sure the partitions directory is not in hdfs. 
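+ // The table directory itself is still expected under the local test warehouse path; only the
+ // b=math partition directory must be absent, because FailEvalFunc fails the store job before the partition is committed.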
+ Assert.assertTrue((new File(TEST_WAREHOUSE_DIR + "/ptn_fail")).exists()); + Assert.assertFalse((new File(TEST_WAREHOUSE_DIR + "/ptn_fail/b=math")) + .exists()); + } + + static public class FailEvalFunc extends EvalFunc { + + /* + * @param Tuple /* @return null /* @throws IOException + * + * @see org.apache.pig.EvalFunc#exec(org.apache.pig.data.Tuple) + */ + @Override + public Boolean exec(Tuple tuple) throws IOException { + throw new IOException("Eval Func to mimic Failure."); + } + + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java index e870a98..c0f3c2f 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java @@ -38,164 +38,164 @@ import org.apache.pig.PigServer; public class TestHCatStorerMulti extends TestCase { - private static final String TEST_DATA_DIR = - "/tmp/build/test/data/" + TestHCatStorerMulti.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - private static final String BASIC_TABLE = "junit_unparted_basic"; - private static final String PARTITIONED_TABLE = "junit_parted_basic"; - private static Driver driver; - - private static Map> basicInputData; - - protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + private static final String TEST_DATA_DIR = + "/tmp/build/test/data/" + TestHCatStorerMulti.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + private static final String BASIC_TABLE = "junit_unparted_basic"; + private static final String PARTITIONED_TABLE = "junit_parted_basic"; + private static Driver driver; + + private static Map> basicInputData; + + protected String storageFormat() { + return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + } + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; } - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); + createTable = createTable + "stored as " + storageFormat(); + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + "]"); } - - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " + storageFormat(); - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); - } + } + + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } + + @Override + protected void setUp() throws Exception { + if (driver == null) { + HiveConf hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); } - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, null); - } + cleanup(); + } - @Override - protected void setUp() throws Exception { - if (driver == null) { - HiveConf hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - } - - cleanup(); - } + @Override + protected void tearDown() throws Exception { + cleanup(); + } - @Override - protected void tearDown() throws Exception { - cleanup(); - } + public void testStoreBasicTable() throws Exception { - public void testStoreBasicTable() throws Exception { + createTable(BASIC_TABLE, "a int, b string"); - createTable(BASIC_TABLE, "a int, b string"); + populateBasicFile(); - populateBasicFile(); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.executeBatch(); - server.executeBatch(); + driver.run("select * from " + BASIC_TABLE); + ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(unpartitionedTableValuesReadFromHiveDriver); + assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); + } - driver.run("select * from " + BASIC_TABLE); - ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(unpartitionedTableValuesReadFromHiveDriver); - 
assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); - } + public void testStorePartitionedTable() throws Exception { + createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); - public void testStorePartitionedTable() throws Exception { - createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); + populateBasicFile(); - populateBasicFile(); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B2 = filter A by a < 2;"); + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');"); + server.registerQuery("C2 = filter A by a >= 2;"); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');"); - server.registerQuery("B2 = filter A by a < 2;"); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');"); - server.registerQuery("C2 = filter A by a >= 2;"); - server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');"); + server.executeBatch(); - server.executeBatch(); + driver.run("select * from " + PARTITIONED_TABLE); + ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(partitionedTableValuesReadFromHiveDriver); + assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); + } - driver.run("select * from " + PARTITIONED_TABLE); - ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(partitionedTableValuesReadFromHiveDriver); - assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); - } + public void testStoreTableMulti() throws Exception { - public void testStoreTableMulti() throws Exception { + createTable(BASIC_TABLE, "a int, b string"); + createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); - createTable(BASIC_TABLE, "a int, b string"); - createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); + populateBasicFile(); - populateBasicFile(); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.registerQuery("B2 = filter A by a < 2;"); + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');"); + server.registerQuery("C2 = filter A by a >= 2;"); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');"); - server.registerQuery("B2 = filter A by a < 2;"); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');"); - server.registerQuery("C2 = filter A by a >= 2;"); - 
server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');"); + server.executeBatch(); - server.executeBatch(); + driver.run("select * from " + BASIC_TABLE); + ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(unpartitionedTableValuesReadFromHiveDriver); + driver.run("select * from " + PARTITIONED_TABLE); + ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(partitionedTableValuesReadFromHiveDriver); + assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); + assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); + } - driver.run("select * from " + BASIC_TABLE); - ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(unpartitionedTableValuesReadFromHiveDriver); - driver.run("select * from " + PARTITIONED_TABLE); - ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(partitionedTableValuesReadFromHiveDriver); - assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); - assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); + private void populateBasicFile() throws IOException { + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + basicInputData = new HashMap>(); + int k = 0; + File file = new File(INPUT_FILE_NAME); + file.deleteOnExit(); + FileWriter writer = new FileWriter(file); + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + String sj = "S" + j + "S"; + input[k] = si + "\t" + sj; + basicInputData.put(k, new Pair(i, sj)); + writer.write(input[k] + "\n"); + k++; + } } + writer.close(); + } - private void populateBasicFile() throws IOException { - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - basicInputData = new HashMap>(); - int k = 0; - File file = new File(INPUT_FILE_NAME); - file.deleteOnExit(); - FileWriter writer = new FileWriter(file); - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - String sj = "S" + j + "S"; - input[k] = si + "\t" + sj; - basicInputData.put(k, new Pair(i, sj)); - writer.write(input[k] + "\n"); - k++; - } - } - writer.close(); + private void cleanup() throws IOException, CommandNeedRetryException { + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); } + new File(TEST_WAREHOUSE_DIR).mkdirs(); - private void cleanup() throws IOException, CommandNeedRetryException { - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - dropTable(BASIC_TABLE); - dropTable(PARTITIONED_TABLE); - } + dropTable(BASIC_TABLE); + dropTable(PARTITIONED_TABLE); + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java index e10f2c8..7281781 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java @@ -41,52 +41,52 @@ */ public class TestHCatStorerWrapper extends HCatBaseTest { - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + 
private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - @Test - public void testStoreExternalTableWithExternalDir() throws IOException, CommandNeedRetryException{ + @Test + public void testStoreExternalTableWithExternalDir() throws IOException, CommandNeedRetryException{ - File tmpExternalDir = new File(TEST_DATA_DIR, UUID.randomUUID().toString()); - tmpExternalDir.deleteOnExit(); + File tmpExternalDir = new File(TEST_DATA_DIR, UUID.randomUUID().toString()); + tmpExternalDir.deleteOnExit(); - String part_val = "100"; + String part_val = "100"; - driver.run("drop table junit_external"); - String createTable = "create external table junit_external(a int, b string) partitioned by (c string) stored as RCFILE"; - Assert.assertEquals(0, driver.run(createTable).getResponseCode()); + driver.run("drop table junit_external"); + String createTable = "create external table junit_external(a int, b string) partitioned by (c string) stored as RCFILE"; + Assert.assertEquals(0, driver.run(createTable).getResponseCode()); - int LOOP_SIZE = 3; - String[] inputData = new String[LOOP_SIZE*LOOP_SIZE]; - int k = 0; - for(int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for(int j=1;j<=LOOP_SIZE;j++) { - inputData[k++] = si + "\t"+j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - logAndRegister(server, "A = load '"+INPUT_FILE_NAME+"' as (a:int, b:chararray);"); - logAndRegister(server, "store A into 'default.junit_external' using " + HCatStorerWrapper.class.getName() - + "('c=" + part_val + "','" + tmpExternalDir.getPath().replaceAll("\\\\", "/") + "');"); - server.executeBatch(); - - Assert.assertTrue(tmpExternalDir.exists()); - Assert.assertTrue(new File(tmpExternalDir.getPath().replaceAll("\\\\", "/") + "/" + "part-m-00000").exists()); + int LOOP_SIZE = 3; + String[] inputData = new String[LOOP_SIZE*LOOP_SIZE]; + int k = 0; + for(int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for(int j=1;j<=LOOP_SIZE;j++) { + inputData[k++] = si + "\t"+j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + logAndRegister(server, "A = load '"+INPUT_FILE_NAME+"' as (a:int, b:chararray);"); + logAndRegister(server, "store A into 'default.junit_external' using " + HCatStorerWrapper.class.getName() + + "('c=" + part_val + "','" + tmpExternalDir.getPath().replaceAll("\\\\", "/") + "');"); + server.executeBatch(); - driver.run("select * from junit_external"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_external"); - Iterator itr = res.iterator(); - for(int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for(int j=1;j<=LOOP_SIZE;j++) { - Assert.assertEquals( si + "\t" + j + "\t" + part_val,itr.next()); - } - } - Assert.assertFalse(itr.hasNext()); + Assert.assertTrue(tmpExternalDir.exists()); + Assert.assertTrue(new File(tmpExternalDir.getPath().replaceAll("\\\\", "/") + "/" + "part-m-00000").exists()); + driver.run("select * from junit_external"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_external"); + Iterator itr = res.iterator(); + for(int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for(int j=1;j<=LOOP_SIZE;j++) { + Assert.assertEquals( si + "\t" + j + "\t" + part_val,itr.next()); + } } + Assert.assertFalse(itr.hasNext()); + + } } diff --git 
a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java index 8a48e4c..82eb0d7 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java @@ -20,10 +20,10 @@ public class TestOrcHCatLoader extends TestHCatLoader { - @Override - protected String storageFormat() { - return "orc"; - } + @Override + protected String storageFormat() { + return "orc"; + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java index a8748d0..0538771 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java @@ -20,9 +20,9 @@ public class TestOrcHCatLoaderComplexSchema extends TestHCatLoaderComplexSchema { - @Override - protected String storageFormat() { - return "orc"; - } + @Override + protected String storageFormat() { + return "orc"; + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java index ea12d42..1084092 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java @@ -20,9 +20,9 @@ public class TestOrcHCatStorer extends TestHCatStorerMulti { - @Override - protected String storageFormat() { - return "orc"; - } + @Override + protected String storageFormat() { + return "orc"; + } } diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java index 83dbbc0..a8ce61a 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java @@ -32,64 +32,64 @@ public class TestPigHCatUtil { - @Test - public void testGetBagSubSchema() throws Exception { - - // Define the expected schema. - ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; - bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple") - .setDescription("The tuple in the bag").setType(DataType.TUPLE); - - ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; - innerTupleFieldSchemas[0] = - new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY); - - bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); - ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); - - // Get the actual converted schema. 
- HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList( - new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); - HCatFieldSchema hCatFieldSchema = - new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null); - ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema); - - Assert.assertEquals(expected.toString(), actual.toString()); - } - - @Test - public void testGetBagSubSchemaConfigured() throws Exception { - - // NOTE: pig-0.8 sets client system properties by actually getting the client - // system properties. Starting in pig-0.9 you must pass the properties in. - // When updating our pig dependency this will need updated. - System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t"); - System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple"); - UDFContext.getUDFContext().setClientSystemProps(System.getProperties()); - - // Define the expected schema. - ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; - bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t") - .setDescription("The tuple in the bag").setType(DataType.TUPLE); - - ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; - innerTupleFieldSchemas[0] = - new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY); - - bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); - ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); - - // Get the actual converted schema. - HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList( - new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); - HCatFieldSchema actualHCatFieldSchema = - new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null); - ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema); - - Assert.assertEquals(expected.toString(), actual.toString()); - - // Clean up System properties that were set by this test - System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME); - System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME); - } + @Test + public void testGetBagSubSchema() throws Exception { + + // Define the expected schema. + ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; + bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple") + .setDescription("The tuple in the bag").setType(DataType.TUPLE); + + ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; + innerTupleFieldSchemas[0] = + new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY); + + bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); + ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); + + // Get the actual converted schema. + HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList( + new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); + HCatFieldSchema hCatFieldSchema = + new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null); + ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema); + + Assert.assertEquals(expected.toString(), actual.toString()); + } + + @Test + public void testGetBagSubSchemaConfigured() throws Exception { + + // NOTE: pig-0.8 sets client system properties by actually getting the client + // system properties. Starting in pig-0.9 you must pass the properties in. 
+ // When updating our pig dependency this will need updated. + System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t"); + System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple"); + UDFContext.getUDFContext().setClientSystemProps(System.getProperties()); + + // Define the expected schema. + ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; + bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t") + .setDescription("The tuple in the bag").setType(DataType.TUPLE); + + ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; + innerTupleFieldSchemas[0] = + new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY); + + bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); + ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); + + // Get the actual converted schema. + HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList( + new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); + HCatFieldSchema actualHCatFieldSchema = + new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null); + ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema); + + Assert.assertEquals(expected.toString(), actual.toString()); + + // Clean up System properties that were set by this test + System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME); + System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/listener/NotificationListener.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/listener/NotificationListener.java index 0c495a0..68e2b84 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/listener/NotificationListener.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/listener/NotificationListener.java @@ -77,305 +77,305 @@ */ public class NotificationListener extends MetaStoreEventListener { - private static final Logger LOG = LoggerFactory.getLogger(NotificationListener.class); - protected Session session; - protected Connection conn; - private static MessageFactory messageFactory = MessageFactory.getInstance(); - - /** - * Create message bus connection and session in constructor. - */ - public NotificationListener(final Configuration conf) { - - super(conf); - createConnection(); - } - - private static String getTopicName(Partition partition, - ListenerEvent partitionEvent) throws MetaException { - try { - return partitionEvent.getHandler() - .get_table(partition.getDbName(), partition.getTableName()) - .getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); - } catch (NoSuchObjectException e) { - throw new MetaException(e.toString()); - } + private static final Logger LOG = LoggerFactory.getLogger(NotificationListener.class); + protected Session session; + protected Connection conn; + private static MessageFactory messageFactory = MessageFactory.getInstance(); + + /** + * Create message bus connection and session in constructor. 
+ */ + public NotificationListener(final Configuration conf) { + + super(conf); + createConnection(); + } + + private static String getTopicName(Partition partition, + ListenerEvent partitionEvent) throws MetaException { + try { + return partitionEvent.getHandler() + .get_table(partition.getDbName(), partition.getTableName()) + .getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); + } catch (NoSuchObjectException e) { + throw new MetaException(e.toString()); } - - @Override - public void onAddPartition(AddPartitionEvent partitionEvent) - throws MetaException { - // Subscriber can get notification of newly add partition in a - // particular table by listening on a topic named "dbName.tableName" - // and message selector string as "HCAT_EVENT = HCAT_ADD_PARTITION" - if (partitionEvent.getStatus()) { - - Partition partition = partitionEvent.getPartition(); - String topicName = getTopicName(partition, partitionEvent); - if (topicName != null && !topicName.equals("")) { - send(messageFactory.buildAddPartitionMessage(partitionEvent.getTable(), partition), topicName); - } else { - LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " - + partition.getDbName() - + "." - + partition.getTableName() - + " To enable notifications for this table, please do alter table set properties (" - + HCatConstants.HCAT_MSGBUS_TOPIC_NAME - + "=.) or whatever you want topic name to be."); - } - } - + } + + @Override + public void onAddPartition(AddPartitionEvent partitionEvent) + throws MetaException { + // Subscriber can get notification of newly add partition in a + // particular table by listening on a topic named "dbName.tableName" + // and message selector string as "HCAT_EVENT = HCAT_ADD_PARTITION" + if (partitionEvent.getStatus()) { + + Partition partition = partitionEvent.getPartition(); + String topicName = getTopicName(partition, partitionEvent); + if (topicName != null && !topicName.equals("")) { + send(messageFactory.buildAddPartitionMessage(partitionEvent.getTable(), partition), topicName); + } else { + LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " + + partition.getDbName() + + "." + + partition.getTableName() + + " To enable notifications for this table, please do alter table set properties (" + + HCatConstants.HCAT_MSGBUS_TOPIC_NAME + + "=.) or whatever you want topic name to be."); + } } - /** - * Send dropped partition notifications. Subscribers can receive these notifications for a - * particular table by listening on a topic named "dbName.tableName" with message selector - * string {@value org.apache.hcatalog.common.HCatConstants#HCAT_EVENT} = - * {@value org.apache.hcatalog.common.HCatConstants#HCAT_DROP_PARTITION_EVENT}. - *
- * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been - * found to throw NPE when serializing objects that contain null. For this reason we override - * some fields in the StorageDescriptor of this notification. This should be fixed after - * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. - */ - @Override - public void onDropPartition(DropPartitionEvent partitionEvent) throws MetaException { - if (partitionEvent.getStatus()) { - Partition partition = partitionEvent.getPartition(); - StorageDescriptor sd = partition.getSd(); - sd.setBucketCols(new ArrayList()); - sd.setSortCols(new ArrayList()); - sd.setParameters(new HashMap()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSkewedInfo().setSkewedColNames(new ArrayList()); - String topicName = getTopicName(partition, partitionEvent); - if (topicName != null && !topicName.equals("")) { - send(messageFactory.buildDropPartitionMessage(partitionEvent.getTable(), partition), topicName); - } else { - LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " - + partition.getDbName() - + "." - + partition.getTableName() - + " To enable notifications for this table, please do alter table set properties (" - + HCatConstants.HCAT_MSGBUS_TOPIC_NAME - + "=.) or whatever you want topic name to be."); - } - } + } + + /** + * Send dropped partition notifications. Subscribers can receive these notifications for a + * particular table by listening on a topic named "dbName.tableName" with message selector + * string {@value org.apache.hcatalog.common.HCatConstants#HCAT_EVENT} = + * {@value org.apache.hcatalog.common.HCatConstants#HCAT_DROP_PARTITION_EVENT}. + *
+ * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been + * found to throw NPE when serializing objects that contain null. For this reason we override + * some fields in the StorageDescriptor of this notification. This should be fixed after + * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. + */ + @Override + public void onDropPartition(DropPartitionEvent partitionEvent) throws MetaException { + if (partitionEvent.getStatus()) { + Partition partition = partitionEvent.getPartition(); + StorageDescriptor sd = partition.getSd(); + sd.setBucketCols(new ArrayList()); + sd.setSortCols(new ArrayList()); + sd.setParameters(new HashMap()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSkewedInfo().setSkewedColNames(new ArrayList()); + String topicName = getTopicName(partition, partitionEvent); + if (topicName != null && !topicName.equals("")) { + send(messageFactory.buildDropPartitionMessage(partitionEvent.getTable(), partition), topicName); + } else { + LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " + + partition.getDbName() + + "." + + partition.getTableName() + + " To enable notifications for this table, please do alter table set properties (" + + HCatConstants.HCAT_MSGBUS_TOPIC_NAME + + "=.) or whatever you want topic name to be."); + } } - - @Override - public void onCreateDatabase(CreateDatabaseEvent dbEvent) - throws MetaException { - // Subscriber can get notification about addition of a database in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_ADD_DATABASE" - if (dbEvent.getStatus()) { - String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); - send(messageFactory.buildCreateDatabaseMessage(dbEvent.getDatabase()), topicName); - } + } + + @Override + public void onCreateDatabase(CreateDatabaseEvent dbEvent) + throws MetaException { + // Subscriber can get notification about addition of a database in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_ADD_DATABASE" + if (dbEvent.getStatus()) { + String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); + send(messageFactory.buildCreateDatabaseMessage(dbEvent.getDatabase()), topicName); } - - @Override - public void onDropDatabase(DropDatabaseEvent dbEvent) throws MetaException { - // Subscriber can get notification about drop of a database in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_DROP_DATABASE" - if (dbEvent.getStatus()) { - String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); - send(messageFactory.buildDropDatabaseMessage(dbEvent.getDatabase()), topicName); - } + } + + @Override + public void onDropDatabase(DropDatabaseEvent dbEvent) throws MetaException { + // Subscriber can get notification about drop of a database in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_DROP_DATABASE" + if (dbEvent.getStatus()) { + String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); + send(messageFactory.buildDropDatabaseMessage(dbEvent.getDatabase()), topicName); } - - @Override - public void onCreateTable(CreateTableEvent tableEvent) throws MetaException { - // Subscriber can get notification about addition of a table in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_ADD_TABLE" - if (tableEvent.getStatus()) { - Table 
tbl = tableEvent.getTable(); - HMSHandler handler = tableEvent.getHandler(); - HiveConf conf = handler.getHiveConf(); - Table newTbl; - try { - newTbl = handler.get_table(tbl.getDbName(), tbl.getTableName()) - .deepCopy(); - newTbl.getParameters().put( - HCatConstants.HCAT_MSGBUS_TOPIC_NAME, - getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase() + "." - + newTbl.getTableName().toLowerCase()); - handler.alter_table(newTbl.getDbName(), newTbl.getTableName(), newTbl); - } catch (InvalidOperationException e) { - MetaException me = new MetaException(e.toString()); - me.initCause(e); - throw me; - } catch (NoSuchObjectException e) { - MetaException me = new MetaException(e.toString()); - me.initCause(e); - throw me; - } - String topicName = getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase(); - send(messageFactory.buildCreateTableMessage(newTbl), topicName); - } + } + + @Override + public void onCreateTable(CreateTableEvent tableEvent) throws MetaException { + // Subscriber can get notification about addition of a table in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_ADD_TABLE" + if (tableEvent.getStatus()) { + Table tbl = tableEvent.getTable(); + HMSHandler handler = tableEvent.getHandler(); + HiveConf conf = handler.getHiveConf(); + Table newTbl; + try { + newTbl = handler.get_table(tbl.getDbName(), tbl.getTableName()) + .deepCopy(); + newTbl.getParameters().put( + HCatConstants.HCAT_MSGBUS_TOPIC_NAME, + getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase() + "." + + newTbl.getTableName().toLowerCase()); + handler.alter_table(newTbl.getDbName(), newTbl.getTableName(), newTbl); + } catch (InvalidOperationException e) { + MetaException me = new MetaException(e.toString()); + me.initCause(e); + throw me; + } catch (NoSuchObjectException e) { + MetaException me = new MetaException(e.toString()); + me.initCause(e); + throw me; + } + String topicName = getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase(); + send(messageFactory.buildCreateTableMessage(newTbl), topicName); } - - private String getTopicPrefix(HiveConf conf) { - return conf.get(HCatConstants.HCAT_MSGBUS_TOPIC_PREFIX, - HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); + } + + private String getTopicPrefix(HiveConf conf) { + return conf.get(HCatConstants.HCAT_MSGBUS_TOPIC_PREFIX, + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); + } + + /** + * Send dropped table notifications. Subscribers can receive these notifications for + * dropped tables by listening on topic "HCAT" with message selector string + * {@value org.apache.hcatalog.common.HCatConstants#HCAT_EVENT} = + * {@value org.apache.hcatalog.common.HCatConstants#HCAT_DROP_TABLE_EVENT} + *
+ * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been + * found to throw NPE when serializing objects that contain null. For this reason we override + * some fields in the StorageDescriptor of this notification. This should be fixed after + * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. + */ + @Override + public void onDropTable(DropTableEvent tableEvent) throws MetaException { + // Subscriber can get notification about drop of a table in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_DROP_TABLE" + + // Datanucleus throws NPE when we try to serialize a table object + // retrieved from metastore. To workaround that we reset following objects + + if (tableEvent.getStatus()) { + Table table = tableEvent.getTable(); + String topicName = getTopicPrefix(tableEvent.getHandler().getHiveConf()) + "." + table.getDbName().toLowerCase(); + send(messageFactory.buildDropTableMessage(table), topicName); } - - /** - * Send dropped table notifications. Subscribers can receive these notifications for - * dropped tables by listening on topic "HCAT" with message selector string - * {@value org.apache.hcatalog.common.HCatConstants#HCAT_EVENT} = - * {@value org.apache.hcatalog.common.HCatConstants#HCAT_DROP_TABLE_EVENT} - *
- * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been - * found to throw NPE when serializing objects that contain null. For this reason we override - * some fields in the StorageDescriptor of this notification. This should be fixed after - * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. - */ - @Override - public void onDropTable(DropTableEvent tableEvent) throws MetaException { - // Subscriber can get notification about drop of a table in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_DROP_TABLE" - - // Datanucleus throws NPE when we try to serialize a table object - // retrieved from metastore. To workaround that we reset following objects - - if (tableEvent.getStatus()) { - Table table = tableEvent.getTable(); - String topicName = getTopicPrefix(tableEvent.getHandler().getHiveConf()) + "." + table.getDbName().toLowerCase(); - send(messageFactory.buildDropTableMessage(table), topicName); + } + + /** + * @param hCatEventMessage The HCatEventMessage being sent over JMS. + * @param topicName is the name on message broker on which message is sent. + */ + protected void send(HCatEventMessage hCatEventMessage, String topicName) { + try { + if(null == session){ + // this will happen, if we never able to establish a connection. + createConnection(); + if (null == session){ + // Still not successful, return from here. + LOG.error("Invalid session. Failed to send message on topic: " + + topicName + " event: " + hCatEventMessage.getEventType()); + return; } + } + + Destination topic = getTopic(topicName); + + if (null == topic){ + // Still not successful, return from here. + LOG.error("Invalid session. Failed to send message on topic: " + + topicName + " event: " + hCatEventMessage.getEventType()); + return; + } + + MessageProducer producer = session.createProducer(topic); + Message msg = session.createTextMessage(hCatEventMessage.toString()); + + msg.setStringProperty(HCatConstants.HCAT_EVENT, hCatEventMessage.getEventType().toString()); + msg.setStringProperty(HCatConstants.HCAT_MESSAGE_VERSION, messageFactory.getVersion()); + msg.setStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT, messageFactory.getMessageFormat()); + producer.send(msg); + // Message must be transacted before we return. + session.commit(); } - - /** - * @param hCatEventMessage The HCatEventMessage being sent over JMS. - * @param topicName is the name on message broker on which message is sent. - */ - protected void send(HCatEventMessage hCatEventMessage, String topicName) { - try { - if(null == session){ - // this will happen, if we never able to establish a connection. - createConnection(); - if (null == session){ - // Still not successful, return from here. - LOG.error("Invalid session. Failed to send message on topic: " + - topicName + " event: " + hCatEventMessage.getEventType()); - return; - } - } - - Destination topic = getTopic(topicName); - - if (null == topic){ - // Still not successful, return from here. - LOG.error("Invalid session. 
Failed to send message on topic: " + - topicName + " event: " + hCatEventMessage.getEventType()); - return; - } - - MessageProducer producer = session.createProducer(topic); - Message msg = session.createTextMessage(hCatEventMessage.toString()); - - msg.setStringProperty(HCatConstants.HCAT_EVENT, hCatEventMessage.getEventType().toString()); - msg.setStringProperty(HCatConstants.HCAT_MESSAGE_VERSION, messageFactory.getVersion()); - msg.setStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT, messageFactory.getMessageFormat()); - producer.send(msg); - // Message must be transacted before we return. - session.commit(); - } - catch(Exception e){ - // Gobble up the exception. Message delivery is best effort. - LOG.error("Failed to send message on topic: " + topicName + - " event: " + hCatEventMessage.getEventType(), e); - } + catch(Exception e){ + // Gobble up the exception. Message delivery is best effort. + LOG.error("Failed to send message on topic: " + topicName + + " event: " + hCatEventMessage.getEventType(), e); } - - /** - * Get the topic object for the topicName, it also tries to reconnect - * if the connection appears to be broken. - * - * @param topicName The String identifying the message-topic. - * @return A {@link Topic} object corresponding to the specified topicName. - * @throws JMSException - */ - protected Topic getTopic(final String topicName) throws JMSException { - Topic topic; - try { - // Topics are created on demand. If it doesn't exist on broker it will - // be created when broker receives this message. - topic = session.createTopic(topicName); - } catch (IllegalStateException ise) { - // this will happen if we were able to establish connection once, but its no longer valid, - // ise is thrown, catch it and retry. - LOG.error("Seems like connection is lost. Retrying", ise); - createConnection(); - topic = session.createTopic(topicName); - } - return topic; + } + + /** + * Get the topic object for the topicName, it also tries to reconnect + * if the connection appears to be broken. + * + * @param topicName The String identifying the message-topic. + * @return A {@link Topic} object corresponding to the specified topicName. + * @throws JMSException + */ + protected Topic getTopic(final String topicName) throws JMSException { + Topic topic; + try { + // Topics are created on demand. If it doesn't exist on broker it will + // be created when broker receives this message. + topic = session.createTopic(topicName); + } catch (IllegalStateException ise) { + // this will happen if we were able to establish connection once, but its no longer valid, + // ise is thrown, catch it and retry. + LOG.error("Seems like connection is lost. Retrying", ise); + createConnection(); + topic = session.createTopic(topicName); } - - protected void createConnection() { - - Context jndiCntxt; - try { - jndiCntxt = new InitialContext(); - ConnectionFactory connFac = (ConnectionFactory) jndiCntxt - .lookup("ConnectionFactory"); - Connection conn = connFac.createConnection(); - conn.start(); - conn.setExceptionListener(new ExceptionListener() { - @Override - public void onException(JMSException jmse) { - LOG.error(jmse.toString()); - } - }); - // We want message to be sent when session commits, thus we run in - // transacted mode. - session = conn.createSession(true, Session.SESSION_TRANSACTED); - } catch (NamingException e) { - LOG.error("JNDI error while setting up Message Bus connection. 
" - + "Please make sure file named 'jndi.properties' is in " - + "classpath and contains appropriate key-value pairs.", e); - } catch (JMSException e) { - LOG.error("Failed to initialize connection to message bus", e); - } catch (Throwable t) { - LOG.error("Unable to connect to JMS provider", t); + return topic; + } + + protected void createConnection() { + + Context jndiCntxt; + try { + jndiCntxt = new InitialContext(); + ConnectionFactory connFac = (ConnectionFactory) jndiCntxt + .lookup("ConnectionFactory"); + Connection conn = connFac.createConnection(); + conn.start(); + conn.setExceptionListener(new ExceptionListener() { + @Override + public void onException(JMSException jmse) { + LOG.error(jmse.toString()); } + }); + // We want message to be sent when session commits, thus we run in + // transacted mode. + session = conn.createSession(true, Session.SESSION_TRANSACTED); + } catch (NamingException e) { + LOG.error("JNDI error while setting up Message Bus connection. " + + "Please make sure file named 'jndi.properties' is in " + + "classpath and contains appropriate key-value pairs.", e); + } catch (JMSException e) { + LOG.error("Failed to initialize connection to message bus", e); + } catch (Throwable t) { + LOG.error("Unable to connect to JMS provider", t); } - - @Override - protected void finalize() throws Throwable { - // Close the connection before dying. - try { - if (null != session) - session.close(); - if (conn != null) { - conn.close(); - } - - } catch (Exception ignore) { - LOG.info("Failed to close message bus connection.", ignore); - } + } + + @Override + protected void finalize() throws Throwable { + // Close the connection before dying. + try { + if (null != session) + session.close(); + if (conn != null) { + conn.close(); + } + + } catch (Exception ignore) { + LOG.info("Failed to close message bus connection.", ignore); } + } - @Override - public void onLoadPartitionDone(LoadPartitionDoneEvent lpde) - throws MetaException { + @Override + public void onLoadPartitionDone(LoadPartitionDoneEvent lpde) + throws MetaException { // TODO: Fix LoadPartitionDoneEvent. Currently, LPDE can only carry a single partition-spec. And that defeats the purpose. 
-// if(lpde.getStatus()) -// send(lpde.getPartitionName(),lpde.getTable().getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME),HCatConstants.HCAT_PARTITION_DONE_EVENT); - } - - @Override - public void onAlterPartition(AlterPartitionEvent ape) throws MetaException { - // no-op - } - - @Override - public void onAlterTable(AlterTableEvent ate) throws MetaException { - // no-op - } +// if(lpde.getStatus()) +// send(lpde.getPartitionName(),lpde.getTable().getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME),HCatConstants.HCAT_PARTITION_DONE_EVENT); + } + + @Override + public void onAlterPartition(AlterPartitionEvent ape) throws MetaException { + // no-op + } + + @Override + public void onAlterTable(AlterTableEvent ate) throws MetaException { + // no-op + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/AddPartitionMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/AddPartitionMessage.java index 1a6e868..def15ec 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/AddPartitionMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/AddPartitionMessage.java @@ -28,28 +28,28 @@ */ public abstract class AddPartitionMessage extends HCatEventMessage { - protected AddPartitionMessage() { - super(EventType.ADD_PARTITION); - } + protected AddPartitionMessage() { + super(EventType.ADD_PARTITION); + } - /** - * Getter for name of table (where partitions are added). - * @return Table-name (String). - */ - public abstract String getTable(); + /** + * Getter for name of table (where partitions are added). + * @return Table-name (String). + */ + public abstract String getTable(); - /** - * Getter for list of partitions added. - * @return List of maps, where each map identifies values for each partition-key, for every added partition. - */ - public abstract List> getPartitions (); + /** + * Getter for list of partitions added. + * @return List of maps, where each map identifies values for each partition-key, for every added partition. 
+ */ + public abstract List> getPartitions (); - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - if (getPartitions() == null) - throw new IllegalStateException("Partition-list unset."); - return super.checkValid(); - } + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + if (getPartitions() == null) + throw new IllegalStateException("Partition-list unset."); + return super.checkValid(); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateDatabaseMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateDatabaseMessage.java index 76b0702..f714cef 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateDatabaseMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateDatabaseMessage.java @@ -25,8 +25,8 @@ */ public abstract class CreateDatabaseMessage extends HCatEventMessage { - protected CreateDatabaseMessage() { - super(EventType.CREATE_DATABASE); - } + protected CreateDatabaseMessage() { + super(EventType.CREATE_DATABASE); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateTableMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateTableMessage.java index e97a35e..58bbc4f 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateTableMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateTableMessage.java @@ -25,20 +25,20 @@ */ public abstract class CreateTableMessage extends HCatEventMessage { - protected CreateTableMessage() { - super(EventType.CREATE_TABLE); - } + protected CreateTableMessage() { + super(EventType.CREATE_TABLE); + } - /** - * Getter for the name of table created in HCatalog. - * @return Table-name (String). - */ - public abstract String getTable(); + /** + * Getter for the name of table created in HCatalog. + * @return Table-name (String). 
+ */ + public abstract String getTable(); - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - return super.checkValid(); - } + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + return super.checkValid(); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropDatabaseMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropDatabaseMessage.java index 7d231c5..6a28e49 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropDatabaseMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropDatabaseMessage.java @@ -25,7 +25,7 @@ */ public abstract class DropDatabaseMessage extends HCatEventMessage { - protected DropDatabaseMessage() { - super(EventType.DROP_DATABASE); - } + protected DropDatabaseMessage() { + super(EventType.DROP_DATABASE); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropPartitionMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropPartitionMessage.java index 829e629..88c8a34 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropPartitionMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropPartitionMessage.java @@ -28,19 +28,19 @@ */ public abstract class DropPartitionMessage extends HCatEventMessage { - protected DropPartitionMessage() { - super(EventType.DROP_PARTITION); - } + protected DropPartitionMessage() { + super(EventType.DROP_PARTITION); + } - public abstract String getTable(); - public abstract List> getPartitions (); + public abstract String getTable(); + public abstract List> getPartitions (); - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - if (getPartitions() == null) - throw new IllegalStateException("Partition-list unset."); - return super.checkValid(); - } + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + if (getPartitions() == null) + throw new IllegalStateException("Partition-list unset."); + return super.checkValid(); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropTableMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropTableMessage.java index 8b2fcbc..fbb2c73 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropTableMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropTableMessage.java @@ -25,20 +25,20 @@ */ public abstract class DropTableMessage extends HCatEventMessage { - protected DropTableMessage() { - super(EventType.DROP_TABLE); - } + protected DropTableMessage() { + super(EventType.DROP_TABLE); + } - /** - * Getter for the name of the table being dropped. - * @return Table-name (String). - */ - public abstract String getTable(); + /** + * Getter for the name of the table being dropped. + * @return Table-name (String). 
+ */ + public abstract String getTable(); - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - return super.checkValid(); - } + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + return super.checkValid(); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/HCatEventMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/HCatEventMessage.java index af67f16..990f8c9 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/HCatEventMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/HCatEventMessage.java @@ -28,74 +28,74 @@ */ public abstract class HCatEventMessage { - /** - * Enumeration of all supported types of Metastore operations. - */ - public static enum EventType { - - CREATE_DATABASE(HCatConstants.HCAT_CREATE_DATABASE_EVENT), - DROP_DATABASE(HCatConstants.HCAT_DROP_DATABASE_EVENT), - CREATE_TABLE(HCatConstants.HCAT_CREATE_TABLE_EVENT), - DROP_TABLE(HCatConstants.HCAT_DROP_TABLE_EVENT), - ADD_PARTITION(HCatConstants.HCAT_ADD_PARTITION_EVENT), - DROP_PARTITION(HCatConstants.HCAT_DROP_PARTITION_EVENT); - - private String typeString; - - EventType(String typeString) { - this.typeString = typeString; - } - - @Override - public String toString() { return typeString; } + /** + * Enumeration of all supported types of Metastore operations. + */ + public static enum EventType { + + CREATE_DATABASE(HCatConstants.HCAT_CREATE_DATABASE_EVENT), + DROP_DATABASE(HCatConstants.HCAT_DROP_DATABASE_EVENT), + CREATE_TABLE(HCatConstants.HCAT_CREATE_TABLE_EVENT), + DROP_TABLE(HCatConstants.HCAT_DROP_TABLE_EVENT), + ADD_PARTITION(HCatConstants.HCAT_ADD_PARTITION_EVENT), + DROP_PARTITION(HCatConstants.HCAT_DROP_PARTITION_EVENT); + + private String typeString; + + EventType(String typeString) { + this.typeString = typeString; } - protected EventType eventType; - - protected HCatEventMessage(EventType eventType) { - this.eventType = eventType; - } - - public EventType getEventType() { - return eventType; - } - - /** - * Getter for HCatalog Server's URL. - * (This is where the event originates from.) - * @return HCatalog Server's URL (String). - */ - public abstract String getServer(); - - /** - * Getter for the Kerberos principal of the HCatalog service. - * @return HCatalog Service Principal (String). - */ - public abstract String getServicePrincipal(); - - /** - * Getter for the name of the Database on which the Metastore operation is done. - * @return Database-name (String). - */ - public abstract String getDB(); - - /** - * Getter for the timestamp associated with the operation. - * @return Timestamp (Long - seconds since epoch). - */ - public abstract Long getTimestamp(); - - /** - * Class invariant. Checked after construction or deserialization. 
- */ - public HCatEventMessage checkValid() { - if (getServer() == null || getServicePrincipal() == null) - throw new IllegalStateException("Server-URL/Service-Principal shouldn't be null."); - if (getEventType() == null) - throw new IllegalStateException("Event-type unset."); - if (getDB() == null) - throw new IllegalArgumentException("DB-name unset."); - - return this; - } + @Override + public String toString() { return typeString; } + } + + protected EventType eventType; + + protected HCatEventMessage(EventType eventType) { + this.eventType = eventType; + } + + public EventType getEventType() { + return eventType; + } + + /** + * Getter for HCatalog Server's URL. + * (This is where the event originates from.) + * @return HCatalog Server's URL (String). + */ + public abstract String getServer(); + + /** + * Getter for the Kerberos principal of the HCatalog service. + * @return HCatalog Service Principal (String). + */ + public abstract String getServicePrincipal(); + + /** + * Getter for the name of the Database on which the Metastore operation is done. + * @return Database-name (String). + */ + public abstract String getDB(); + + /** + * Getter for the timestamp associated with the operation. + * @return Timestamp (Long - seconds since epoch). + */ + public abstract Long getTimestamp(); + + /** + * Class invariant. Checked after construction or deserialization. + */ + public HCatEventMessage checkValid() { + if (getServer() == null || getServicePrincipal() == null) + throw new IllegalStateException("Server-URL/Service-Principal shouldn't be null."); + if (getEventType() == null) + throw new IllegalStateException("Event-type unset."); + if (getDB() == null) + throw new IllegalArgumentException("DB-name unset."); + + return this; + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageDeserializer.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageDeserializer.java index 1012172..5d069a3 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageDeserializer.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageDeserializer.java @@ -25,60 +25,60 @@ */ public abstract class MessageDeserializer { - /** - * Method to construct HCatEventMessage from string. - */ - public HCatEventMessage getHCatEventMessage(String eventTypeString, String messageBody) { + /** + * Method to construct HCatEventMessage from string. 
+ */ + public HCatEventMessage getHCatEventMessage(String eventTypeString, String messageBody) { - switch (HCatEventMessage.EventType.valueOf(eventTypeString)) { - case CREATE_DATABASE: - return getCreateDatabaseMessage(messageBody); - case DROP_DATABASE: - return getDropDatabaseMessage(messageBody); - case CREATE_TABLE: - return getCreateTableMessage(messageBody); - case DROP_TABLE: - return getDropTableMessage(messageBody); - case ADD_PARTITION: - return getAddPartitionMessage(messageBody); - case DROP_PARTITION: - return getDropPartitionMessage(messageBody); + switch (HCatEventMessage.EventType.valueOf(eventTypeString)) { + case CREATE_DATABASE: + return getCreateDatabaseMessage(messageBody); + case DROP_DATABASE: + return getDropDatabaseMessage(messageBody); + case CREATE_TABLE: + return getCreateTableMessage(messageBody); + case DROP_TABLE: + return getDropTableMessage(messageBody); + case ADD_PARTITION: + return getAddPartitionMessage(messageBody); + case DROP_PARTITION: + return getDropPartitionMessage(messageBody); - default: - throw new IllegalArgumentException("Unsupported event-type: " + eventTypeString); - } + default: + throw new IllegalArgumentException("Unsupported event-type: " + eventTypeString); } + } - /** - * Method to de-serialize CreateDatabaseMessage instance. - */ - public abstract CreateDatabaseMessage getCreateDatabaseMessage(String messageBody); + /** + * Method to de-serialize CreateDatabaseMessage instance. + */ + public abstract CreateDatabaseMessage getCreateDatabaseMessage(String messageBody); - /** - * Method to de-serialize DropDatabaseMessage instance. - */ - public abstract DropDatabaseMessage getDropDatabaseMessage(String messageBody); + /** + * Method to de-serialize DropDatabaseMessage instance. + */ + public abstract DropDatabaseMessage getDropDatabaseMessage(String messageBody); - /** - * Method to de-serialize CreateTableMessage instance. - */ - public abstract CreateTableMessage getCreateTableMessage(String messageBody); + /** + * Method to de-serialize CreateTableMessage instance. + */ + public abstract CreateTableMessage getCreateTableMessage(String messageBody); - /** - * Method to de-serialize DropTableMessage instance. - */ - public abstract DropTableMessage getDropTableMessage(String messageBody); + /** + * Method to de-serialize DropTableMessage instance. + */ + public abstract DropTableMessage getDropTableMessage(String messageBody); - /** - * Method to de-serialize AddPartitionMessage instance. - */ - public abstract AddPartitionMessage getAddPartitionMessage(String messageBody); + /** + * Method to de-serialize AddPartitionMessage instance. + */ + public abstract AddPartitionMessage getAddPartitionMessage(String messageBody); - /** - * Method to de-serialize DropPartitionMessage instance. - */ - public abstract DropPartitionMessage getDropPartitionMessage(String messageBody); + /** + * Method to de-serialize DropPartitionMessage instance. + */ + public abstract DropPartitionMessage getDropPartitionMessage(String messageBody); - // Protection against construction. - protected MessageDeserializer() {} + // Protection against construction. 
+ protected MessageDeserializer() {} } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageFactory.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageFactory.java index 36da95d..7de815d 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageFactory.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageFactory.java @@ -32,108 +32,108 @@ */ public abstract class MessageFactory { - private static MessageFactory instance = new JSONMessageFactory(); - - protected static final HiveConf hiveConf = new HiveConf(); - static { - hiveConf.addResource("hive-site.xml"); + private static MessageFactory instance = new JSONMessageFactory(); + + protected static final HiveConf hiveConf = new HiveConf(); + static { + hiveConf.addResource("hive-site.xml"); + } + + private static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; + private static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; + private static final String HCAT_MESSAGE_FORMAT = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FORMAT, "json"); + private static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hcatalog.messaging.json.JSONMessageFactory"; + private static final String HCAT_MESSAGE_FACTORY_IMPL = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX + + HCAT_MESSAGE_FORMAT, + DEFAULT_MESSAGE_FACTORY_IMPL); + + protected static final String HCAT_SERVER_URL = hiveConf.get(HiveConf.ConfVars.METASTOREURIS.name(), ""); + protected static final String HCAT_SERVICE_PRINCIPAL = hiveConf.get(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.name(), ""); + + /** + * Getter for MessageFactory instance. + */ + public static MessageFactory getInstance() { + if (instance == null) { + instance = getInstance(HCAT_MESSAGE_FACTORY_IMPL); } + return instance; + } - private static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; - private static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; - private static final String HCAT_MESSAGE_FORMAT = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FORMAT, "json"); - private static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hcatalog.messaging.json.JSONMessageFactory"; - private static final String HCAT_MESSAGE_FACTORY_IMPL = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX - + HCAT_MESSAGE_FORMAT, - DEFAULT_MESSAGE_FACTORY_IMPL); - - protected static final String HCAT_SERVER_URL = hiveConf.get(HiveConf.ConfVars.METASTOREURIS.name(), ""); - protected static final String HCAT_SERVICE_PRINCIPAL = hiveConf.get(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.name(), ""); - - /** - * Getter for MessageFactory instance. 
- */ - public static MessageFactory getInstance() { - if (instance == null) { - instance = getInstance(HCAT_MESSAGE_FACTORY_IMPL); - } - return instance; + private static MessageFactory getInstance(String className) { + try { + return (MessageFactory)ReflectionUtils.newInstance(Class.forName(className), hiveConf); } - - private static MessageFactory getInstance(String className) { - try { - return (MessageFactory)ReflectionUtils.newInstance(Class.forName(className), hiveConf); - } - catch (ClassNotFoundException classNotFound) { - throw new IllegalStateException("Could not construct MessageFactory implementation: ", classNotFound); - } + catch (ClassNotFoundException classNotFound) { + throw new IllegalStateException("Could not construct MessageFactory implementation: ", classNotFound); } - - /** - * Getter for MessageDeserializer, corresponding to the specified format and version. - * @param format Serialization format for notifications. - * @param version Version of serialization format (currently ignored.) - * @return MessageDeserializer. - */ - public static MessageDeserializer getDeserializer(String format, - String version) { - return getInstance(hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX + format, - DEFAULT_MESSAGE_FACTORY_IMPL)).getDeserializer(); - } - - public abstract MessageDeserializer getDeserializer(); - - /** - * Getter for version-string, corresponding to all constructed messages. - */ - public abstract String getVersion(); - - /** - * Getter for message-format. - */ - public abstract String getMessageFormat(); - - /** - * Factory method for CreateDatabaseMessage. - * @param db The Database being added. - * @return CreateDatabaseMessage instance. - */ - public abstract CreateDatabaseMessage buildCreateDatabaseMessage(Database db); - - /** - * Factory method for DropDatabaseMessage. - * @param db The Database being dropped. - * @return DropDatabaseMessage instance. - */ - public abstract DropDatabaseMessage buildDropDatabaseMessage(Database db); - - /** - * Factory method for CreateTableMessage. - * @param table The Table being created. - * @return CreateTableMessage instance. - */ - public abstract CreateTableMessage buildCreateTableMessage(Table table); - - /** - * Factory method for DropTableMessage. - * @param table The Table being dropped. - * @return DropTableMessage instance. - */ - public abstract DropTableMessage buildDropTableMessage(Table table); - - /** - * Factory method for AddPartitionMessage. - * @param table The Table to which the partition is added. - * @param partition The Partition being added. - * @return AddPartitionMessage instance. - */ - public abstract AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition); - - /** - * Factory method for DropPartitionMessage. - * @param table The Table from which the partition is dropped. - * @param partition The Partition being dropped. - * @return DropPartitionMessage instance. - */ - public abstract DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition); + } + + /** + * Getter for MessageDeserializer, corresponding to the specified format and version. + * @param format Serialization format for notifications. + * @param version Version of serialization format (currently ignored.) + * @return MessageDeserializer. 
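The factory implementation is resolved from configuration: hcatalog.message.format selects the wire format (default "json"), and hcatalog.message.factory.impl.<format> names the MessageFactory class to load, falling back to JSONMessageFactory. A minimal round-trip sketch built only on the methods declared in this class; the Table values are illustrative and not part of this patch:

// Sketch: build a CREATE_TABLE notification body and parse it back. Illustrative only.
import org.apache.hadoop.hive.metastore.api.Table;

import org.apache.hcatalog.messaging.CreateTableMessage;
import org.apache.hcatalog.messaging.HCatEventMessage;
import org.apache.hcatalog.messaging.MessageFactory;

public class MessageRoundTrip {
  public static void main(String[] args) {
    MessageFactory factory = MessageFactory.getInstance();   // JSONMessageFactory unless overridden in hive-site.xml

    Table table = new Table();            // metastore Thrift object; values are illustrative
    table.setDbName("default");
    table.setTableName("web_logs");

    // Producer side: serialize to the configured wire format.
    String body = factory.buildCreateTableMessage(table).toString();

    // Consumer side: look up the deserializer for the advertised format/version and parse.
    HCatEventMessage parsed = MessageFactory
        .getDeserializer(factory.getMessageFormat(), factory.getVersion())
        .getHCatEventMessage(HCatEventMessage.EventType.CREATE_TABLE.name(), body);

    CreateTableMessage createTable = (CreateTableMessage) parsed;
    System.out.println(createTable.getDB() + "." + createTable.getTable());
  }
}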
+ */ + public static MessageDeserializer getDeserializer(String format, + String version) { + return getInstance(hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX + format, + DEFAULT_MESSAGE_FACTORY_IMPL)).getDeserializer(); + } + + public abstract MessageDeserializer getDeserializer(); + + /** + * Getter for version-string, corresponding to all constructed messages. + */ + public abstract String getVersion(); + + /** + * Getter for message-format. + */ + public abstract String getMessageFormat(); + + /** + * Factory method for CreateDatabaseMessage. + * @param db The Database being added. + * @return CreateDatabaseMessage instance. + */ + public abstract CreateDatabaseMessage buildCreateDatabaseMessage(Database db); + + /** + * Factory method for DropDatabaseMessage. + * @param db The Database being dropped. + * @return DropDatabaseMessage instance. + */ + public abstract DropDatabaseMessage buildDropDatabaseMessage(Database db); + + /** + * Factory method for CreateTableMessage. + * @param table The Table being created. + * @return CreateTableMessage instance. + */ + public abstract CreateTableMessage buildCreateTableMessage(Table table); + + /** + * Factory method for DropTableMessage. + * @param table The Table being dropped. + * @return DropTableMessage instance. + */ + public abstract DropTableMessage buildDropTableMessage(Table table); + + /** + * Factory method for AddPartitionMessage. + * @param table The Table to which the partition is added. + * @param partition The Partition being added. + * @return AddPartitionMessage instance. + */ + public abstract AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition); + + /** + * Factory method for DropPartitionMessage. + * @param table The Table from which the partition is dropped. + * @param partition The Partition being dropped. + * @return DropPartitionMessage instance. + */ + public abstract DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition); } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/jms/MessagingUtils.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/jms/MessagingUtils.java index b8d3050..ffe9cac 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/jms/MessagingUtils.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/jms/MessagingUtils.java @@ -35,29 +35,29 @@ */ public class MessagingUtils { - /** - * Method to return HCatEventMessage contained in the JMS message. - * @param message The JMS Message instance - * @return The contained HCatEventMessage - */ - public static HCatEventMessage getMessage(Message message) { - try { - String messageBody = ((TextMessage)message).getText(); - String eventType = message.getStringProperty(HCatConstants.HCAT_EVENT); - String messageVersion = message.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); - String messageFormat = message.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); - - if (StringUtils.isEmpty(messageBody) || StringUtils.isEmpty(eventType)) - throw new IllegalArgumentException("Could not extract HCatEventMessage. " + - "EventType and/or MessageBody is null/empty."); - - return MessageFactory.getDeserializer(messageFormat, messageVersion).getHCatEventMessage(eventType, messageBody); - } - catch (JMSException exception) { - throw new IllegalArgumentException("Could not extract HCatEventMessage. 
", exception); - } + /** + * Method to return HCatEventMessage contained in the JMS message. + * @param message The JMS Message instance + * @return The contained HCatEventMessage + */ + public static HCatEventMessage getMessage(Message message) { + try { + String messageBody = ((TextMessage)message).getText(); + String eventType = message.getStringProperty(HCatConstants.HCAT_EVENT); + String messageVersion = message.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); + String messageFormat = message.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); + + if (StringUtils.isEmpty(messageBody) || StringUtils.isEmpty(eventType)) + throw new IllegalArgumentException("Could not extract HCatEventMessage. " + + "EventType and/or MessageBody is null/empty."); + + return MessageFactory.getDeserializer(messageFormat, messageVersion).getHCatEventMessage(eventType, messageBody); } + catch (JMSException exception) { + throw new IllegalArgumentException("Could not extract HCatEventMessage. ", exception); + } + } - // Prevent construction. - private MessagingUtils() {} + // Prevent construction. + private MessagingUtils() {} } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONAddPartitionMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONAddPartitionMessage.java index bc7f433..acdc807 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONAddPartitionMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONAddPartitionMessage.java @@ -31,56 +31,56 @@ */ public class JSONAddPartitionMessage extends AddPartitionMessage { - @JsonProperty - String server, servicePrincipal, db, table; - - @JsonProperty - Long timestamp; - - @JsonProperty - List> partitions; - - /** - * Default Constructor. Required for Jackson. - */ - public JSONAddPartitionMessage() {} - - public JSONAddPartitionMessage(String server, String servicePrincipal, String db, String table, - List> partitions, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.partitions = partitions; - this.timestamp = timestamp; - checkValid(); - } + @JsonProperty + String server, servicePrincipal, db, table; + + @JsonProperty + Long timestamp; + + @JsonProperty + List> partitions; - @Override - public String getServer() { return server; } + /** + * Default Constructor. Required for Jackson. 
+ */ + public JSONAddPartitionMessage() {} - @Override - public String getServicePrincipal() { return servicePrincipal; } + public JSONAddPartitionMessage(String server, String servicePrincipal, String db, String table, + List> partitions, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.partitions = partitions; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getDB() { return db; } + @Override + public String getServer() { return server; } - @Override - public String getTable() { return table; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public String getDB() { return db; } - @Override - public List> getPartitions () { return partitions; } + @Override + public String getTable() { return table; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public List> getPartitions () { return partitions; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateDatabaseMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateDatabaseMessage.java index 1e28082..037d70e 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateDatabaseMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateDatabaseMessage.java @@ -28,45 +28,45 @@ */ public class JSONCreateDatabaseMessage extends CreateDatabaseMessage { - @JsonProperty - String server, servicePrincipal, db; + @JsonProperty + String server, servicePrincipal, db; - @JsonProperty - Long timestamp; + @JsonProperty + Long timestamp; - /** - * Default constructor, required for Jackson. - */ - public JSONCreateDatabaseMessage() {} + /** + * Default constructor, required for Jackson. 
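On the consuming side, MessagingUtils.getMessage(...) is the single entry point: it reads the HCAT_EVENT, HCAT_MESSAGE_VERSION and HCAT_MESSAGE_FORMAT properties from the JMS message and routes the text body to the matching deserializer. A minimal listener sketch; the class name and the dispatch cases are illustrative, and a matching subscription sketch appears further below:

// Sketch of a subscriber-side handler; HCatNotificationHandler is an illustrative name.
import java.util.Map;

import javax.jms.Message;
import javax.jms.MessageListener;

import org.apache.hcatalog.messaging.AddPartitionMessage;
import org.apache.hcatalog.messaging.HCatEventMessage;
import org.apache.hcatalog.messaging.jms.MessagingUtils;

public class HCatNotificationHandler implements MessageListener {

  @Override
  public void onMessage(Message jmsMessage) {
    // Reads the event-type/format/version properties and picks the right deserializer.
    HCatEventMessage event = MessagingUtils.getMessage(jmsMessage);

    switch (event.getEventType()) {
      case ADD_PARTITION:
        AddPartitionMessage added = (AddPartitionMessage) event;
        for (Map<String, String> partitionSpec : added.getPartitions()) {
          System.out.println("New partition in " + added.getDB() + "." + added.getTable() + ": " + partitionSpec);
        }
        break;
      case DROP_TABLE:
        System.out.println("Table dropped in database " + event.getDB());
        break;
      default:
        // CREATE_DATABASE, DROP_DATABASE, CREATE_TABLE and DROP_PARTITION are handled the same way.
        break;
    }
  }
}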
+ */ + public JSONCreateDatabaseMessage() {} - public JSONCreateDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.timestamp = timestamp; - checkValid(); - } + public JSONCreateDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getDB() { return db; } + @Override + public String getDB() { return db; } - @Override - public String getServer() { return server; } + @Override + public String getServer() { return server; } - @Override - public String getServicePrincipal() { return servicePrincipal; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public Long getTimestamp() { return timestamp; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateTableMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateTableMessage.java index 35e1e4e..6cbc3c8 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateTableMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateTableMessage.java @@ -28,48 +28,48 @@ */ public class JSONCreateTableMessage extends CreateTableMessage { - @JsonProperty - String server, servicePrincipal, db, table; + @JsonProperty + String server, servicePrincipal, db, table; - @JsonProperty - Long timestamp; + @JsonProperty + Long timestamp; - /** - * Default constructor, needed for Jackson. - */ - public JSONCreateTableMessage() {} + /** + * Default constructor, needed for Jackson. 
+ */ + public JSONCreateTableMessage() {} - public JSONCreateTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.timestamp = timestamp; - checkValid(); - } + public JSONCreateTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getServer() { return server; } + @Override + public String getServer() { return server; } - @Override - public String getServicePrincipal() { return servicePrincipal; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public String getDB() { return db; } + @Override + public String getDB() { return db; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public Long getTimestamp() { return timestamp; } - @Override - public String getTable() { return table; } + @Override + public String getTable() { return table; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropDatabaseMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropDatabaseMessage.java index c567949..a27870f 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropDatabaseMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropDatabaseMessage.java @@ -28,45 +28,45 @@ */ public class JSONDropDatabaseMessage extends DropDatabaseMessage { - @JsonProperty - String server, servicePrincipal, db; + @JsonProperty + String server, servicePrincipal, db; - @JsonProperty - Long timestamp; + @JsonProperty + Long timestamp; - /** - * Default constructor, required for Jackson. - */ - public JSONDropDatabaseMessage() {} + /** + * Default constructor, required for Jackson. 
+ */ + public JSONDropDatabaseMessage() {} - public JSONDropDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.timestamp = timestamp; - checkValid(); - } + public JSONDropDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getServer() { return server; } + @Override + public String getServer() { return server; } - @Override - public String getServicePrincipal() { return servicePrincipal; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public String getDB() { return db; } + @Override + public String getDB() { return db; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public Long getTimestamp() { return timestamp; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropPartitionMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropPartitionMessage.java index 0e418aa..000325d 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropPartitionMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropPartitionMessage.java @@ -31,57 +31,57 @@ */ public class JSONDropPartitionMessage extends DropPartitionMessage { - @JsonProperty - String server, servicePrincipal, db, table; - - @JsonProperty - Long timestamp; - - @JsonProperty - List> partitions; - - /** - * Default Constructor. Required for Jackson. - */ - public JSONDropPartitionMessage() {} - - public JSONDropPartitionMessage(String server, String servicePrincipal, String db, String table, - List> partitions, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.partitions = partitions; - this.timestamp = timestamp; - checkValid(); - } + @JsonProperty + String server, servicePrincipal, db, table; + + @JsonProperty + Long timestamp; + + @JsonProperty + List> partitions; + /** + * Default Constructor. Required for Jackson. 
+ */ + public JSONDropPartitionMessage() {} - @Override - public String getServer() { return server; } + public JSONDropPartitionMessage(String server, String servicePrincipal, String db, String table, + List> partitions, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.partitions = partitions; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getServicePrincipal() { return servicePrincipal; } - @Override - public String getDB() { return db; } + @Override + public String getServer() { return server; } - @Override - public String getTable() { return table; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public String getDB() { return db; } - @Override - public List> getPartitions () { return partitions; } + @Override + public String getTable() { return table; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public List> getPartitions () { return partitions; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropTableMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropTableMessage.java index 505dd94..635d312 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropTableMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropTableMessage.java @@ -28,50 +28,50 @@ */ public class JSONDropTableMessage extends DropTableMessage { - @JsonProperty - String server, servicePrincipal, db, table; + @JsonProperty + String server, servicePrincipal, db, table; - @JsonProperty - Long timestamp; + @JsonProperty + Long timestamp; - /** - * Default constructor, needed for Jackson. - */ - public JSONDropTableMessage() {} + /** + * Default constructor, needed for Jackson. 
+ */ + public JSONDropTableMessage() {} - public JSONDropTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.timestamp = timestamp; - checkValid(); - } + public JSONDropTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getTable() { return table; } + @Override + public String getTable() { return table; } - @Override - public String getServer() { return server; } + @Override + public String getServer() { return server; } - @Override - public String getServicePrincipal() { return servicePrincipal; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public String getDB() { return db; } + @Override + public String getDB() { return db; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public Long getTimestamp() { return timestamp; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageDeserializer.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageDeserializer.java index 6fdf7d0..94226bb 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageDeserializer.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageDeserializer.java @@ -35,69 +35,69 @@ */ public class JSONMessageDeserializer extends MessageDeserializer { - static ObjectMapper mapper = new ObjectMapper(); // Thread-safe. + static ObjectMapper mapper = new ObjectMapper(); // Thread-safe. 
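The deserializer below shares one ObjectMapper across threads and, in the static block that follows, turns off FAIL_ON_UNKNOWN_PROPERTIES, so a reader built against this version keeps working when later producers add JSON fields. A small sketch of that lenient parsing; the message body and the extra "futureField" key are illustrative:

// Sketch: unknown JSON fields are ignored during de-serialization.
import org.apache.hcatalog.messaging.CreateDatabaseMessage;
import org.apache.hcatalog.messaging.json.JSONMessageDeserializer;

public class LenientParseExample {
  public static void main(String[] args) {
    String body = "{\"server\":\"thrift://metastore:9083\",\"servicePrincipal\":\"\","
        + "\"db\":\"default\",\"timestamp\":1371427200,\"futureField\":\"ignored\"}";

    // "futureField" is not a property of JSONCreateDatabaseMessage; it is silently skipped.
    CreateDatabaseMessage message = new JSONMessageDeserializer().getCreateDatabaseMessage(body);
    System.out.println(message.getDB() + " created at " + message.getTimestamp());
  }
}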
- static { - mapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); - } + static { + mapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); + } - @Override - public CreateDatabaseMessage getCreateDatabaseMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONCreateDatabaseMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONCreateDatabaseMessage.", exception); - } + @Override + public CreateDatabaseMessage getCreateDatabaseMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONCreateDatabaseMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONCreateDatabaseMessage.", exception); } + } - @Override - public DropDatabaseMessage getDropDatabaseMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONDropDatabaseMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONDropDatabaseMessage.", exception); - } + @Override + public DropDatabaseMessage getDropDatabaseMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONDropDatabaseMessage.class); } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONDropDatabaseMessage.", exception); + } + } - @Override - public CreateTableMessage getCreateTableMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONCreateTableMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONCreateTableMessage.", exception); - } + @Override + public CreateTableMessage getCreateTableMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONCreateTableMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONCreateTableMessage.", exception); } + } - @Override - public DropTableMessage getDropTableMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONDropTableMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONDropTableMessage.", exception); - } + @Override + public DropTableMessage getDropTableMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONDropTableMessage.class); } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONDropTableMessage.", exception); + } + } - @Override - public AddPartitionMessage getAddPartitionMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONAddPartitionMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct AddPartitionMessage.", exception); - } + @Override + public AddPartitionMessage getAddPartitionMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONAddPartitionMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct AddPartitionMessage.", exception); } + } - @Override - public DropPartitionMessage getDropPartitionMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONDropPartitionMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct DropPartitionMessage.", exception); - } + @Override + public DropPartitionMessage 
getDropPartitionMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONDropPartitionMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct DropPartitionMessage.", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageFactory.java b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageFactory.java index 85e3fdb..d366e22 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageFactory.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageFactory.java @@ -42,66 +42,66 @@ */ public class JSONMessageFactory extends MessageFactory { - private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer(); + private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer(); - @Override - public MessageDeserializer getDeserializer() { - return deserializer; - } + @Override + public MessageDeserializer getDeserializer() { + return deserializer; + } - @Override - public String getVersion() { - return "0.1"; - } + @Override + public String getVersion() { + return "0.1"; + } - @Override - public String getMessageFormat() { - return "json"; - } + @Override + public String getMessageFormat() { + return "json"; + } - @Override - public CreateDatabaseMessage buildCreateDatabaseMessage(Database db) { - return new JSONCreateDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), - System.currentTimeMillis() / 1000); - } + @Override + public CreateDatabaseMessage buildCreateDatabaseMessage(Database db) { + return new JSONCreateDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), + System.currentTimeMillis() / 1000); + } - @Override - public DropDatabaseMessage buildDropDatabaseMessage(Database db) { - return new JSONDropDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), - System.currentTimeMillis() / 1000); - } + @Override + public DropDatabaseMessage buildDropDatabaseMessage(Database db) { + return new JSONDropDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), + System.currentTimeMillis() / 1000); + } - @Override - public CreateTableMessage buildCreateTableMessage(Table table) { - return new JSONCreateTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), - table.getTableName(), System.currentTimeMillis()/1000); - } + @Override + public CreateTableMessage buildCreateTableMessage(Table table) { + return new JSONCreateTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), + table.getTableName(), System.currentTimeMillis()/1000); + } - @Override - public DropTableMessage buildDropTableMessage(Table table) { - return new JSONDropTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), table.getTableName(), - System.currentTimeMillis()/1000); - } + @Override + public DropTableMessage buildDropTableMessage(Table table) { + return new JSONDropTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), table.getTableName(), + System.currentTimeMillis()/1000); + } - @Override - public AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition) { - return new JSONAddPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), - partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), - System.currentTimeMillis()/1000); - } + 
@Override + public AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition) { + return new JSONAddPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), + partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), + System.currentTimeMillis()/1000); + } - @Override - public DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition) { - return new JSONDropPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), - partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), - System.currentTimeMillis()/1000); - } + @Override + public DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition) { + return new JSONDropPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), + partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), + System.currentTimeMillis()/1000); + } - private static Map getPartitionKeyValues(Table table, Partition partition) { - Map partitionKeys = new LinkedHashMap(); - for (int i=0; i getPartitionKeyValues(Table table, Partition partition) { + Map partitionKeys = new LinkedHashMap(); + for (int i=0; i session = new ThreadLocal() { - @Override - protected Session initialValue() { - try { - return createSession(); - } catch (Exception e) { - LOG.error("Couldn't create JMS Session", e); - return null; - } - } - - @Override - public void remove() { - if (get() != null) { - try { - get().close(); - } catch (Exception e) { - LOG.error("Unable to close bad JMS session, ignored error", e); - } - } - super.remove(); - } - }; - - /** - * Create message bus connection and session in constructor. - */ - public NotificationListener(final Configuration conf) { - super(conf); - testAndCreateConnection(); - } - - private static String getTopicName(Partition partition, - ListenerEvent partitionEvent) throws MetaException { - try { - return partitionEvent.getHandler() - .get_table(partition.getDbName(), partition.getTableName()) - .getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); - } catch (NoSuchObjectException e) { - throw new MetaException(e.toString()); - } - } - - @Override - public void onAddPartition(AddPartitionEvent partitionEvent) - throws MetaException { - // Subscriber can get notification of newly add partition in a - // particular table by listening on a topic named "dbName.tableName" - // and message selector string as "HCAT_EVENT = HCAT_ADD_PARTITION" - if (partitionEvent.getStatus()) { - - Partition partition = partitionEvent.getPartition(); - String topicName = getTopicName(partition, partitionEvent); - if (topicName != null && !topicName.equals("")) { - send(messageFactory.buildAddPartitionMessage(partitionEvent.getTable(), partition), topicName); - } else { - LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " - + partition.getDbName() - + "." - + partition.getTableName() - + " To enable notifications for this table, please do alter table set properties (" - + HCatConstants.HCAT_MSGBUS_TOPIC_NAME - + "=.) or whatever you want topic name to be."); - } - } - - } - - /** - * Send dropped partition notifications. Subscribers can receive these notifications for a - * particular table by listening on a topic named "dbName.tableName" with message selector - * string {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_EVENT} = - * {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_DROP_PARTITION_EVENT}. - *
- * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been - * found to throw NPE when serializing objects that contain null. For this reason we override - * some fields in the StorageDescriptor of this notification. This should be fixed after - * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. - */ - @Override - public void onDropPartition(DropPartitionEvent partitionEvent) throws MetaException { - if (partitionEvent.getStatus()) { - Partition partition = partitionEvent.getPartition(); - StorageDescriptor sd = partition.getSd(); - sd.setBucketCols(new ArrayList()); - sd.setSortCols(new ArrayList()); - sd.setParameters(new HashMap()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSkewedInfo().setSkewedColNames(new ArrayList()); - String topicName = getTopicName(partition, partitionEvent); - if (topicName != null && !topicName.equals("")) { - send(messageFactory.buildDropPartitionMessage(partitionEvent.getTable(), partition), topicName); - } else { - LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " - + partition.getDbName() - + "." - + partition.getTableName() - + " To enable notifications for this table, please do alter table set properties (" - + HCatConstants.HCAT_MSGBUS_TOPIC_NAME - + "=.) or whatever you want topic name to be."); - } - } - } - + private static final Logger LOG = LoggerFactory.getLogger(NotificationListener.class); + protected Connection conn; + private static MessageFactory messageFactory = MessageFactory.getInstance(); + public static final int NUM_RETRIES = 1; + private static final String HEALTH_CHECK_TOPIC_SUFFIX = "jms_health_check"; + private static final String HEALTH_CHECK_MSG = "HCAT_JMS_HEALTH_CHECK_MESSAGE"; + + protected final ThreadLocal session = new ThreadLocal() { @Override - public void onCreateDatabase(CreateDatabaseEvent dbEvent) - throws MetaException { - // Subscriber can get notification about addition of a database in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_ADD_DATABASE" - if (dbEvent.getStatus()) { - String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); - send(messageFactory.buildCreateDatabaseMessage(dbEvent.getDatabase()), topicName); - } + protected Session initialValue() { + try { + return createSession(); + } catch (Exception e) { + LOG.error("Couldn't create JMS Session", e); + return null; + } } @Override - public void onDropDatabase(DropDatabaseEvent dbEvent) throws MetaException { - // Subscriber can get notification about drop of a database in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_DROP_DATABASE" - if (dbEvent.getStatus()) { - String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); - send(messageFactory.buildDropDatabaseMessage(dbEvent.getDatabase()), topicName); + public void remove() { + if (get() != null) { + try { + get().close(); + } catch (Exception e) { + LOG.error("Unable to close bad JMS session, ignored error", e); } + } + super.remove(); } - - @Override - public void onCreateTable(CreateTableEvent tableEvent) throws MetaException { - // Subscriber can get notification about addition of a table in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_ADD_TABLE" - if (tableEvent.getStatus()) { - Table tbl = tableEvent.getTable(); - HMSHandler handler = tableEvent.getHandler(); - HiveConf conf = handler.getHiveConf(); - Table 
newTbl; - try { - newTbl = handler.get_table(tbl.getDbName(), tbl.getTableName()) - .deepCopy(); - newTbl.getParameters().put( - HCatConstants.HCAT_MSGBUS_TOPIC_NAME, - getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase() + "." - + newTbl.getTableName().toLowerCase()); - handler.alter_table(newTbl.getDbName(), newTbl.getTableName(), newTbl); - } catch (InvalidOperationException e) { - MetaException me = new MetaException(e.toString()); - me.initCause(e); - throw me; - } catch (NoSuchObjectException e) { - MetaException me = new MetaException(e.toString()); - me.initCause(e); - throw me; - } - String topicName = getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase(); - send(messageFactory.buildCreateTableMessage(newTbl), topicName); - } + }; + + /** + * Create message bus connection and session in constructor. + */ + public NotificationListener(final Configuration conf) { + super(conf); + testAndCreateConnection(); + } + + private static String getTopicName(Partition partition, + ListenerEvent partitionEvent) throws MetaException { + try { + return partitionEvent.getHandler() + .get_table(partition.getDbName(), partition.getTableName()) + .getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); + } catch (NoSuchObjectException e) { + throw new MetaException(e.toString()); } - - private String getTopicPrefix(Configuration conf) { - return conf.get(HCatConstants.HCAT_MSGBUS_TOPIC_PREFIX, - HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); + } + + @Override + public void onAddPartition(AddPartitionEvent partitionEvent) + throws MetaException { + // Subscriber can get notification of newly add partition in a + // particular table by listening on a topic named "dbName.tableName" + // and message selector string as "HCAT_EVENT = HCAT_ADD_PARTITION" + if (partitionEvent.getStatus()) { + + Partition partition = partitionEvent.getPartition(); + String topicName = getTopicName(partition, partitionEvent); + if (topicName != null && !topicName.equals("")) { + send(messageFactory.buildAddPartitionMessage(partitionEvent.getTable(), partition), topicName); + } else { + LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " + + partition.getDbName() + + "." + + partition.getTableName() + + " To enable notifications for this table, please do alter table set properties (" + + HCatConstants.HCAT_MSGBUS_TOPIC_NAME + + "=.) or whatever you want topic name to be."); + } } - /** - * Send dropped table notifications. Subscribers can receive these notifications for - * dropped tables by listening on topic "HCAT" with message selector string - * {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_EVENT} = - * {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_DROP_TABLE_EVENT} - *
- * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been - * found to throw NPE when serializing objects that contain null. For this reason we override - * some fields in the StorageDescriptor of this notification. This should be fixed after - * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. - */ - @Override - public void onDropTable(DropTableEvent tableEvent) throws MetaException { - // Subscriber can get notification about drop of a table in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_DROP_TABLE" - - // Datanucleus throws NPE when we try to serialize a table object - // retrieved from metastore. To workaround that we reset following objects - - if (tableEvent.getStatus()) { - Table table = tableEvent.getTable(); - String topicName = getTopicPrefix(tableEvent.getHandler().getHiveConf()) + "." + table.getDbName().toLowerCase(); - send(messageFactory.buildDropTableMessage(table), topicName); - } + } + + /** + * Send dropped partition notifications. Subscribers can receive these notifications for a + * particular table by listening on a topic named "dbName.tableName" with message selector + * string {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_EVENT} = + * {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_DROP_PARTITION_EVENT}. + *
+ * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been + * found to throw NPE when serializing objects that contain null. For this reason we override + * some fields in the StorageDescriptor of this notification. This should be fixed after + * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. + */ + @Override + public void onDropPartition(DropPartitionEvent partitionEvent) throws MetaException { + if (partitionEvent.getStatus()) { + Partition partition = partitionEvent.getPartition(); + StorageDescriptor sd = partition.getSd(); + sd.setBucketCols(new ArrayList()); + sd.setSortCols(new ArrayList()); + sd.setParameters(new HashMap()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSkewedInfo().setSkewedColNames(new ArrayList()); + String topicName = getTopicName(partition, partitionEvent); + if (topicName != null && !topicName.equals("")) { + send(messageFactory.buildDropPartitionMessage(partitionEvent.getTable(), partition), topicName); + } else { + LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " + + partition.getDbName() + + "." + + partition.getTableName() + + " To enable notifications for this table, please do alter table set properties (" + + HCatConstants.HCAT_MSGBUS_TOPIC_NAME + + "=.) or whatever you want topic name to be."); + } } - - /** - * @param hCatEventMessage The HCatEventMessage being sent over JMS. - * @param topicName is the name on message broker on which message is sent. - */ - protected void send(HCatEventMessage hCatEventMessage, String topicName) { - send(hCatEventMessage, topicName, NUM_RETRIES); + } + + @Override + public void onCreateDatabase(CreateDatabaseEvent dbEvent) + throws MetaException { + // Subscriber can get notification about addition of a database in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_ADD_DATABASE" + if (dbEvent.getStatus()) { + String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); + send(messageFactory.buildCreateDatabaseMessage(dbEvent.getDatabase()), topicName); } - - /** - * @param hCatEventMessage The HCatEventMessage being sent over JMS, this method is threadsafe - * @param topicName is the name on message broker on which message is sent. - * @param retries the number of retry attempts - */ - protected void send(HCatEventMessage hCatEventMessage, String topicName, int retries) { - try { - if (session.get() == null) { - // Need to reconnect - throw new JMSException("Invalid JMS session"); - } - Destination topic = createTopic(topicName); - Message msg = session.get().createTextMessage(hCatEventMessage.toString()); - - msg.setStringProperty(HCatConstants.HCAT_EVENT, hCatEventMessage.getEventType().toString()); - msg.setStringProperty(HCatConstants.HCAT_MESSAGE_VERSION, messageFactory.getVersion()); - msg.setStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT, messageFactory.getMessageFormat()); - MessageProducer producer = createProducer(topic); - producer.send(msg); - // Message must be transacted before we return. - session.get().commit(); - } catch (Exception e) { - if (retries >= 0) { - // this may happen if we were able to establish connection once, but its no longer valid - LOG.error("Seems like connection is lost. Will retry. Retries left : " + retries + ". error was:", e); - testAndCreateConnection(); - send(hCatEventMessage, topicName, retries - 1); - } else { - // Gobble up the exception. Message delivery is best effort. 
- LOG.error("Failed to send message on topic: " + topicName + - " event: " + hCatEventMessage.getEventType() + " after retries: " + NUM_RETRIES, e); - } - } + } + + @Override + public void onDropDatabase(DropDatabaseEvent dbEvent) throws MetaException { + // Subscriber can get notification about drop of a database in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_DROP_DATABASE" + if (dbEvent.getStatus()) { + String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); + send(messageFactory.buildDropDatabaseMessage(dbEvent.getDatabase()), topicName); } - - /** - * Get the topic object for the topicName - * - * @param topicName The String identifying the message-topic. - * @return A {@link Topic} object corresponding to the specified topicName. - * @throws JMSException - */ - protected Topic createTopic(final String topicName) throws JMSException { - return session.get().createTopic(topicName); + } + + @Override + public void onCreateTable(CreateTableEvent tableEvent) throws MetaException { + // Subscriber can get notification about addition of a table in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_ADD_TABLE" + if (tableEvent.getStatus()) { + Table tbl = tableEvent.getTable(); + HMSHandler handler = tableEvent.getHandler(); + HiveConf conf = handler.getHiveConf(); + Table newTbl; + try { + newTbl = handler.get_table(tbl.getDbName(), tbl.getTableName()) + .deepCopy(); + newTbl.getParameters().put( + HCatConstants.HCAT_MSGBUS_TOPIC_NAME, + getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase() + "." + + newTbl.getTableName().toLowerCase()); + handler.alter_table(newTbl.getDbName(), newTbl.getTableName(), newTbl); + } catch (InvalidOperationException e) { + MetaException me = new MetaException(e.toString()); + me.initCause(e); + throw me; + } catch (NoSuchObjectException e) { + MetaException me = new MetaException(e.toString()); + me.initCause(e); + throw me; + } + String topicName = getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase(); + send(messageFactory.buildCreateTableMessage(newTbl), topicName); } - - /** - * Does a health check on the connection by sending a dummy message. - * Create the connection if the connection is found to be bad - * Also recreates the session - */ - protected synchronized void testAndCreateConnection() { - if (conn != null) { - // This method is reached when error occurs while sending msg, so the session must be bad - session.remove(); - if (!isConnectionHealthy()) { - // I am the first thread to detect the error, cleanup old connection & reconnect - try { - conn.close(); - } catch (Exception e) { - LOG.error("Unable to close bad JMS connection, ignored error", e); - } - conn = createConnection(); - } - } else { - conn = createConnection(); - } - try { - session.set(createSession()); - } catch (JMSException e) { - LOG.error("Couldn't create JMS session, ignored the error", e); - } + } + + private String getTopicPrefix(Configuration conf) { + return conf.get(HCatConstants.HCAT_MSGBUS_TOPIC_PREFIX, + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); + } + + /** + * Send dropped table notifications. Subscribers can receive these notifications for + * dropped tables by listening on topic "HCAT" with message selector string + * {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_EVENT} = + * {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_DROP_TABLE_EVENT} + *
+ * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been + * found to throw NPE when serializing objects that contain null. For this reason we override + * some fields in the StorageDescriptor of this notification. This should be fixed after + * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. + */ + @Override + public void onDropTable(DropTableEvent tableEvent) throws MetaException { + // Subscriber can get notification about drop of a table in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_DROP_TABLE" + + // Datanucleus throws NPE when we try to serialize a table object + // retrieved from metastore. To workaround that we reset following objects + + if (tableEvent.getStatus()) { + Table table = tableEvent.getTable(); + String topicName = getTopicPrefix(tableEvent.getHandler().getHiveConf()) + "." + table.getDbName().toLowerCase(); + send(messageFactory.buildDropTableMessage(table), topicName); } - - /** - * Create the JMS connection - * @return newly created JMS connection - */ - protected Connection createConnection() { - LOG.info("Will create new JMS connection"); - Context jndiCntxt; - Connection jmsConnection = null; - try { - jndiCntxt = new InitialContext(); - ConnectionFactory connFac = (ConnectionFactory) jndiCntxt.lookup("ConnectionFactory"); - jmsConnection = connFac.createConnection(); - jmsConnection.start(); - jmsConnection.setExceptionListener(new ExceptionListener() { - @Override - public void onException(JMSException jmse) { - LOG.error("JMS Exception listener received exception. Ignored the error", jmse); - } - }); - } catch (NamingException e) { - LOG.error("JNDI error while setting up Message Bus connection. " - + "Please make sure file named 'jndi.properties' is in " - + "classpath and contains appropriate key-value pairs.", e); - } catch (JMSException e) { - LOG.error("Failed to initialize connection to message bus", e); - } catch (Throwable t) { - LOG.error("Unable to connect to JMS provider", t); - } - return jmsConnection; + } + + /** + * @param hCatEventMessage The HCatEventMessage being sent over JMS. + * @param topicName is the name on message broker on which message is sent. + */ + protected void send(HCatEventMessage hCatEventMessage, String topicName) { + send(hCatEventMessage, topicName, NUM_RETRIES); + } + + /** + * @param hCatEventMessage The HCatEventMessage being sent over JMS, this method is threadsafe + * @param topicName is the name on message broker on which message is sent. + * @param retries the number of retry attempts + */ + protected void send(HCatEventMessage hCatEventMessage, String topicName, int retries) { + try { + if (session.get() == null) { + // Need to reconnect + throw new JMSException("Invalid JMS session"); + } + Destination topic = createTopic(topicName); + Message msg = session.get().createTextMessage(hCatEventMessage.toString()); + + msg.setStringProperty(HCatConstants.HCAT_EVENT, hCatEventMessage.getEventType().toString()); + msg.setStringProperty(HCatConstants.HCAT_MESSAGE_VERSION, messageFactory.getVersion()); + msg.setStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT, messageFactory.getMessageFormat()); + MessageProducer producer = createProducer(topic); + producer.send(msg); + // Message must be transacted before we return. 
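The javadoc and comments above spell out the subscription contract: one JMS topic per database or table, plus a message selector on HCAT_EVENT. For reference, a minimal synchronous subscriber along those lines might look like the sketch below; the class name and the "mydb"/"mytbl" names are placeholders, and resolving the topic through HiveMetaStoreClient is an assumption for illustration, not something this patch provides.

import javax.jms.Connection;
import javax.jms.ConnectionFactory;
import javax.jms.MessageConsumer;
import javax.jms.Session;
import javax.jms.TextMessage;
import javax.jms.Topic;
import javax.naming.InitialContext;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hive.hcatalog.common.HCatConstants;

public class DropPartitionSubscriberSketch {
  public static void main(String[] args) throws Exception {
    // Resolve the table's topic name from its metastore parameters (stamped by onCreateTable above).
    HiveMetaStoreClient metastore = new HiveMetaStoreClient(new HiveConf());
    String topicName = metastore.getTable("mydb", "mytbl").getParameters()
        .get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME);

    // Plain JMS subscription; the ConnectionFactory is resolved from jndi.properties.
    ConnectionFactory factory =
        (ConnectionFactory) new InitialContext().lookup("ConnectionFactory");
    Connection connection = factory.createConnection();
    connection.start();
    Session session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE);
    Topic topic = session.createTopic(topicName);

    // Receive only drop-partition events, using the message selector documented above.
    String selector = HCatConstants.HCAT_EVENT + " = '"
        + HCatConstants.HCAT_DROP_PARTITION_EVENT + "'";
    MessageConsumer consumer = session.createConsumer(topic, selector);
    TextMessage message = (TextMessage) consumer.receive();
    System.out.println(message.getStringProperty(HCatConstants.HCAT_EVENT) + ": " + message.getText());
  }
}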
+ session.get().commit(); + } catch (Exception e) { + if (retries >= 0) { + // this may happen if we were able to establish connection once, but its no longer valid + LOG.error("Seems like connection is lost. Will retry. Retries left : " + retries + ". error was:", e); + testAndCreateConnection(); + send(hCatEventMessage, topicName, retries - 1); + } else { + // Gobble up the exception. Message delivery is best effort. + LOG.error("Failed to send message on topic: " + topicName + + " event: " + hCatEventMessage.getEventType() + " after retries: " + NUM_RETRIES, e); + } } - - /** - * Send a dummy message to probe if the JMS connection is healthy - * @return true if connection is healthy, false otherwise - */ - protected boolean isConnectionHealthy() { + } + + /** + * Get the topic object for the topicName + * + * @param topicName The String identifying the message-topic. + * @return A {@link Topic} object corresponding to the specified topicName. + * @throws JMSException + */ + protected Topic createTopic(final String topicName) throws JMSException { + return session.get().createTopic(topicName); + } + + /** + * Does a health check on the connection by sending a dummy message. + * Create the connection if the connection is found to be bad + * Also recreates the session + */ + protected synchronized void testAndCreateConnection() { + if (conn != null) { + // This method is reached when error occurs while sending msg, so the session must be bad + session.remove(); + if (!isConnectionHealthy()) { + // I am the first thread to detect the error, cleanup old connection & reconnect try { - Topic topic = createTopic(getTopicPrefix(getConf()) + "." + HEALTH_CHECK_TOPIC_SUFFIX); - MessageProducer producer = createProducer(topic); - Message msg = session.get().createTextMessage(HEALTH_CHECK_MSG); - producer.send(msg, DeliveryMode.NON_PERSISTENT, 4, 0); + conn.close(); } catch (Exception e) { - return false; + LOG.error("Unable to close bad JMS connection, ignored error", e); } - return true; - } - - /** - * Creates a JMS session - * @return newly create JMS session - * @throws JMSException - */ - protected Session createSession() throws JMSException { - // We want message to be sent when session commits, thus we run in - // transacted mode. 
- return conn.createSession(true, Session.SESSION_TRANSACTED); + conn = createConnection(); + } + } else { + conn = createConnection(); } - - /** - * Create a JMS producer - * @param topic - * @return newly created message producer - * @throws JMSException - */ - protected MessageProducer createProducer(Destination topic) throws JMSException { - return session.get().createProducer(topic); + try { + session.set(createSession()); + } catch (JMSException e) { + LOG.error("Couldn't create JMS session, ignored the error", e); } - - @Override - protected void finalize() throws Throwable { - if (conn != null) { - try { - conn.close(); - } catch (Exception e) { - LOG.error("Couldn't close jms connection, ignored the error", e); - } + } + + /** + * Create the JMS connection + * @return newly created JMS connection + */ + protected Connection createConnection() { + LOG.info("Will create new JMS connection"); + Context jndiCntxt; + Connection jmsConnection = null; + try { + jndiCntxt = new InitialContext(); + ConnectionFactory connFac = (ConnectionFactory) jndiCntxt.lookup("ConnectionFactory"); + jmsConnection = connFac.createConnection(); + jmsConnection.start(); + jmsConnection.setExceptionListener(new ExceptionListener() { + @Override + public void onException(JMSException jmse) { + LOG.error("JMS Exception listener received exception. Ignored the error", jmse); } + }); + } catch (NamingException e) { + LOG.error("JNDI error while setting up Message Bus connection. " + + "Please make sure file named 'jndi.properties' is in " + + "classpath and contains appropriate key-value pairs.", e); + } catch (JMSException e) { + LOG.error("Failed to initialize connection to message bus", e); + } catch (Throwable t) { + LOG.error("Unable to connect to JMS provider", t); } - - @Override - public void onLoadPartitionDone(LoadPartitionDoneEvent lpde) - throws MetaException { -// TODO: Fix LoadPartitionDoneEvent. Currently, LPDE can only carry a single partition-spec. And that defeats the purpose. -// if(lpde.getStatus()) -// send(lpde.getPartitionName(),lpde.getTable().getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME),HCatConstants.HCAT_PARTITION_DONE_EVENT); + return jmsConnection; + } + + /** + * Send a dummy message to probe if the JMS connection is healthy + * @return true if connection is healthy, false otherwise + */ + protected boolean isConnectionHealthy() { + try { + Topic topic = createTopic(getTopicPrefix(getConf()) + "." + HEALTH_CHECK_TOPIC_SUFFIX); + MessageProducer producer = createProducer(topic); + Message msg = session.get().createTextMessage(HEALTH_CHECK_MSG); + producer.send(msg, DeliveryMode.NON_PERSISTENT, 4, 0); + } catch (Exception e) { + return false; } - - @Override - public void onAlterPartition(AlterPartitionEvent ape) throws MetaException { - // no-op + return true; + } + + /** + * Creates a JMS session + * @return newly create JMS session + * @throws JMSException + */ + protected Session createSession() throws JMSException { + // We want message to be sent when session commits, thus we run in + // transacted mode. 
+ return conn.createSession(true, Session.SESSION_TRANSACTED); + } + + /** + * Create a JMS producer + * @param topic + * @return newly created message producer + * @throws JMSException + */ + protected MessageProducer createProducer(Destination topic) throws JMSException { + return session.get().createProducer(topic); + } + + @Override + protected void finalize() throws Throwable { + if (conn != null) { + try { + conn.close(); + } catch (Exception e) { + LOG.error("Couldn't close jms connection, ignored the error", e); + } } + } - @Override - public void onAlterTable(AlterTableEvent ate) throws MetaException { - // no-op - } + @Override + public void onLoadPartitionDone(LoadPartitionDoneEvent lpde) + throws MetaException { +// TODO: Fix LoadPartitionDoneEvent. Currently, LPDE can only carry a single partition-spec. And that defeats the purpose. +// if(lpde.getStatus()) +// send(lpde.getPartitionName(),lpde.getTable().getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME),HCatConstants.HCAT_PARTITION_DONE_EVENT); + } + + @Override + public void onAlterPartition(AlterPartitionEvent ape) throws MetaException { + // no-op + } + + @Override + public void onAlterTable(AlterTableEvent ate) throws MetaException { + // no-op + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/AddPartitionMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/AddPartitionMessage.java index 786380b..5b114b5 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/AddPartitionMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/AddPartitionMessage.java @@ -27,28 +27,28 @@ */ public abstract class AddPartitionMessage extends HCatEventMessage { - protected AddPartitionMessage() { - super(EventType.ADD_PARTITION); - } + protected AddPartitionMessage() { + super(EventType.ADD_PARTITION); + } - /** - * Getter for name of table (where partitions are added). - * @return Table-name (String). - */ - public abstract String getTable(); + /** + * Getter for name of table (where partitions are added). + * @return Table-name (String). + */ + public abstract String getTable(); - /** - * Getter for list of partitions added. - * @return List of maps, where each map identifies values for each partition-key, for every added partition. - */ - public abstract List> getPartitions (); + /** + * Getter for list of partitions added. + * @return List of maps, where each map identifies values for each partition-key, for every added partition. 
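createConnection() above resolves "ConnectionFactory" through JNDI, so the broker is chosen entirely by jndi.properties (or equivalent system properties). A minimal bootstrap sketch follows, assuming the same embedded ActiveMQ vm:// transport that the test at the end of this patch configures; the class name is a placeholder.

import javax.jms.Connection;
import javax.jms.ConnectionFactory;
import javax.jms.Session;
import javax.naming.Context;
import javax.naming.InitialContext;

public class JmsBootstrapSketch {
  public static void main(String[] args) throws Exception {
    // Same wiring the test performs via system properties; a jndi.properties file on the
    // classpath with these two keys is the equivalent that createConnection() above expects.
    System.setProperty(Context.INITIAL_CONTEXT_FACTORY,
        "org.apache.activemq.jndi.ActiveMQInitialContextFactory");
    System.setProperty(Context.PROVIDER_URL, "vm://localhost?broker.persistent=false");

    ConnectionFactory factory =
        (ConnectionFactory) new InitialContext().lookup("ConnectionFactory");
    Connection connection = factory.createConnection();
    connection.start();
    try {
      // Transacted: a published message becomes visible only on session.commit(),
      // which is why NotificationListener commits at the end of send().
      Session session = connection.createSession(true, Session.SESSION_TRANSACTED);
      session.close();
    } finally {
      connection.close();
    }
  }
}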
+ */ + public abstract List> getPartitions (); - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - if (getPartitions() == null) - throw new IllegalStateException("Partition-list unset."); - return super.checkValid(); - } + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + if (getPartitions() == null) + throw new IllegalStateException("Partition-list unset."); + return super.checkValid(); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateDatabaseMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateDatabaseMessage.java index 380890f..e465ab3 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateDatabaseMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateDatabaseMessage.java @@ -24,8 +24,8 @@ */ public abstract class CreateDatabaseMessage extends HCatEventMessage { - protected CreateDatabaseMessage() { - super(EventType.CREATE_DATABASE); - } + protected CreateDatabaseMessage() { + super(EventType.CREATE_DATABASE); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateTableMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateTableMessage.java index 240c3b8..6c8e2a4 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateTableMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateTableMessage.java @@ -24,20 +24,20 @@ */ public abstract class CreateTableMessage extends HCatEventMessage { - protected CreateTableMessage() { - super(EventType.CREATE_TABLE); - } + protected CreateTableMessage() { + super(EventType.CREATE_TABLE); + } - /** - * Getter for the name of table created in HCatalog. - * @return Table-name (String). - */ - public abstract String getTable(); + /** + * Getter for the name of table created in HCatalog. + * @return Table-name (String). 
+ */ + public abstract String getTable(); - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - return super.checkValid(); - } + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + return super.checkValid(); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropDatabaseMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropDatabaseMessage.java index a0279ef..d9675ba 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropDatabaseMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropDatabaseMessage.java @@ -24,7 +24,7 @@ */ public abstract class DropDatabaseMessage extends HCatEventMessage { - protected DropDatabaseMessage() { - super(EventType.DROP_DATABASE); - } + protected DropDatabaseMessage() { + super(EventType.DROP_DATABASE); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropPartitionMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropPartitionMessage.java index 57bf455..ee222ec 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropPartitionMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropPartitionMessage.java @@ -27,19 +27,19 @@ */ public abstract class DropPartitionMessage extends HCatEventMessage { - protected DropPartitionMessage() { - super(EventType.DROP_PARTITION); - } + protected DropPartitionMessage() { + super(EventType.DROP_PARTITION); + } - public abstract String getTable(); - public abstract List> getPartitions (); + public abstract String getTable(); + public abstract List> getPartitions (); - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - if (getPartitions() == null) - throw new IllegalStateException("Partition-list unset."); - return super.checkValid(); - } + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + if (getPartitions() == null) + throw new IllegalStateException("Partition-list unset."); + return super.checkValid(); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropTableMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropTableMessage.java index 4e6233e..e47b572 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropTableMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropTableMessage.java @@ -24,20 +24,20 @@ */ public abstract class DropTableMessage extends HCatEventMessage { - protected DropTableMessage() { - super(EventType.DROP_TABLE); - } + protected DropTableMessage() { + super(EventType.DROP_TABLE); + } - /** - * Getter for the name of the table being dropped. - * @return Table-name (String). - */ - public abstract String getTable(); + /** + * Getter for the name of the table being dropped. + * @return Table-name (String). 
+ */ + public abstract String getTable(); - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - return super.checkValid(); - } + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + return super.checkValid(); + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/HCatEventMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/HCatEventMessage.java index 1afca14..4d77057 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/HCatEventMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/HCatEventMessage.java @@ -27,74 +27,74 @@ */ public abstract class HCatEventMessage { - /** - * Enumeration of all supported types of Metastore operations. - */ - public static enum EventType { - - CREATE_DATABASE(HCatConstants.HCAT_CREATE_DATABASE_EVENT), - DROP_DATABASE(HCatConstants.HCAT_DROP_DATABASE_EVENT), - CREATE_TABLE(HCatConstants.HCAT_CREATE_TABLE_EVENT), - DROP_TABLE(HCatConstants.HCAT_DROP_TABLE_EVENT), - ADD_PARTITION(HCatConstants.HCAT_ADD_PARTITION_EVENT), - DROP_PARTITION(HCatConstants.HCAT_DROP_PARTITION_EVENT); - - private String typeString; - - EventType(String typeString) { - this.typeString = typeString; - } - - @Override - public String toString() { return typeString; } + /** + * Enumeration of all supported types of Metastore operations. + */ + public static enum EventType { + + CREATE_DATABASE(HCatConstants.HCAT_CREATE_DATABASE_EVENT), + DROP_DATABASE(HCatConstants.HCAT_DROP_DATABASE_EVENT), + CREATE_TABLE(HCatConstants.HCAT_CREATE_TABLE_EVENT), + DROP_TABLE(HCatConstants.HCAT_DROP_TABLE_EVENT), + ADD_PARTITION(HCatConstants.HCAT_ADD_PARTITION_EVENT), + DROP_PARTITION(HCatConstants.HCAT_DROP_PARTITION_EVENT); + + private String typeString; + + EventType(String typeString) { + this.typeString = typeString; } - protected EventType eventType; - - protected HCatEventMessage(EventType eventType) { - this.eventType = eventType; - } - - public EventType getEventType() { - return eventType; - } - - /** - * Getter for HCatalog Server's URL. - * (This is where the event originates from.) - * @return HCatalog Server's URL (String). - */ - public abstract String getServer(); - - /** - * Getter for the Kerberos principal of the HCatalog service. - * @return HCatalog Service Principal (String). - */ - public abstract String getServicePrincipal(); - - /** - * Getter for the name of the Database on which the Metastore operation is done. - * @return Database-name (String). - */ - public abstract String getDB(); - - /** - * Getter for the timestamp associated with the operation. - * @return Timestamp (Long - seconds since epoch). - */ - public abstract Long getTimestamp(); - - /** - * Class invariant. Checked after construction or deserialization. 
- */ - public HCatEventMessage checkValid() { - if (getServer() == null || getServicePrincipal() == null) - throw new IllegalStateException("Server-URL/Service-Principal shouldn't be null."); - if (getEventType() == null) - throw new IllegalStateException("Event-type unset."); - if (getDB() == null) - throw new IllegalArgumentException("DB-name unset."); - - return this; - } + @Override + public String toString() { return typeString; } + } + + protected EventType eventType; + + protected HCatEventMessage(EventType eventType) { + this.eventType = eventType; + } + + public EventType getEventType() { + return eventType; + } + + /** + * Getter for HCatalog Server's URL. + * (This is where the event originates from.) + * @return HCatalog Server's URL (String). + */ + public abstract String getServer(); + + /** + * Getter for the Kerberos principal of the HCatalog service. + * @return HCatalog Service Principal (String). + */ + public abstract String getServicePrincipal(); + + /** + * Getter for the name of the Database on which the Metastore operation is done. + * @return Database-name (String). + */ + public abstract String getDB(); + + /** + * Getter for the timestamp associated with the operation. + * @return Timestamp (Long - seconds since epoch). + */ + public abstract Long getTimestamp(); + + /** + * Class invariant. Checked after construction or deserialization. + */ + public HCatEventMessage checkValid() { + if (getServer() == null || getServicePrincipal() == null) + throw new IllegalStateException("Server-URL/Service-Principal shouldn't be null."); + if (getEventType() == null) + throw new IllegalStateException("Event-type unset."); + if (getDB() == null) + throw new IllegalArgumentException("DB-name unset."); + + return this; + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageDeserializer.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageDeserializer.java index 1f4ba08..a89c956 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageDeserializer.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageDeserializer.java @@ -24,60 +24,60 @@ */ public abstract class MessageDeserializer { - /** - * Method to construct HCatEventMessage from string. - */ - public HCatEventMessage getHCatEventMessage(String eventTypeString, String messageBody) { + /** + * Method to construct HCatEventMessage from string. 
+ */ + public HCatEventMessage getHCatEventMessage(String eventTypeString, String messageBody) { - switch (HCatEventMessage.EventType.valueOf(eventTypeString)) { - case CREATE_DATABASE: - return getCreateDatabaseMessage(messageBody); - case DROP_DATABASE: - return getDropDatabaseMessage(messageBody); - case CREATE_TABLE: - return getCreateTableMessage(messageBody); - case DROP_TABLE: - return getDropTableMessage(messageBody); - case ADD_PARTITION: - return getAddPartitionMessage(messageBody); - case DROP_PARTITION: - return getDropPartitionMessage(messageBody); + switch (HCatEventMessage.EventType.valueOf(eventTypeString)) { + case CREATE_DATABASE: + return getCreateDatabaseMessage(messageBody); + case DROP_DATABASE: + return getDropDatabaseMessage(messageBody); + case CREATE_TABLE: + return getCreateTableMessage(messageBody); + case DROP_TABLE: + return getDropTableMessage(messageBody); + case ADD_PARTITION: + return getAddPartitionMessage(messageBody); + case DROP_PARTITION: + return getDropPartitionMessage(messageBody); - default: - throw new IllegalArgumentException("Unsupported event-type: " + eventTypeString); - } + default: + throw new IllegalArgumentException("Unsupported event-type: " + eventTypeString); } + } - /** - * Method to de-serialize CreateDatabaseMessage instance. - */ - public abstract CreateDatabaseMessage getCreateDatabaseMessage(String messageBody); + /** + * Method to de-serialize CreateDatabaseMessage instance. + */ + public abstract CreateDatabaseMessage getCreateDatabaseMessage(String messageBody); - /** - * Method to de-serialize DropDatabaseMessage instance. - */ - public abstract DropDatabaseMessage getDropDatabaseMessage(String messageBody); + /** + * Method to de-serialize DropDatabaseMessage instance. + */ + public abstract DropDatabaseMessage getDropDatabaseMessage(String messageBody); - /** - * Method to de-serialize CreateTableMessage instance. - */ - public abstract CreateTableMessage getCreateTableMessage(String messageBody); + /** + * Method to de-serialize CreateTableMessage instance. + */ + public abstract CreateTableMessage getCreateTableMessage(String messageBody); - /** - * Method to de-serialize DropTableMessage instance. - */ - public abstract DropTableMessage getDropTableMessage(String messageBody); + /** + * Method to de-serialize DropTableMessage instance. + */ + public abstract DropTableMessage getDropTableMessage(String messageBody); - /** - * Method to de-serialize AddPartitionMessage instance. - */ - public abstract AddPartitionMessage getAddPartitionMessage(String messageBody); + /** + * Method to de-serialize AddPartitionMessage instance. + */ + public abstract AddPartitionMessage getAddPartitionMessage(String messageBody); - /** - * Method to de-serialize DropPartitionMessage instance. - */ - public abstract DropPartitionMessage getDropPartitionMessage(String messageBody); + /** + * Method to de-serialize DropPartitionMessage instance. + */ + public abstract DropPartitionMessage getDropPartitionMessage(String messageBody); - // Protection against construction. - protected MessageDeserializer() {} + // Protection against construction. 
+ protected MessageDeserializer() {} } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java index ff0ecdf..8080a8a 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java @@ -31,108 +31,108 @@ */ public abstract class MessageFactory { - private static MessageFactory instance = new JSONMessageFactory(); - - protected static final HiveConf hiveConf = new HiveConf(); - static { - hiveConf.addResource("hive-site.xml"); + private static MessageFactory instance = new JSONMessageFactory(); + + protected static final HiveConf hiveConf = new HiveConf(); + static { + hiveConf.addResource("hive-site.xml"); + } + + private static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; + private static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; + private static final String HCAT_MESSAGE_FORMAT = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FORMAT, "json"); + private static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hive.hcatalog.messaging.json.JSONMessageFactory"; + private static final String HCAT_MESSAGE_FACTORY_IMPL = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX + + HCAT_MESSAGE_FORMAT, + DEFAULT_MESSAGE_FACTORY_IMPL); + + protected static final String HCAT_SERVER_URL = hiveConf.get(HiveConf.ConfVars.METASTOREURIS.name(), ""); + protected static final String HCAT_SERVICE_PRINCIPAL = hiveConf.get(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.name(), ""); + + /** + * Getter for MessageFactory instance. + */ + public static MessageFactory getInstance() { + if (instance == null) { + instance = getInstance(HCAT_MESSAGE_FACTORY_IMPL); } + return instance; + } - private static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; - private static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; - private static final String HCAT_MESSAGE_FORMAT = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FORMAT, "json"); - private static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hive.hcatalog.messaging.json.JSONMessageFactory"; - private static final String HCAT_MESSAGE_FACTORY_IMPL = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX - + HCAT_MESSAGE_FORMAT, - DEFAULT_MESSAGE_FACTORY_IMPL); - - protected static final String HCAT_SERVER_URL = hiveConf.get(HiveConf.ConfVars.METASTOREURIS.name(), ""); - protected static final String HCAT_SERVICE_PRINCIPAL = hiveConf.get(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.name(), ""); - - /** - * Getter for MessageFactory instance. 
- */ - public static MessageFactory getInstance() { - if (instance == null) { - instance = getInstance(HCAT_MESSAGE_FACTORY_IMPL); - } - return instance; + private static MessageFactory getInstance(String className) { + try { + return (MessageFactory)ReflectionUtils.newInstance(Class.forName(className), hiveConf); } - - private static MessageFactory getInstance(String className) { - try { - return (MessageFactory)ReflectionUtils.newInstance(Class.forName(className), hiveConf); - } - catch (ClassNotFoundException classNotFound) { - throw new IllegalStateException("Could not construct MessageFactory implementation: ", classNotFound); - } + catch (ClassNotFoundException classNotFound) { + throw new IllegalStateException("Could not construct MessageFactory implementation: ", classNotFound); } - - /** - * Getter for MessageDeserializer, corresponding to the specified format and version. - * @param format Serialization format for notifications. - * @param version Version of serialization format (currently ignored.) - * @return MessageDeserializer. - */ - public static MessageDeserializer getDeserializer(String format, - String version) { - return getInstance(hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX + format, - DEFAULT_MESSAGE_FACTORY_IMPL)).getDeserializer(); - } - - public abstract MessageDeserializer getDeserializer(); - - /** - * Getter for version-string, corresponding to all constructed messages. - */ - public abstract String getVersion(); - - /** - * Getter for message-format. - */ - public abstract String getMessageFormat(); - - /** - * Factory method for CreateDatabaseMessage. - * @param db The Database being added. - * @return CreateDatabaseMessage instance. - */ - public abstract CreateDatabaseMessage buildCreateDatabaseMessage(Database db); - - /** - * Factory method for DropDatabaseMessage. - * @param db The Database being dropped. - * @return DropDatabaseMessage instance. - */ - public abstract DropDatabaseMessage buildDropDatabaseMessage(Database db); - - /** - * Factory method for CreateTableMessage. - * @param table The Table being created. - * @return CreateTableMessage instance. - */ - public abstract CreateTableMessage buildCreateTableMessage(Table table); - - /** - * Factory method for DropTableMessage. - * @param table The Table being dropped. - * @return DropTableMessage instance. - */ - public abstract DropTableMessage buildDropTableMessage(Table table); - - /** - * Factory method for AddPartitionMessage. - * @param table The Table to which the partition is added. - * @param partition The Partition being added. - * @return AddPartitionMessage instance. - */ - public abstract AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition); - - /** - * Factory method for DropPartitionMessage. - * @param table The Table from which the partition is dropped. - * @param partition The Partition being dropped. - * @return DropPartitionMessage instance. - */ - public abstract DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition); + } + + /** + * Getter for MessageDeserializer, corresponding to the specified format and version. + * @param format Serialization format for notifications. + * @param version Version of serialization format (currently ignored.) + * @return MessageDeserializer. 
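On the producer side, the builder methods declared on MessageFactory pair with the JMS properties that NotificationListener.send() stamps on each message. A sketch of that pairing, assuming a transacted Session and an already-created Topic (both placeholders here, as is the class name):

import javax.jms.Message;
import javax.jms.MessageProducer;
import javax.jms.Session;
import javax.jms.Topic;

import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hive.hcatalog.common.HCatConstants;
import org.apache.hive.hcatalog.messaging.CreateTableMessage;
import org.apache.hive.hcatalog.messaging.MessageFactory;

public class ProducerSketch {
  public static void publishCreateTable(Session session, Topic topic, Table table) throws Exception {
    MessageFactory factory = MessageFactory.getInstance();   // JSON factory by default
    CreateTableMessage body = factory.buildCreateTableMessage(table);

    Message msg = session.createTextMessage(body.toString());
    msg.setStringProperty(HCatConstants.HCAT_EVENT, body.getEventType().toString());
    msg.setStringProperty(HCatConstants.HCAT_MESSAGE_VERSION, factory.getVersion());
    msg.setStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT, factory.getMessageFormat());

    MessageProducer producer = session.createProducer(topic);
    producer.send(msg);
    session.commit();   // required because the session is assumed to be transacted
  }
}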
+ */ + public static MessageDeserializer getDeserializer(String format, + String version) { + return getInstance(hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX + format, + DEFAULT_MESSAGE_FACTORY_IMPL)).getDeserializer(); + } + + public abstract MessageDeserializer getDeserializer(); + + /** + * Getter for version-string, corresponding to all constructed messages. + */ + public abstract String getVersion(); + + /** + * Getter for message-format. + */ + public abstract String getMessageFormat(); + + /** + * Factory method for CreateDatabaseMessage. + * @param db The Database being added. + * @return CreateDatabaseMessage instance. + */ + public abstract CreateDatabaseMessage buildCreateDatabaseMessage(Database db); + + /** + * Factory method for DropDatabaseMessage. + * @param db The Database being dropped. + * @return DropDatabaseMessage instance. + */ + public abstract DropDatabaseMessage buildDropDatabaseMessage(Database db); + + /** + * Factory method for CreateTableMessage. + * @param table The Table being created. + * @return CreateTableMessage instance. + */ + public abstract CreateTableMessage buildCreateTableMessage(Table table); + + /** + * Factory method for DropTableMessage. + * @param table The Table being dropped. + * @return DropTableMessage instance. + */ + public abstract DropTableMessage buildDropTableMessage(Table table); + + /** + * Factory method for AddPartitionMessage. + * @param table The Table to which the partition is added. + * @param partition The Partition being added. + * @return AddPartitionMessage instance. + */ + public abstract AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition); + + /** + * Factory method for DropPartitionMessage. + * @param table The Table from which the partition is dropped. + * @param partition The Partition being dropped. + * @return DropPartitionMessage instance. + */ + public abstract DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition); } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/jms/MessagingUtils.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/jms/MessagingUtils.java index 65c4770..c61d997 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/jms/MessagingUtils.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/jms/MessagingUtils.java @@ -34,29 +34,29 @@ */ public class MessagingUtils { - /** - * Method to return HCatEventMessage contained in the JMS message. - * @param message The JMS Message instance - * @return The contained HCatEventMessage - */ - public static HCatEventMessage getMessage(Message message) { - try { - String messageBody = ((TextMessage)message).getText(); - String eventType = message.getStringProperty(HCatConstants.HCAT_EVENT); - String messageVersion = message.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); - String messageFormat = message.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); - - if (StringUtils.isEmpty(messageBody) || StringUtils.isEmpty(eventType)) - throw new IllegalArgumentException("Could not extract HCatEventMessage. " + - "EventType and/or MessageBody is null/empty."); - - return MessageFactory.getDeserializer(messageFormat, messageVersion).getHCatEventMessage(eventType, messageBody); - } - catch (JMSException exception) { - throw new IllegalArgumentException("Could not extract HCatEventMessage. 
", exception); - } + /** + * Method to return HCatEventMessage contained in the JMS message. + * @param message The JMS Message instance + * @return The contained HCatEventMessage + */ + public static HCatEventMessage getMessage(Message message) { + try { + String messageBody = ((TextMessage)message).getText(); + String eventType = message.getStringProperty(HCatConstants.HCAT_EVENT); + String messageVersion = message.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); + String messageFormat = message.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); + + if (StringUtils.isEmpty(messageBody) || StringUtils.isEmpty(eventType)) + throw new IllegalArgumentException("Could not extract HCatEventMessage. " + + "EventType and/or MessageBody is null/empty."); + + return MessageFactory.getDeserializer(messageFormat, messageVersion).getHCatEventMessage(eventType, messageBody); } + catch (JMSException exception) { + throw new IllegalArgumentException("Could not extract HCatEventMessage. ", exception); + } + } - // Prevent construction. - private MessagingUtils() {} + // Prevent construction. + private MessagingUtils() {} } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONAddPartitionMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONAddPartitionMessage.java index 19484fc..ac7dcd9 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONAddPartitionMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONAddPartitionMessage.java @@ -30,56 +30,56 @@ */ public class JSONAddPartitionMessage extends AddPartitionMessage { - @JsonProperty - String server, servicePrincipal, db, table; - - @JsonProperty - Long timestamp; - - @JsonProperty - List> partitions; - - /** - * Default Constructor. Required for Jackson. - */ - public JSONAddPartitionMessage() {} - - public JSONAddPartitionMessage(String server, String servicePrincipal, String db, String table, - List> partitions, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.partitions = partitions; - this.timestamp = timestamp; - checkValid(); - } + @JsonProperty + String server, servicePrincipal, db, table; + + @JsonProperty + Long timestamp; + + @JsonProperty + List> partitions; - @Override - public String getServer() { return server; } + /** + * Default Constructor. Required for Jackson. 
+ */ + public JSONAddPartitionMessage() {} - @Override - public String getServicePrincipal() { return servicePrincipal; } + public JSONAddPartitionMessage(String server, String servicePrincipal, String db, String table, + List> partitions, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.partitions = partitions; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getDB() { return db; } + @Override + public String getServer() { return server; } - @Override - public String getTable() { return table; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public String getDB() { return db; } - @Override - public List> getPartitions () { return partitions; } + @Override + public String getTable() { return table; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public List> getPartitions () { return partitions; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateDatabaseMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateDatabaseMessage.java index 1b11658..6db46cf 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateDatabaseMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateDatabaseMessage.java @@ -27,45 +27,45 @@ */ public class JSONCreateDatabaseMessage extends CreateDatabaseMessage { - @JsonProperty - String server, servicePrincipal, db; + @JsonProperty + String server, servicePrincipal, db; - @JsonProperty - Long timestamp; + @JsonProperty + Long timestamp; - /** - * Default constructor, required for Jackson. - */ - public JSONCreateDatabaseMessage() {} + /** + * Default constructor, required for Jackson. 
+ */ + public JSONCreateDatabaseMessage() {} - public JSONCreateDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.timestamp = timestamp; - checkValid(); - } + public JSONCreateDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getDB() { return db; } + @Override + public String getDB() { return db; } - @Override - public String getServer() { return server; } + @Override + public String getServer() { return server; } - @Override - public String getServicePrincipal() { return servicePrincipal; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public Long getTimestamp() { return timestamp; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateTableMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateTableMessage.java index f7cc085..9c66730 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateTableMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateTableMessage.java @@ -27,48 +27,48 @@ */ public class JSONCreateTableMessage extends CreateTableMessage { - @JsonProperty - String server, servicePrincipal, db, table; + @JsonProperty + String server, servicePrincipal, db, table; - @JsonProperty - Long timestamp; + @JsonProperty + Long timestamp; - /** - * Default constructor, needed for Jackson. - */ - public JSONCreateTableMessage() {} + /** + * Default constructor, needed for Jackson. 
+ */ + public JSONCreateTableMessage() {} - public JSONCreateTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.timestamp = timestamp; - checkValid(); - } + public JSONCreateTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getServer() { return server; } + @Override + public String getServer() { return server; } - @Override - public String getServicePrincipal() { return servicePrincipal; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public String getDB() { return db; } + @Override + public String getDB() { return db; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public Long getTimestamp() { return timestamp; } - @Override - public String getTable() { return table; } + @Override + public String getTable() { return table; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropDatabaseMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropDatabaseMessage.java index bf8d8c7..bfe3f63 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropDatabaseMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropDatabaseMessage.java @@ -27,45 +27,45 @@ */ public class JSONDropDatabaseMessage extends DropDatabaseMessage { - @JsonProperty - String server, servicePrincipal, db; + @JsonProperty + String server, servicePrincipal, db; - @JsonProperty - Long timestamp; + @JsonProperty + Long timestamp; - /** - * Default constructor, required for Jackson. - */ - public JSONDropDatabaseMessage() {} + /** + * Default constructor, required for Jackson. 
+ */ + public JSONDropDatabaseMessage() {} - public JSONDropDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.timestamp = timestamp; - checkValid(); - } + public JSONDropDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getServer() { return server; } + @Override + public String getServer() { return server; } - @Override - public String getServicePrincipal() { return servicePrincipal; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public String getDB() { return db; } + @Override + public String getDB() { return db; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public Long getTimestamp() { return timestamp; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropPartitionMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropPartitionMessage.java index 500d75a..a4d6400 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropPartitionMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropPartitionMessage.java @@ -30,57 +30,57 @@ */ public class JSONDropPartitionMessage extends DropPartitionMessage { - @JsonProperty - String server, servicePrincipal, db, table; - - @JsonProperty - Long timestamp; - - @JsonProperty - List> partitions; - - /** - * Default Constructor. Required for Jackson. - */ - public JSONDropPartitionMessage() {} - - public JSONDropPartitionMessage(String server, String servicePrincipal, String db, String table, - List> partitions, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.partitions = partitions; - this.timestamp = timestamp; - checkValid(); - } + @JsonProperty + String server, servicePrincipal, db, table; + + @JsonProperty + Long timestamp; + + @JsonProperty + List> partitions; + /** + * Default Constructor. Required for Jackson. 
+ */ + public JSONDropPartitionMessage() {} - @Override - public String getServer() { return server; } + public JSONDropPartitionMessage(String server, String servicePrincipal, String db, String table, + List> partitions, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.partitions = partitions; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getServicePrincipal() { return servicePrincipal; } - @Override - public String getDB() { return db; } + @Override + public String getServer() { return server; } - @Override - public String getTable() { return table; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public String getDB() { return db; } - @Override - public List> getPartitions () { return partitions; } + @Override + public String getTable() { return table; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public List> getPartitions () { return partitions; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropTableMessage.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropTableMessage.java index 3c19a64..3b62023 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropTableMessage.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropTableMessage.java @@ -27,50 +27,50 @@ */ public class JSONDropTableMessage extends DropTableMessage { - @JsonProperty - String server, servicePrincipal, db, table; + @JsonProperty + String server, servicePrincipal, db, table; - @JsonProperty - Long timestamp; + @JsonProperty + Long timestamp; - /** - * Default constructor, needed for Jackson. - */ - public JSONDropTableMessage() {} + /** + * Default constructor, needed for Jackson. 
+ */ + public JSONDropTableMessage() {} - public JSONDropTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.timestamp = timestamp; - checkValid(); - } + public JSONDropTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.timestamp = timestamp; + checkValid(); + } - @Override - public String getTable() { return table; } + @Override + public String getTable() { return table; } - @Override - public String getServer() { return server; } + @Override + public String getServer() { return server; } - @Override - public String getServicePrincipal() { return servicePrincipal; } + @Override + public String getServicePrincipal() { return servicePrincipal; } - @Override - public String getDB() { return db; } + @Override + public String getDB() { return db; } - @Override - public Long getTimestamp() { return timestamp; } + @Override + public Long getTimestamp() { return timestamp; } - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageDeserializer.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageDeserializer.java index 018f35c..8a4538a 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageDeserializer.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageDeserializer.java @@ -34,69 +34,69 @@ */ public class JSONMessageDeserializer extends MessageDeserializer { - static ObjectMapper mapper = new ObjectMapper(); // Thread-safe. + static ObjectMapper mapper = new ObjectMapper(); // Thread-safe. 
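Because every JSON message serializes itself through toString() and the shared ObjectMapper, a round trip is just construct, serialize, parse; the mapper is configured below to ignore unknown fields, which keeps older readers compatible with newer writers. A small sketch, with a placeholder metastore URI, principal, and db/table names:

import org.apache.hive.hcatalog.messaging.CreateTableMessage;
import org.apache.hive.hcatalog.messaging.json.JSONCreateTableMessage;
import org.apache.hive.hcatalog.messaging.json.JSONMessageDeserializer;

public class JsonRoundTripSketch {
  public static void main(String[] args) {
    // Construct and serialize a create-table notification (arguments are placeholders).
    String json = new JSONCreateTableMessage(
        "thrift://metastore:9083", "hive/_HOST@EXAMPLE.COM",
        "mydb", "mytbl", System.currentTimeMillis() / 1000).toString();

    // Parse it back through the Jackson-based deserializer.
    CreateTableMessage parsed = new JSONMessageDeserializer().getCreateTableMessage(json);
    System.out.println(parsed.getDB() + "." + parsed.getTable() + " @ " + parsed.getTimestamp());
  }
}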
- static { - mapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); - } + static { + mapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); + } - @Override - public CreateDatabaseMessage getCreateDatabaseMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONCreateDatabaseMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONCreateDatabaseMessage.", exception); - } + @Override + public CreateDatabaseMessage getCreateDatabaseMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONCreateDatabaseMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONCreateDatabaseMessage.", exception); } + } - @Override - public DropDatabaseMessage getDropDatabaseMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONDropDatabaseMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONDropDatabaseMessage.", exception); - } + @Override + public DropDatabaseMessage getDropDatabaseMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONDropDatabaseMessage.class); } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONDropDatabaseMessage.", exception); + } + } - @Override - public CreateTableMessage getCreateTableMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONCreateTableMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONCreateTableMessage.", exception); - } + @Override + public CreateTableMessage getCreateTableMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONCreateTableMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONCreateTableMessage.", exception); } + } - @Override - public DropTableMessage getDropTableMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONDropTableMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONDropTableMessage.", exception); - } + @Override + public DropTableMessage getDropTableMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONDropTableMessage.class); } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONDropTableMessage.", exception); + } + } - @Override - public AddPartitionMessage getAddPartitionMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONAddPartitionMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct AddPartitionMessage.", exception); - } + @Override + public AddPartitionMessage getAddPartitionMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONAddPartitionMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct AddPartitionMessage.", exception); } + } - @Override - public DropPartitionMessage getDropPartitionMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONDropPartitionMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct DropPartitionMessage.", exception); - } + @Override + public DropPartitionMessage 
getDropPartitionMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONDropPartitionMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct DropPartitionMessage.", exception); } + } } diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java index 92d39ed..be6d4e8 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java @@ -41,66 +41,66 @@ */ public class JSONMessageFactory extends MessageFactory { - private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer(); + private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer(); - @Override - public MessageDeserializer getDeserializer() { - return deserializer; - } + @Override + public MessageDeserializer getDeserializer() { + return deserializer; + } - @Override - public String getVersion() { - return "0.1"; - } + @Override + public String getVersion() { + return "0.1"; + } - @Override - public String getMessageFormat() { - return "json"; - } + @Override + public String getMessageFormat() { + return "json"; + } - @Override - public CreateDatabaseMessage buildCreateDatabaseMessage(Database db) { - return new JSONCreateDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), - System.currentTimeMillis() / 1000); - } + @Override + public CreateDatabaseMessage buildCreateDatabaseMessage(Database db) { + return new JSONCreateDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), + System.currentTimeMillis() / 1000); + } - @Override - public DropDatabaseMessage buildDropDatabaseMessage(Database db) { - return new JSONDropDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), - System.currentTimeMillis() / 1000); - } + @Override + public DropDatabaseMessage buildDropDatabaseMessage(Database db) { + return new JSONDropDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), + System.currentTimeMillis() / 1000); + } - @Override - public CreateTableMessage buildCreateTableMessage(Table table) { - return new JSONCreateTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), - table.getTableName(), System.currentTimeMillis()/1000); - } + @Override + public CreateTableMessage buildCreateTableMessage(Table table) { + return new JSONCreateTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), + table.getTableName(), System.currentTimeMillis()/1000); + } - @Override - public DropTableMessage buildDropTableMessage(Table table) { - return new JSONDropTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), table.getTableName(), - System.currentTimeMillis()/1000); - } + @Override + public DropTableMessage buildDropTableMessage(Table table) { + return new JSONDropTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), table.getTableName(), + System.currentTimeMillis()/1000); + } - @Override - public AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition) { - return new JSONAddPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), - partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), - 
System.currentTimeMillis()/1000); - } + @Override + public AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition) { + return new JSONAddPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), + partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), + System.currentTimeMillis()/1000); + } - @Override - public DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition) { - return new JSONDropPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), - partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), - System.currentTimeMillis()/1000); - } + @Override + public DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition) { + return new JSONDropPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), + partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), + System.currentTimeMillis()/1000); + } - private static Map getPartitionKeyValues(Table table, Partition partition) { - Map partitionKeys = new LinkedHashMap(); - for (int i=0; i getPartitionKeyValues(Table table, Partition partition) { + Map partitionKeys = new LinkedHashMap(); + for (int i=0; i actualMessages = new ArrayList(); - - @Before - public void setUp() throws Exception { - System.setProperty("java.naming.factory.initial", - "org.apache.activemq.jndi.ActiveMQInitialContextFactory"); - System.setProperty("java.naming.provider.url", - "vm://localhost?broker.persistent=false"); - ConnectionFactory connFac = new ActiveMQConnectionFactory( - "vm://localhost?broker.persistent=false"); - Connection conn = connFac.createConnection(); - conn.start(); - // We want message to be sent when session commits, thus we run in - // transacted mode. 
- Session session = conn.createSession(true, Session.SESSION_TRANSACTED); - Destination hcatTopic = session - .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); - MessageConsumer consumer1 = session.createConsumer(hcatTopic); - consumer1.setMessageListener(this); - Destination tblTopic = session - .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb.mytbl"); - MessageConsumer consumer2 = session.createConsumer(tblTopic); - consumer2.setMessageListener(this); - Destination dbTopic = session - .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb"); - MessageConsumer consumer3 = session.createConsumer(dbTopic); - consumer3.setMessageListener(this); - - setUpHiveConf(); - hiveConf.set(ConfVars.METASTORE_EVENT_LISTENERS.varname, - NotificationListener.class.getName()); - SessionState.start(new CliSessionState(hiveConf)); - driver = new Driver(hiveConf); - client = new HiveMetaStoreClient(hiveConf); - } - - @After - public void tearDown() throws Exception { - List expectedMessages = Arrays.asList( - HCatConstants.HCAT_CREATE_DATABASE_EVENT, - HCatConstants.HCAT_CREATE_TABLE_EVENT, - HCatConstants.HCAT_ADD_PARTITION_EVENT, - HCatConstants.HCAT_DROP_PARTITION_EVENT, - HCatConstants.HCAT_DROP_TABLE_EVENT, - HCatConstants.HCAT_DROP_DATABASE_EVENT); - Assert.assertEquals(expectedMessages, actualMessages); - } - - @Test - public void testAMQListener() throws Exception { - driver.run("create database mydb"); - driver.run("use mydb"); - driver.run("create table mytbl (a string) partitioned by (b string)"); - driver.run("alter table mytbl add partition(b='2011')"); - Map kvs = new HashMap(1); - kvs.put("b", "2011"); - client.markPartitionForEvent("mydb", "mytbl", kvs, - PartitionEventType.LOAD_DONE); - driver.run("alter table mytbl drop partition(b='2011')"); - driver.run("drop table mytbl"); - driver.run("drop database mydb"); - } - - @Override - public void onMessage(Message msg) { - String event; - try { - event = msg.getStringProperty(HCatConstants.HCAT_EVENT); - String format = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); - String version = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); - String messageBody = ((TextMessage)msg).getText(); - actualMessages.add(event); - MessageDeserializer deserializer = MessageFactory.getDeserializer(format, version); - - if (event.equals(HCatConstants.HCAT_CREATE_DATABASE_EVENT)) { - - Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg - .getJMSDestination().toString()); - CreateDatabaseMessage message = deserializer.getCreateDatabaseMessage(messageBody); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateDatabaseMessage); - Assert.assertEquals("mydb", message2.getDB()); - } else if (event.equals(HCatConstants.HCAT_CREATE_TABLE_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); - CreateTableMessage message = deserializer.getCreateTableMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateTableMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((CreateTableMessage) message2).getTable()); - } else if (event.equals(HCatConstants.HCAT_ADD_PARTITION_EVENT)) { - - 
Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() - .toString()); - AddPartitionMessage message = deserializer.getAddPartitionMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - Assert.assertEquals(1, message.getPartitions().size()); - Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof AddPartitionMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((AddPartitionMessage) message2).getTable()); - Assert.assertEquals(1, ((AddPartitionMessage) message2).getPartitions().size()); - Assert.assertEquals("2011", ((AddPartitionMessage) message2).getPartitions().get(0).get("b")); - } else if (event.equals(HCatConstants.HCAT_DROP_PARTITION_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() - .toString()); - DropPartitionMessage message = deserializer.getDropPartitionMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - Assert.assertEquals(1, message.getPartitions().size()); - Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof DropPartitionMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((DropPartitionMessage) message2).getTable()); - Assert.assertEquals(1, ((DropPartitionMessage) message2).getPartitions().size()); - Assert.assertEquals("2011", ((DropPartitionMessage) message2).getPartitions().get(0).get("b")); - } else if (event.equals(HCatConstants.HCAT_DROP_TABLE_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); - DropTableMessage message = deserializer.getDropTableMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof DropTableMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((DropTableMessage) message2).getTable()); - } else if (event.equals(HCatConstants.HCAT_DROP_DATABASE_EVENT)) { - - Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg - .getJMSDestination().toString()); - DropDatabaseMessage message = deserializer.getDropDatabaseMessage(messageBody); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof DropDatabaseMessage); - Assert.assertEquals("mydb", message2.getDB()); - } else if (event.equals(HCatConstants.HCAT_PARTITION_DONE_EVENT)) { - // TODO: Fill in when PARTITION_DONE_EVENT is supported. 
- Assert.assertTrue("Unexpected: HCAT_PARTITION_DONE_EVENT not supported (yet).", false); - } else { - Assert.assertTrue("Unexpected event-type: " + event, false); - } - - } catch (JMSException e) { - e.printStackTrace(System.err); - assert false; - } + private List actualMessages = new ArrayList(); + + @Before + public void setUp() throws Exception { + System.setProperty("java.naming.factory.initial", + "org.apache.activemq.jndi.ActiveMQInitialContextFactory"); + System.setProperty("java.naming.provider.url", + "vm://localhost?broker.persistent=false"); + ConnectionFactory connFac = new ActiveMQConnectionFactory( + "vm://localhost?broker.persistent=false"); + Connection conn = connFac.createConnection(); + conn.start(); + // We want message to be sent when session commits, thus we run in + // transacted mode. + Session session = conn.createSession(true, Session.SESSION_TRANSACTED); + Destination hcatTopic = session + .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); + MessageConsumer consumer1 = session.createConsumer(hcatTopic); + consumer1.setMessageListener(this); + Destination tblTopic = session + .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb.mytbl"); + MessageConsumer consumer2 = session.createConsumer(tblTopic); + consumer2.setMessageListener(this); + Destination dbTopic = session + .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb"); + MessageConsumer consumer3 = session.createConsumer(dbTopic); + consumer3.setMessageListener(this); + + setUpHiveConf(); + hiveConf.set(ConfVars.METASTORE_EVENT_LISTENERS.varname, + NotificationListener.class.getName()); + SessionState.start(new CliSessionState(hiveConf)); + driver = new Driver(hiveConf); + client = new HiveMetaStoreClient(hiveConf); + } + + @After + public void tearDown() throws Exception { + List expectedMessages = Arrays.asList( + HCatConstants.HCAT_CREATE_DATABASE_EVENT, + HCatConstants.HCAT_CREATE_TABLE_EVENT, + HCatConstants.HCAT_ADD_PARTITION_EVENT, + HCatConstants.HCAT_DROP_PARTITION_EVENT, + HCatConstants.HCAT_DROP_TABLE_EVENT, + HCatConstants.HCAT_DROP_DATABASE_EVENT); + Assert.assertEquals(expectedMessages, actualMessages); + } + + @Test + public void testAMQListener() throws Exception { + driver.run("create database mydb"); + driver.run("use mydb"); + driver.run("create table mytbl (a string) partitioned by (b string)"); + driver.run("alter table mytbl add partition(b='2011')"); + Map kvs = new HashMap(1); + kvs.put("b", "2011"); + client.markPartitionForEvent("mydb", "mytbl", kvs, + PartitionEventType.LOAD_DONE); + driver.run("alter table mytbl drop partition(b='2011')"); + driver.run("drop table mytbl"); + driver.run("drop database mydb"); + } + + @Override + public void onMessage(Message msg) { + String event; + try { + event = msg.getStringProperty(HCatConstants.HCAT_EVENT); + String format = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); + String version = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); + String messageBody = ((TextMessage)msg).getText(); + actualMessages.add(event); + MessageDeserializer deserializer = MessageFactory.getDeserializer(format, version); + + if (event.equals(HCatConstants.HCAT_CREATE_DATABASE_EVENT)) { + + Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg + .getJMSDestination().toString()); + CreateDatabaseMessage message = deserializer.getCreateDatabaseMessage(messageBody); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + 
Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateDatabaseMessage); + Assert.assertEquals("mydb", message2.getDB()); + } else if (event.equals(HCatConstants.HCAT_CREATE_TABLE_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); + CreateTableMessage message = deserializer.getCreateTableMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateTableMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((CreateTableMessage) message2).getTable()); + } else if (event.equals(HCatConstants.HCAT_ADD_PARTITION_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() + .toString()); + AddPartitionMessage message = deserializer.getAddPartitionMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + Assert.assertEquals(1, message.getPartitions().size()); + Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof AddPartitionMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((AddPartitionMessage) message2).getTable()); + Assert.assertEquals(1, ((AddPartitionMessage) message2).getPartitions().size()); + Assert.assertEquals("2011", ((AddPartitionMessage) message2).getPartitions().get(0).get("b")); + } else if (event.equals(HCatConstants.HCAT_DROP_PARTITION_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() + .toString()); + DropPartitionMessage message = deserializer.getDropPartitionMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + Assert.assertEquals(1, message.getPartitions().size()); + Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof DropPartitionMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((DropPartitionMessage) message2).getTable()); + Assert.assertEquals(1, ((DropPartitionMessage) message2).getPartitions().size()); + Assert.assertEquals("2011", ((DropPartitionMessage) message2).getPartitions().get(0).get("b")); + } else if (event.equals(HCatConstants.HCAT_DROP_TABLE_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); + DropTableMessage message = deserializer.getDropTableMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof DropTableMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((DropTableMessage) message2).getTable()); + } else if (event.equals(HCatConstants.HCAT_DROP_DATABASE_EVENT)) { + + Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg + .getJMSDestination().toString()); + DropDatabaseMessage message = deserializer.getDropDatabaseMessage(messageBody); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = 
MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof DropDatabaseMessage); + Assert.assertEquals("mydb", message2.getDB()); + } else if (event.equals(HCatConstants.HCAT_PARTITION_DONE_EVENT)) { + // TODO: Fill in when PARTITION_DONE_EVENT is supported. + Assert.assertTrue("Unexpected: HCAT_PARTITION_DONE_EVENT not supported (yet).", false); + } else { + Assert.assertTrue("Unexpected event-type: " + event, false); + } + + } catch (JMSException e) { + e.printStackTrace(System.err); + assert false; } + } } diff --git a/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java b/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java index 6354d27..dd69236 100644 --- a/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java +++ b/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java @@ -45,79 +45,79 @@ public class TestMsgBusConnection extends TestCase { - private Driver driver; - private BrokerService broker; - private MessageConsumer consumer; + private Driver driver; + private BrokerService broker; + private MessageConsumer consumer; - @Override - protected void setUp() throws Exception { + @Override + protected void setUp() throws Exception { - super.setUp(); - broker = new BrokerService(); - // configure the broker - broker.addConnector("tcp://localhost:61616?broker.persistent=false"); + super.setUp(); + broker = new BrokerService(); + // configure the broker + broker.addConnector("tcp://localhost:61616?broker.persistent=false"); - broker.start(); + broker.start(); - System.setProperty("java.naming.factory.initial", - "org.apache.activemq.jndi.ActiveMQInitialContextFactory"); - System.setProperty("java.naming.provider.url", "tcp://localhost:61616"); - connectClient(); - HiveConf hiveConf = new HiveConf(this.getClass()); - hiveConf.set(ConfVars.METASTORE_EVENT_LISTENERS.varname, - NotificationListener.class.getName()); - hiveConf.set("hive.metastore.local", "true"); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HCatConstants.HCAT_MSGBUS_TOPIC_PREFIX, "planetlab.hcat"); - SessionState.start(new CliSessionState(hiveConf)); - driver = new Driver(hiveConf); - } + System.setProperty("java.naming.factory.initial", + "org.apache.activemq.jndi.ActiveMQInitialContextFactory"); + System.setProperty("java.naming.provider.url", "tcp://localhost:61616"); + connectClient(); + HiveConf hiveConf = new HiveConf(this.getClass()); + hiveConf.set(ConfVars.METASTORE_EVENT_LISTENERS.varname, + NotificationListener.class.getName()); + hiveConf.set("hive.metastore.local", "true"); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HCatConstants.HCAT_MSGBUS_TOPIC_PREFIX, "planetlab.hcat"); + SessionState.start(new CliSessionState(hiveConf)); + driver = new Driver(hiveConf); + } - private void connectClient() throws JMSException { - ConnectionFactory connFac = new ActiveMQConnectionFactory( - "tcp://localhost:61616"); - Connection conn = connFac.createConnection(); - conn.start(); - Session session = conn.createSession(true, Session.SESSION_TRANSACTED); - Destination 
hcatTopic = session.createTopic("planetlab.hcat"); - consumer = session.createConsumer(hcatTopic); - } + private void connectClient() throws JMSException { + ConnectionFactory connFac = new ActiveMQConnectionFactory( + "tcp://localhost:61616"); + Connection conn = connFac.createConnection(); + conn.start(); + Session session = conn.createSession(true, Session.SESSION_TRANSACTED); + Destination hcatTopic = session.createTopic("planetlab.hcat"); + consumer = session.createConsumer(hcatTopic); + } - public void testConnection() throws Exception { + public void testConnection() throws Exception { - try { - driver.run("create database testconndb"); - Message msg = consumer.receive(); - assertTrue("Expected TextMessage", msg instanceof TextMessage); - assertEquals(HCatConstants.HCAT_CREATE_DATABASE_EVENT, - msg.getStringProperty(HCatConstants.HCAT_EVENT)); - assertEquals("topic://planetlab.hcat", msg.getJMSDestination().toString()); - HCatEventMessage messageObject = MessagingUtils.getMessage(msg); - assertEquals("testconndb", messageObject.getDB()); - broker.stop(); - driver.run("drop database testconndb cascade"); - broker.start(true); - connectClient(); - driver.run("create database testconndb"); - msg = consumer.receive(); - assertEquals(HCatConstants.HCAT_CREATE_DATABASE_EVENT, - msg.getStringProperty(HCatConstants.HCAT_EVENT)); - assertEquals("topic://planetlab.hcat", msg.getJMSDestination().toString()); - assertEquals("testconndb", messageObject.getDB()); - driver.run("drop database testconndb cascade"); - msg = consumer.receive(); - assertEquals(HCatConstants.HCAT_DROP_DATABASE_EVENT, - msg.getStringProperty(HCatConstants.HCAT_EVENT)); - assertEquals("topic://planetlab.hcat", msg.getJMSDestination().toString()); - assertEquals("testconndb", messageObject.getDB()); - } catch (NoSuchObjectException nsoe) { - nsoe.printStackTrace(System.err); - assert false; - } catch (AlreadyExistsException aee) { - aee.printStackTrace(System.err); - assert false; - } + try { + driver.run("create database testconndb"); + Message msg = consumer.receive(); + assertTrue("Expected TextMessage", msg instanceof TextMessage); + assertEquals(HCatConstants.HCAT_CREATE_DATABASE_EVENT, + msg.getStringProperty(HCatConstants.HCAT_EVENT)); + assertEquals("topic://planetlab.hcat", msg.getJMSDestination().toString()); + HCatEventMessage messageObject = MessagingUtils.getMessage(msg); + assertEquals("testconndb", messageObject.getDB()); + broker.stop(); + driver.run("drop database testconndb cascade"); + broker.start(true); + connectClient(); + driver.run("create database testconndb"); + msg = consumer.receive(); + assertEquals(HCatConstants.HCAT_CREATE_DATABASE_EVENT, + msg.getStringProperty(HCatConstants.HCAT_EVENT)); + assertEquals("topic://planetlab.hcat", msg.getJMSDestination().toString()); + assertEquals("testconndb", messageObject.getDB()); + driver.run("drop database testconndb cascade"); + msg = consumer.receive(); + assertEquals(HCatConstants.HCAT_DROP_DATABASE_EVENT, + msg.getStringProperty(HCatConstants.HCAT_EVENT)); + assertEquals("topic://planetlab.hcat", msg.getJMSDestination().toString()); + assertEquals("testconndb", messageObject.getDB()); + } catch (NoSuchObjectException nsoe) { + nsoe.printStackTrace(System.err); + assert false; + } catch (AlreadyExistsException aee) { + aee.printStackTrace(System.err); + assert false; } + } } diff --git a/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestNotificationListener.java 
b/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestNotificationListener.java index 4d7c4da..da1ae62 100644 --- a/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestNotificationListener.java +++ b/hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestNotificationListener.java @@ -62,158 +62,158 @@ public class TestNotificationListener extends HCatBaseTest implements MessageListener { - private List actualMessages = new ArrayList(); - - @Before - public void setUp() throws Exception { - System.setProperty("java.naming.factory.initial", - "org.apache.activemq.jndi.ActiveMQInitialContextFactory"); - System.setProperty("java.naming.provider.url", - "vm://localhost?broker.persistent=false"); - ConnectionFactory connFac = new ActiveMQConnectionFactory( - "vm://localhost?broker.persistent=false"); - Connection conn = connFac.createConnection(); - conn.start(); - // We want message to be sent when session commits, thus we run in - // transacted mode. - Session session = conn.createSession(true, Session.SESSION_TRANSACTED); - Destination hcatTopic = session - .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); - MessageConsumer consumer1 = session.createConsumer(hcatTopic); - consumer1.setMessageListener(this); - Destination tblTopic = session - .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb.mytbl"); - MessageConsumer consumer2 = session.createConsumer(tblTopic); - consumer2.setMessageListener(this); - Destination dbTopic = session - .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb"); - MessageConsumer consumer3 = session.createConsumer(dbTopic); - consumer3.setMessageListener(this); - - setUpHiveConf(); - hiveConf.set(ConfVars.METASTORE_EVENT_LISTENERS.varname, - NotificationListener.class.getName()); - SessionState.start(new CliSessionState(hiveConf)); - driver = new Driver(hiveConf); - client = new HiveMetaStoreClient(hiveConf); - } - - @After - public void tearDown() throws Exception { - List expectedMessages = Arrays.asList( - HCatConstants.HCAT_CREATE_DATABASE_EVENT, - HCatConstants.HCAT_CREATE_TABLE_EVENT, - HCatConstants.HCAT_ADD_PARTITION_EVENT, - HCatConstants.HCAT_DROP_PARTITION_EVENT, - HCatConstants.HCAT_DROP_TABLE_EVENT, - HCatConstants.HCAT_DROP_DATABASE_EVENT); - Assert.assertEquals(expectedMessages, actualMessages); - } - - @Test - public void testAMQListener() throws Exception { - driver.run("create database mydb"); - driver.run("use mydb"); - driver.run("create table mytbl (a string) partitioned by (b string)"); - driver.run("alter table mytbl add partition(b='2011')"); - Map kvs = new HashMap(1); - kvs.put("b", "2011"); - client.markPartitionForEvent("mydb", "mytbl", kvs, - PartitionEventType.LOAD_DONE); - driver.run("alter table mytbl drop partition(b='2011')"); - driver.run("drop table mytbl"); - driver.run("drop database mydb"); - } - - @Override - public void onMessage(Message msg) { - String event; - try { - event = msg.getStringProperty(HCatConstants.HCAT_EVENT); - String format = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); - String version = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); - String messageBody = ((TextMessage)msg).getText(); - actualMessages.add(event); - MessageDeserializer deserializer = MessageFactory.getDeserializer(format, version); - - if (event.equals(HCatConstants.HCAT_CREATE_DATABASE_EVENT)) { - - Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg - .getJMSDestination().toString()); - 
CreateDatabaseMessage message = deserializer.getCreateDatabaseMessage(messageBody); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateDatabaseMessage); - Assert.assertEquals("mydb", message2.getDB()); - } else if (event.equals(HCatConstants.HCAT_CREATE_TABLE_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); - CreateTableMessage message = deserializer.getCreateTableMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateTableMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((CreateTableMessage) message2).getTable()); - } else if (event.equals(HCatConstants.HCAT_ADD_PARTITION_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() - .toString()); - AddPartitionMessage message = deserializer.getAddPartitionMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - Assert.assertEquals(1, message.getPartitions().size()); - Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof AddPartitionMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((AddPartitionMessage) message2).getTable()); - Assert.assertEquals(1, ((AddPartitionMessage) message2).getPartitions().size()); - Assert.assertEquals("2011", ((AddPartitionMessage) message2).getPartitions().get(0).get("b")); - } else if (event.equals(HCatConstants.HCAT_DROP_PARTITION_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() - .toString()); - DropPartitionMessage message = deserializer.getDropPartitionMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - Assert.assertEquals(1, message.getPartitions().size()); - Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof DropPartitionMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((DropPartitionMessage) message2).getTable()); - Assert.assertEquals(1, ((DropPartitionMessage) message2).getPartitions().size()); - Assert.assertEquals("2011", ((DropPartitionMessage) message2).getPartitions().get(0).get("b")); - } else if (event.equals(HCatConstants.HCAT_DROP_TABLE_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); - DropTableMessage message = deserializer.getDropTableMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof DropTableMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((DropTableMessage) message2).getTable()); - } else if (event.equals(HCatConstants.HCAT_DROP_DATABASE_EVENT)) { - - Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg 
- .getJMSDestination().toString()); - DropDatabaseMessage message = deserializer.getDropDatabaseMessage(messageBody); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof DropDatabaseMessage); - Assert.assertEquals("mydb", message2.getDB()); - } else if (event.equals(HCatConstants.HCAT_PARTITION_DONE_EVENT)) { - // TODO: Fill in when PARTITION_DONE_EVENT is supported. - Assert.assertTrue("Unexpected: HCAT_PARTITION_DONE_EVENT not supported (yet).", false); - } else { - Assert.assertTrue("Unexpected event-type: " + event, false); - } - - } catch (JMSException e) { - e.printStackTrace(System.err); - assert false; - } + private List actualMessages = new ArrayList(); + + @Before + public void setUp() throws Exception { + System.setProperty("java.naming.factory.initial", + "org.apache.activemq.jndi.ActiveMQInitialContextFactory"); + System.setProperty("java.naming.provider.url", + "vm://localhost?broker.persistent=false"); + ConnectionFactory connFac = new ActiveMQConnectionFactory( + "vm://localhost?broker.persistent=false"); + Connection conn = connFac.createConnection(); + conn.start(); + // We want message to be sent when session commits, thus we run in + // transacted mode. + Session session = conn.createSession(true, Session.SESSION_TRANSACTED); + Destination hcatTopic = session + .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); + MessageConsumer consumer1 = session.createConsumer(hcatTopic); + consumer1.setMessageListener(this); + Destination tblTopic = session + .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb.mytbl"); + MessageConsumer consumer2 = session.createConsumer(tblTopic); + consumer2.setMessageListener(this); + Destination dbTopic = session + .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb"); + MessageConsumer consumer3 = session.createConsumer(dbTopic); + consumer3.setMessageListener(this); + + setUpHiveConf(); + hiveConf.set(ConfVars.METASTORE_EVENT_LISTENERS.varname, + NotificationListener.class.getName()); + SessionState.start(new CliSessionState(hiveConf)); + driver = new Driver(hiveConf); + client = new HiveMetaStoreClient(hiveConf); + } + + @After + public void tearDown() throws Exception { + List expectedMessages = Arrays.asList( + HCatConstants.HCAT_CREATE_DATABASE_EVENT, + HCatConstants.HCAT_CREATE_TABLE_EVENT, + HCatConstants.HCAT_ADD_PARTITION_EVENT, + HCatConstants.HCAT_DROP_PARTITION_EVENT, + HCatConstants.HCAT_DROP_TABLE_EVENT, + HCatConstants.HCAT_DROP_DATABASE_EVENT); + Assert.assertEquals(expectedMessages, actualMessages); + } + + @Test + public void testAMQListener() throws Exception { + driver.run("create database mydb"); + driver.run("use mydb"); + driver.run("create table mytbl (a string) partitioned by (b string)"); + driver.run("alter table mytbl add partition(b='2011')"); + Map kvs = new HashMap(1); + kvs.put("b", "2011"); + client.markPartitionForEvent("mydb", "mytbl", kvs, + PartitionEventType.LOAD_DONE); + driver.run("alter table mytbl drop partition(b='2011')"); + driver.run("drop table mytbl"); + driver.run("drop database mydb"); + } + + @Override + public void onMessage(Message msg) { + String event; + try { + event = msg.getStringProperty(HCatConstants.HCAT_EVENT); + String format = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); + String version = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); + String messageBody = ((TextMessage)msg).getText(); + 
actualMessages.add(event); + MessageDeserializer deserializer = MessageFactory.getDeserializer(format, version); + + if (event.equals(HCatConstants.HCAT_CREATE_DATABASE_EVENT)) { + + Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg + .getJMSDestination().toString()); + CreateDatabaseMessage message = deserializer.getCreateDatabaseMessage(messageBody); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateDatabaseMessage); + Assert.assertEquals("mydb", message2.getDB()); + } else if (event.equals(HCatConstants.HCAT_CREATE_TABLE_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); + CreateTableMessage message = deserializer.getCreateTableMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateTableMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((CreateTableMessage) message2).getTable()); + } else if (event.equals(HCatConstants.HCAT_ADD_PARTITION_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() + .toString()); + AddPartitionMessage message = deserializer.getAddPartitionMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + Assert.assertEquals(1, message.getPartitions().size()); + Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof AddPartitionMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((AddPartitionMessage) message2).getTable()); + Assert.assertEquals(1, ((AddPartitionMessage) message2).getPartitions().size()); + Assert.assertEquals("2011", ((AddPartitionMessage) message2).getPartitions().get(0).get("b")); + } else if (event.equals(HCatConstants.HCAT_DROP_PARTITION_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() + .toString()); + DropPartitionMessage message = deserializer.getDropPartitionMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + Assert.assertEquals(1, message.getPartitions().size()); + Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof DropPartitionMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((DropPartitionMessage) message2).getTable()); + Assert.assertEquals(1, ((DropPartitionMessage) message2).getPartitions().size()); + Assert.assertEquals("2011", ((DropPartitionMessage) message2).getPartitions().get(0).get("b")); + } else if (event.equals(HCatConstants.HCAT_DROP_TABLE_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); + DropTableMessage message = deserializer.getDropTableMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 
instanceof DropTableMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((DropTableMessage) message2).getTable()); + } else if (event.equals(HCatConstants.HCAT_DROP_DATABASE_EVENT)) { + + Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg + .getJMSDestination().toString()); + DropDatabaseMessage message = deserializer.getDropDatabaseMessage(messageBody); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof DropDatabaseMessage); + Assert.assertEquals("mydb", message2.getDB()); + } else if (event.equals(HCatConstants.HCAT_PARTITION_DONE_EVENT)) { + // TODO: Fill in when PARTITION_DONE_EVENT is supported. + Assert.assertTrue("Unexpected: HCAT_PARTITION_DONE_EVENT not supported (yet).", false); + } else { + Assert.assertTrue("Unexpected event-type: " + event, false); + } + + } catch (JMSException e) { + e.printStackTrace(System.err); + assert false; } + } } diff --git a/hcatalog/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java b/hcatalog/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java index 041e027..4ff4c78 100644 --- a/hcatalog/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java +++ b/hcatalog/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java @@ -40,176 +40,176 @@ */ public class RCFileGenerator { - private static Configuration conf = new Configuration(); - private static Path basedir; - private static FileSystem fs; - private static Properties tbl; - private static Random rand; - - private static Path getFile(String filename) throws Exception { - return new Path(basedir, filename); + private static Configuration conf = new Configuration(); + private static Path basedir; + private static FileSystem fs; + private static Properties tbl; + private static Random rand; + + private static Path getFile(String filename) throws Exception { + return new Path(basedir, filename); + } + + private static String[] firstName = {"alice", "bob", "calvin", "david", + "ethan", "fred", "gabriella", "holly", "irene", "jessica", "katie", + "luke", "mike", "nick", "oscar", "priscilla", "quinn", "rachel", + "sarah", "tom", "ulysses", "victor", "wendy", "xavier", "yuri", + "zach"}; + + private static String[] lastName = {"allen", "brown", "carson", + "davidson", "ellison", "falkner", "garcia", "hernandez", "ichabod", + "johnson", "king", "laertes", "miller", "nixon", "ovid", "polk", + "quirinius", "robinson", "steinbeck", "thompson", "underhill", + "van buren", "white", "xylophone", "young", "zipper"}; + + private static String randomName() { + StringBuffer buf = + new StringBuffer(firstName[rand.nextInt(firstName.length)]); + buf.append(' '); + buf.append(lastName[rand.nextInt(lastName.length)]); + return buf.toString(); + } + + private static int randomAge() { + return rand.nextInt(60) + 18; + } + + private static double randomGpa() { + return 4 * rand.nextFloat(); + } + + private static String[] registration = {"democrat", "green", + "independent", "libertarian", "republican", "socialist"}; + + private static String randomRegistration() { + return registration[rand.nextInt(registration.length)]; + } + + private static double randomContribution() { + return rand.nextFloat() * 1000; + } + + private static byte[] randomMap() 
throws Exception { + int len = rand.nextInt(5) + 1; + + StringBuffer buf = new StringBuffer(); + for (int i = 0; i < len; i++) { + if (i != 0) buf.append('\u0002'); + buf.append(firstName[rand.nextInt(26)]); + buf.append('\u0003'); + buf.append(lastName[rand.nextInt(26)]); } - - private static String[] firstName = {"alice", "bob", "calvin", "david", - "ethan", "fred", "gabriella", "holly", "irene", "jessica", "katie", - "luke", "mike", "nick", "oscar", "priscilla", "quinn", "rachel", - "sarah", "tom", "ulysses", "victor", "wendy", "xavier", "yuri", - "zach"}; - - private static String[] lastName = {"allen", "brown", "carson", - "davidson", "ellison", "falkner", "garcia", "hernandez", "ichabod", - "johnson", "king", "laertes", "miller", "nixon", "ovid", "polk", - "quirinius", "robinson", "steinbeck", "thompson", "underhill", - "van buren", "white", "xylophone", "young", "zipper"}; - - private static String randomName() { - StringBuffer buf = - new StringBuffer(firstName[rand.nextInt(firstName.length)]); - buf.append(' '); - buf.append(lastName[rand.nextInt(lastName.length)]); - return buf.toString(); - } - - private static int randomAge() { - return rand.nextInt(60) + 18; - } - - private static double randomGpa() { - return 4 * rand.nextFloat(); + return buf.toString().getBytes("UTF-8"); + } + + private static byte[] randomArray() throws Exception { + int len = rand.nextInt(5) + 1; + + StringBuffer buf = new StringBuffer(); + for (int i = 0; i < len; i++) { + if (i != 0) buf.append('\u0002'); + buf.append(Integer.valueOf(randomAge()).toString()); + buf.append('\u0003'); + buf.append(randomName()); } - - private static String[] registration = {"democrat", "green", - "independent", "libertarian", "republican", "socialist"}; - - private static String randomRegistration() { - return registration[rand.nextInt(registration.length)]; + return buf.toString().getBytes("UTF-8"); + } + + private static void usage() { + System.err.println("Usage: rcfilegen format number_of_rows " + + "output_file plain_output_file"); + System.err.println(" format one of: student voter alltypes"); + System.exit(1); + } + + public static void main(String[] args) throws Exception { + if (args.length != 4) usage(); + + String format = args[0]; + int numRows = Integer.valueOf(args[1]); + if (numRows < 1) usage(); + String output = args[2]; + String plainOutput = args[3]; + + fs = FileSystem.getLocal(conf); + basedir = new Path("."); + + genData(format, numRows, output, plainOutput); + } + + private static void genData(String format, + int numRows, + String output, String plainOutput) throws Exception { + int numFields = 0; + if (format.equals("student")) { + rand = new Random(numRows); + numFields = 3; + } else if (format.equals("voter")) { + rand = new Random(1000000000 + numRows); + numFields = 4; + } else if (format.equals("alltypes")) { + rand = new Random(2000000000L + numRows); + numFields = 10; } - private static double randomContribution() { - return rand.nextFloat() * 1000; + RCFileOutputFormat.setColumnNumber(conf, numFields); + RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), + null, new DefaultCodec()); + + PrintWriter pw = new PrintWriter(new FileWriter(plainOutput)); + + for (int j = 0; j < numRows; j++) { + BytesRefArrayWritable row = new BytesRefArrayWritable(numFields); + + byte[][] fields = null; + + if (format.equals("student")) { + byte[][] f = { + randomName().getBytes("UTF-8"), + Integer.valueOf(randomAge()).toString().getBytes("UTF-8"), + 
Double.valueOf(randomGpa()).toString().getBytes("UTF-8") + }; + fields = f; + } else if (format.equals("voter")) { + byte[][] f = { + randomName().getBytes("UTF-8"), + Integer.valueOf(randomAge()).toString().getBytes("UTF-8"), + randomRegistration().getBytes("UTF-8"), + Double.valueOf(randomContribution()).toString().getBytes("UTF-8") + }; + fields = f; + } else if (format.equals("alltypes")) { + byte[][] f = { + Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"), + Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"), + Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"), + Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"), + Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"), + Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"), + randomName().getBytes("UTF-8"), + randomMap(), + randomArray() + }; + fields = f; + } + + + for (int i = 0; i < fields.length; i++) { + BytesRefWritable field = new BytesRefWritable(fields[i], 0, + fields[i].length); + row.set(i, field); + pw.print(new String(fields[i])); + if (i != fields.length - 1) + pw.print("\t"); + else + pw.println(); + } + + writer.append(row); } - private static byte[] randomMap() throws Exception { - int len = rand.nextInt(5) + 1; - - StringBuffer buf = new StringBuffer(); - for (int i = 0; i < len; i++) { - if (i != 0) buf.append('\u0002'); - buf.append(firstName[rand.nextInt(26)]); - buf.append('\u0003'); - buf.append(lastName[rand.nextInt(26)]); - } - return buf.toString().getBytes("UTF-8"); - } - - private static byte[] randomArray() throws Exception { - int len = rand.nextInt(5) + 1; - - StringBuffer buf = new StringBuffer(); - for (int i = 0; i < len; i++) { - if (i != 0) buf.append('\u0002'); - buf.append(Integer.valueOf(randomAge()).toString()); - buf.append('\u0003'); - buf.append(randomName()); - } - return buf.toString().getBytes("UTF-8"); - } - - private static void usage() { - System.err.println("Usage: rcfilegen format number_of_rows " + - "output_file plain_output_file"); - System.err.println(" format one of: student voter alltypes"); - System.exit(1); - } - - public static void main(String[] args) throws Exception { - if (args.length != 4) usage(); - - String format = args[0]; - int numRows = Integer.valueOf(args[1]); - if (numRows < 1) usage(); - String output = args[2]; - String plainOutput = args[3]; - - fs = FileSystem.getLocal(conf); - basedir = new Path("."); - - genData(format, numRows, output, plainOutput); - } - - private static void genData(String format, - int numRows, - String output, String plainOutput) throws Exception { - int numFields = 0; - if (format.equals("student")) { - rand = new Random(numRows); - numFields = 3; - } else if (format.equals("voter")) { - rand = new Random(1000000000 + numRows); - numFields = 4; - } else if (format.equals("alltypes")) { - rand = new Random(2000000000L + numRows); - numFields = 10; - } - - RCFileOutputFormat.setColumnNumber(conf, numFields); - RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), - null, new DefaultCodec()); - - PrintWriter pw = new PrintWriter(new FileWriter(plainOutput)); - - for (int j = 0; j < numRows; j++) { - BytesRefArrayWritable row = new BytesRefArrayWritable(numFields); - - byte[][] fields = null; - - if (format.equals("student")) { - byte[][] f = { - randomName().getBytes("UTF-8"), - Integer.valueOf(randomAge()).toString().getBytes("UTF-8"), - Double.valueOf(randomGpa()).toString().getBytes("UTF-8") - }; - fields = f; - } 
else if (format.equals("voter")) { - byte[][] f = { - randomName().getBytes("UTF-8"), - Integer.valueOf(randomAge()).toString().getBytes("UTF-8"), - randomRegistration().getBytes("UTF-8"), - Double.valueOf(randomContribution()).toString().getBytes("UTF-8") - }; - fields = f; - } else if (format.equals("alltypes")) { - byte[][] f = { - Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"), - Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"), - Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"), - Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"), - Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"), - Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"), - randomName().getBytes("UTF-8"), - randomMap(), - randomArray() - }; - fields = f; - } - - - for (int i = 0; i < fields.length; i++) { - BytesRefWritable field = new BytesRefWritable(fields[i], 0, - fields[i].length); - row.set(i, field); - pw.print(new String(fields[i])); - if (i != fields.length - 1) - pw.print("\t"); - else - pw.println(); - } - - writer.append(row); - } - - writer.close(); - pw.close(); - } + writer.close(); + pw.close(); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderMaster.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderMaster.java index 437bee3..26291cd 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderMaster.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderMaster.java @@ -38,34 +38,34 @@ public class DataReaderMaster { - public static void main(String[] args) throws FileNotFoundException, IOException { + public static void main(String[] args) throws FileNotFoundException, IOException { - // This config contains all the configuration that master node wants to provide - // to the HCatalog. - Properties externalConfigs = new Properties(); - externalConfigs.load(new FileReader(args[0])); - Map config = new HashMap(); + // This config contains all the configuration that master node wants to provide + // to the HCatalog. + Properties externalConfigs = new Properties(); + externalConfigs.load(new FileReader(args[0])); + Map config = new HashMap(); - for (Entry kv : externalConfigs.entrySet()) { - config.put((String) kv.getKey(), (String) kv.getValue()); - } + for (Entry kv : externalConfigs.entrySet()) { + config.put((String) kv.getKey(), (String) kv.getValue()); + } - // This piece of code runs in master node and gets necessary context. - ReaderContext context = runsInMaster(config); + // This piece of code runs in master node and gets necessary context. + ReaderContext context = runsInMaster(config); - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(args[1]))); - oos.writeObject(context); - oos.flush(); - oos.close(); - // Master node will serialize readercontext and will make it available at slaves. - } + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(args[1]))); + oos.writeObject(context); + oos.flush(); + oos.close(); + // Master node will serialize readercontext and will make it available at slaves. 
+ } - private static ReaderContext runsInMaster(Map config) throws HCatException { + private static ReaderContext runsInMaster(Map config) throws HCatException { - ReadEntity.Builder builder = new ReadEntity.Builder(); - ReadEntity entity = builder.withTable(config.get("table")).build(); - HCatReader reader = DataTransferFactory.getHCatReader(entity, config); - ReaderContext cntxt = reader.prepareRead(); - return cntxt; - } + ReadEntity.Builder builder = new ReadEntity.Builder(); + ReadEntity entity = builder.withTable(config.get("table")).build(); + HCatReader reader = DataTransferFactory.getHCatReader(entity, config); + ReaderContext cntxt = reader.prepareRead(); + return cntxt; + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderSlave.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderSlave.java index 688b736..6debe97 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderSlave.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderSlave.java @@ -35,28 +35,28 @@ public class DataReaderSlave { - public static void main(String[] args) throws IOException, ClassNotFoundException { + public static void main(String[] args) throws IOException, ClassNotFoundException { - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[0]))); - ReaderContext cntxt = (ReaderContext) ois.readObject(); - ois.close(); + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[0]))); + ReaderContext cntxt = (ReaderContext) ois.readObject(); + ois.close(); - String[] inpSlitsToRead = args[1].split(","); - List splits = cntxt.getSplits(); + String[] inpSlitsToRead = args[1].split(","); + List splits = cntxt.getSplits(); - for (int i = 0; i < inpSlitsToRead.length; i++) { - InputSplit split = splits.get(Integer.parseInt(inpSlitsToRead[i])); - HCatReader reader = DataTransferFactory.getHCatReader(split, cntxt.getConf()); - Iterator itr = reader.read(); - File f = new File(args[2] + "-" + i); - f.delete(); - BufferedWriter outFile = new BufferedWriter(new FileWriter(f)); - while (itr.hasNext()) { - String rec = itr.next().toString().replaceFirst("\\s+$", ""); - System.err.println(rec); - outFile.write(rec + "\n"); - } - outFile.close(); - } + for (int i = 0; i < inpSlitsToRead.length; i++) { + InputSplit split = splits.get(Integer.parseInt(inpSlitsToRead[i])); + HCatReader reader = DataTransferFactory.getHCatReader(split, cntxt.getConf()); + Iterator itr = reader.read(); + File f = new File(args[2] + "-" + i); + f.delete(); + BufferedWriter outFile = new BufferedWriter(new FileWriter(f)); + while (itr.hasNext()) { + String rec = itr.next().toString().replaceFirst("\\s+$", ""); + System.err.println(rec); + outFile.write(rec + "\n"); + } + outFile.close(); } + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterMaster.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterMaster.java index 0b92035..1fafb61 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterMaster.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterMaster.java @@ -40,57 +40,57 @@ public class DataWriterMaster { - public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException { + public static void main(String[] args) throws 
FileNotFoundException, IOException, ClassNotFoundException { - // This config contains all the configuration that master node wants to provide - // to the HCatalog. - Properties externalConfigs = new Properties(); - externalConfigs.load(new FileReader(args[0])); - Map config = new HashMap(); + // This config contains all the configuration that master node wants to provide + // to the HCatalog. + Properties externalConfigs = new Properties(); + externalConfigs.load(new FileReader(args[0])); + Map config = new HashMap(); - for (Entry kv : externalConfigs.entrySet()) { - System.err.println("k: " + kv.getKey() + "\t v: " + kv.getValue()); - config.put((String) kv.getKey(), (String) kv.getValue()); - } + for (Entry kv : externalConfigs.entrySet()) { + System.err.println("k: " + kv.getKey() + "\t v: " + kv.getValue()); + config.put((String) kv.getKey(), (String) kv.getValue()); + } - if (args.length == 3 && "commit".equalsIgnoreCase(args[2])) { - // Then, master commits if everything goes well. - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[1]))); - WriterContext cntxt = (WriterContext) ois.readObject(); - commit(config, true, cntxt); - System.exit(0); - } - // This piece of code runs in master node and gets necessary context. - WriterContext cntxt = runsInMaster(config); + if (args.length == 3 && "commit".equalsIgnoreCase(args[2])) { + // Then, master commits if everything goes well. + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[1]))); + WriterContext cntxt = (WriterContext) ois.readObject(); + commit(config, true, cntxt); + System.exit(0); + } + // This piece of code runs in master node and gets necessary context. + WriterContext cntxt = runsInMaster(config); - // Master node will serialize writercontext and will make it available at slaves. - File f = new File(args[1]); - f.delete(); - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); - oos.writeObject(cntxt); - oos.flush(); - oos.close(); - } + // Master node will serialize writercontext and will make it available at slaves. 
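The serialization just below is how the WriterContext produced here reaches DataWriterSlave, and a second run of this class with a "commit" argument finishes the job. Collapsed into a single process (skipping the file hop), the write/commit protocol these utilities exercise looks roughly like the following sketch; the table name and the sample row are hypothetical, while the calls are the ones used by DataWriterMaster, DataWriterSlave, and commit() here:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatWriter;
import org.apache.hive.hcatalog.data.transfer.WriteEntity;
import org.apache.hive.hcatalog.data.transfer.WriterContext;

public class WriteProtocolSketch {
  public static void main(String[] args) throws Exception {
    Map<String, String> config = new HashMap<String, String>();
    config.put("table", "studenttab10k");   // hypothetical target table (name, age, gpa)

    // Master side: resolve the table and prepare a WriterContext.
    WriteEntity entity = new WriteEntity.Builder().withTable(config.get("table")).build();
    HCatWriter master = DataTransferFactory.getHCatWriter(entity, config);
    WriterContext cntxt = master.prepareWrite();

    // Slave side: a writer rebuilt from the context pushes the records.
    List<Object> fields = new ArrayList<Object>(3);
    fields.add("alice");   // hypothetical row values
    fields.add(20);
    fields.add(3.5);
    List<HCatRecord> records = new ArrayList<HCatRecord>();
    records.add(new DefaultHCatRecord(fields));
    Iterator<HCatRecord> itr = records.iterator();
    DataTransferFactory.getHCatWriter(cntxt).write(itr);

    // Master side again: commit once every slave has finished (abort on failure).
    master.commit(cntxt);
  }
}

In the test itself the three phases run as separate JVM invocations, which is why the context is pushed through ObjectOutputStream/ObjectInputStream rather than passed in memory.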
+ File f = new File(args[1]); + f.delete(); + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); + oos.writeObject(cntxt); + oos.flush(); + oos.close(); + } - private static WriterContext runsInMaster(Map config) throws HCatException { + private static WriterContext runsInMaster(Map config) throws HCatException { - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable(config.get("table")).build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - WriterContext info = writer.prepareWrite(); - return info; - } + WriteEntity.Builder builder = new WriteEntity.Builder(); + WriteEntity entity = builder.withTable(config.get("table")).build(); + HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); + WriterContext info = writer.prepareWrite(); + return info; + } - private static void commit(Map config, boolean status, WriterContext cntxt) throws HCatException { + private static void commit(Map config, boolean status, WriterContext cntxt) throws HCatException { - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable(config.get("table")).build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - if (status) { - writer.commit(cntxt); - } else { - writer.abort(cntxt); - } + WriteEntity.Builder builder = new WriteEntity.Builder(); + WriteEntity entity = builder.withTable(config.get("table")).build(); + HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); + if (status) { + writer.commit(cntxt); + } else { + writer.abort(cntxt); } + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterSlave.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterSlave.java index b57113c..c470fda 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterSlave.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterSlave.java @@ -37,51 +37,51 @@ public class DataWriterSlave { - public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException { + public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException { - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(args[0])); - WriterContext cntxt = (WriterContext) ois.readObject(); - ois.close(); + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(args[0])); + WriterContext cntxt = (WriterContext) ois.readObject(); + ois.close(); - HCatWriter writer = DataTransferFactory.getHCatWriter(cntxt); - writer.write(new HCatRecordItr(args[1])); + HCatWriter writer = DataTransferFactory.getHCatWriter(cntxt); + writer.write(new HCatRecordItr(args[1])); - } + } - private static class HCatRecordItr implements Iterator { + private static class HCatRecordItr implements Iterator { - BufferedReader reader; - String curLine; + BufferedReader reader; + String curLine; - public HCatRecordItr(String fileName) throws FileNotFoundException { - reader = new BufferedReader(new FileReader(new File(fileName))); - } + public HCatRecordItr(String fileName) throws FileNotFoundException { + reader = new BufferedReader(new FileReader(new File(fileName))); + } - @Override - public boolean hasNext() { - try { - curLine = reader.readLine(); - } catch (IOException e) { - e.printStackTrace(); - } - return null == curLine ? 
false : true; - } + @Override + public boolean hasNext() { + try { + curLine = reader.readLine(); + } catch (IOException e) { + e.printStackTrace(); + } + return null == curLine ? false : true; + } - @Override - public HCatRecord next() { + @Override + public HCatRecord next() { - String[] fields = curLine.split("\t"); - List data = new ArrayList(3); - data.add(fields[0]); - data.add(Integer.parseInt(fields[1])); - data.add(Double.parseDouble(fields[2])); - return new DefaultHCatRecord(data); - } + String[] fields = curLine.split("\t"); + List data = new ArrayList(3); + data.add(fields[0]); + data.add(Integer.parseInt(fields[1])); + data.add(Double.parseDouble(fields[2])); + return new DefaultHCatRecord(data); + } - @Override - public void remove() { - // TODO Auto-generated method stub + @Override + public void remove() { + // TODO Auto-generated method stub - } } + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/GroupByAge.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/GroupByAge.java index 78c0811..2352bdd 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/GroupByAge.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/GroupByAge.java @@ -53,82 +53,82 @@ */ public class GroupByAge extends Configured implements Tool { - public static class Map extends - Mapper { - - int age; - - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - age = (Integer) value.get(1); - context.write(new IntWritable(age), new IntWritable(1)); - } + public static class Map extends + Mapper { + + int age; + + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + age = (Integer) value.get(1); + context.write(new IntWritable(age), new IntWritable(1)); } - - public static class Reduce extends Reducer { - - - @Override - protected void reduce(IntWritable key, java.lang.Iterable - values, org.apache.hadoop.mapreduce.Reducer.Context context) - throws IOException, InterruptedException { - int sum = 0; - Iterator iter = values.iterator(); - while (iter.hasNext()) { - sum++; - iter.next(); - } - HCatRecord record = new DefaultHCatRecord(2); - record.set(0, key.get()); - record.set(1, sum); - - context.write(null, record); - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "GroupByAge"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(GroupByAge.class); - job.setMapperClass(Map.class); - job.setReducerClass(Reduce.class); - job.setMapOutputKeyClass(IntWritable.class); - job.setMapOutputValueClass(IntWritable.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - HCatOutputFormat.setOutput(job, 
OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatOutputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new GroupByAge(), args); - System.exit(exitCode); + } + + public static class Reduce extends Reducer { + + + @Override + protected void reduce(IntWritable key, java.lang.Iterable + values, org.apache.hadoop.mapreduce.Reducer.Context context) + throws IOException, InterruptedException { + int sum = 0; + Iterator iter = values.iterator(); + while (iter.hasNext()) { + sum++; + iter.next(); + } + HCatRecord record = new DefaultHCatRecord(2); + record.set(0, key.get()); + record.set(1, sum); + + context.write(null, record); } + } + + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + String dbName = null; + + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "GroupByAge"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, null)); + // initialize HCatOutputFormat + + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(GroupByAge.class); + job.setMapperClass(Map.class); + job.setReducerClass(Reduce.class); + job.setMapOutputKeyClass(IntWritable.class); + job.setMapOutputValueClass(IntWritable.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, null)); + HCatSchema s = HCatOutputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + + s); + HCatOutputFormat.setSchema(job, s); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 
0 : 1); + } + + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new GroupByAge(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HBaseReadWrite.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HBaseReadWrite.java index a0808d3..f0d7221 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HBaseReadWrite.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HBaseReadWrite.java @@ -56,137 +56,137 @@ */ public class HBaseReadWrite extends Configured implements Tool { - public static class HBaseWriteMap extends - Mapper { - - String name; - String age; - String gpa; - - @Override - protected void map( - LongWritable key, - Text value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - String line = value.toString(); - String[] tokens = line.split("\t"); - name = tokens[0]; - - context.write(new Text(name), value); - } + public static class HBaseWriteMap extends + Mapper { + + String name; + String age; + String gpa; + + @Override + protected void map( + LongWritable key, + Text value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + String line = value.toString(); + String[] tokens = line.split("\t"); + name = tokens[0]; + + context.write(new Text(name), value); } - - - public static class HBaseWriteReduce extends - Reducer { - - String name; - String age; - String gpa; - - @Override - protected void reduce(Text key, Iterable values, Context context) - throws IOException, InterruptedException { - name = key.toString(); - int count = 0; - double sum = 0; - for (Text value : values) { - String line = value.toString(); - String[] tokens = line.split("\t"); - name = tokens[0]; - age = tokens[1]; - gpa = tokens[2]; - - count++; - sum += Double.parseDouble(gpa.toString()); - } - - HCatRecord record = new DefaultHCatRecord(2); - record.set(0, name); - record.set(1, Double.toString(sum)); - - context.write(null, record); - } - } - - public static class HBaseReadMap extends - Mapper { - - String name; - String age; - String gpa; - - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = (String) value.get(0); - gpa = (String) value.get(1); - context.write(new Text(name), new Text(gpa)); - } + } + + + public static class HBaseWriteReduce extends + Reducer { + + String name; + String age; + String gpa; + + @Override + protected void reduce(Text key, Iterable values, Context context) + throws IOException, InterruptedException { + name = key.toString(); + int count = 0; + double sum = 0; + for (Text value : values) { + String line = value.toString(); + String[] tokens = line.split("\t"); + name = tokens[0]; + age = tokens[1]; + gpa = tokens[2]; + + count++; + sum += Double.parseDouble(gpa.toString()); + } + + HCatRecord record = new DefaultHCatRecord(2); + record.set(0, name); + record.set(1, Double.toString(sum)); + + context.write(null, record); } - - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputDir = args[1]; - String tableName = args[2]; - String outputDir = args[3]; - String dbName = null; - - String principalID = 
System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - conf.set("hcat.hbase.output.bulkMode", "false"); - Job job = new Job(conf, "HBaseWrite"); - FileInputFormat.setInputPaths(job, inputDir); - - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(HCatOutputFormat.class); - job.setJarByClass(HBaseReadWrite.class); - job.setMapperClass(HBaseWriteMap.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(Text.class); - job.setReducerClass(HBaseWriteReduce.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - tableName, null)); - - boolean succ = job.waitForCompletion(true); - - if (!succ) return 1; - - job = new Job(conf, "HBaseRead"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, tableName, - null)); - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(HBaseReadWrite.class); - job.setMapperClass(HBaseReadMap.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - TextOutputFormat.setOutputPath(job, new Path(outputDir)); - - succ = job.waitForCompletion(true); - - if (!succ) return 2; - - return 0; - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new HBaseReadWrite(), args); - System.exit(exitCode); + } + + public static class HBaseReadMap extends + Mapper { + + String name; + String age; + String gpa; + + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + name = (String) value.get(0); + gpa = (String) value.get(1); + context.write(new Text(name), new Text(gpa)); } + } + + + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + + String serverUri = args[0]; + String inputDir = args[1]; + String tableName = args[2]; + String outputDir = args[3]; + String dbName = null; + + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + conf.set("hcat.hbase.output.bulkMode", "false"); + Job job = new Job(conf, "HBaseWrite"); + FileInputFormat.setInputPaths(job, inputDir); + + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(HCatOutputFormat.class); + job.setJarByClass(HBaseReadWrite.class); + job.setMapperClass(HBaseWriteMap.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(Text.class); + job.setReducerClass(HBaseWriteReduce.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + tableName, null)); + + boolean succ = job.waitForCompletion(true); + + if (!succ) return 1; + + job = new Job(conf, "HBaseRead"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, tableName, + null)); + + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + job.setJarByClass(HBaseReadWrite.class); + job.setMapperClass(HBaseReadMap.class); + job.setOutputKeyClass(Text.class); + 
job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + TextOutputFormat.setOutputPath(job, new Path(outputDir)); + + succ = job.waitForCompletion(true); + + if (!succ) return 2; + + return 0; + } + + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new HBaseReadWrite(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTestDriver.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTestDriver.java index b9d6bed..c83d370 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTestDriver.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTestDriver.java @@ -27,33 +27,33 @@ */ public class HCatTestDriver { - public static void main(String argv[]) { - int exitCode = -1; - ProgramDriver pgd = new ProgramDriver(); - try { - pgd.addClass("typedatacheck", TypeDataCheck.class, - "A map/reduce program that checks the type of each field and" + - " outputs the entire table (to test hcat)."); - pgd.addClass("sumnumbers", SumNumbers.class, - "A map/reduce program that performs a group by on the first column and a " + - "SUM operation on the other columns of the \"numbers\" table."); - pgd.addClass("storenumbers", StoreNumbers.class, "A map/reduce program that " + - "reads from the \"numbers\" table and adds 10 to each fields and writes " + - "to the \"numbers_partitioned\" table into the datestamp=20100101 " + - "partition OR the \"numbers_empty_initially\" table based on a " + - "cmdline arg"); - pgd.addClass("storecomplex", StoreComplex.class, "A map/reduce program that " + - "reads from the \"complex\" table and stores as-is into the " + - "\"complex_empty_initially\" table."); - pgd.addClass("storedemo", StoreDemo.class, "demo prog."); - pgd.driver(argv); + public static void main(String argv[]) { + int exitCode = -1; + ProgramDriver pgd = new ProgramDriver(); + try { + pgd.addClass("typedatacheck", TypeDataCheck.class, + "A map/reduce program that checks the type of each field and" + + " outputs the entire table (to test hcat)."); + pgd.addClass("sumnumbers", SumNumbers.class, + "A map/reduce program that performs a group by on the first column and a " + + "SUM operation on the other columns of the \"numbers\" table."); + pgd.addClass("storenumbers", StoreNumbers.class, "A map/reduce program that " + + "reads from the \"numbers\" table and adds 10 to each fields and writes " + + "to the \"numbers_partitioned\" table into the datestamp=20100101 " + + "partition OR the \"numbers_empty_initially\" table based on a " + + "cmdline arg"); + pgd.addClass("storecomplex", StoreComplex.class, "A map/reduce program that " + + "reads from the \"complex\" table and stores as-is into the " + + "\"complex_empty_initially\" table."); + pgd.addClass("storedemo", StoreDemo.class, "demo prog."); + pgd.driver(argv); - // Success - exitCode = 0; - } catch (Throwable e) { - e.printStackTrace(); - } - - System.exit(exitCode); + // Success + exitCode = 0; + } catch (Throwable e) { + e.printStackTrace(); } + + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheck.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheck.java index 3435ae6..f69be4f 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheck.java +++ 
b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheck.java @@ -51,101 +51,101 @@ */ public class HCatTypeCheck extends EvalFunc { - static HashMap> typeMap = new HashMap>(); - - @Override - public Integer exec(Tuple input) throws IOException { - String schemaStr = (String) input.get(0); - Schema s = null; - try { - s = getSchemaFromString(schemaStr); - } catch (Exception e) { - throw new IOException(e); - } - for (int i = 0; i < s.size(); i++) { - check(s.getField(i).type, input.get(i + 1)); // input.get(i+1) since input.get(0) is the schema; - } - return 1; + static HashMap> typeMap = new HashMap>(); + + @Override + public Integer exec(Tuple input) throws IOException { + String schemaStr = (String) input.get(0); + Schema s = null; + try { + s = getSchemaFromString(schemaStr); + } catch (Exception e) { + throw new IOException(e); } - - static { - typeMap.put(DataType.INTEGER, Integer.class); - typeMap.put(DataType.LONG, Long.class); - typeMap.put(DataType.FLOAT, Float.class); - typeMap.put(DataType.DOUBLE, Double.class); - typeMap.put(DataType.CHARARRAY, String.class); - typeMap.put(DataType.TUPLE, Tuple.class); - typeMap.put(DataType.MAP, Map.class); - typeMap.put(DataType.BAG, DataBag.class); + for (int i = 0; i < s.size(); i++) { + check(s.getField(i).type, input.get(i + 1)); // input.get(i+1) since input.get(0) is the schema; } - - - private void die(String expectedType, Object o) throws IOException { - throw new IOException("Expected " + expectedType + ", got " + - o.getClass().getName()); + return 1; + } + + static { + typeMap.put(DataType.INTEGER, Integer.class); + typeMap.put(DataType.LONG, Long.class); + typeMap.put(DataType.FLOAT, Float.class); + typeMap.put(DataType.DOUBLE, Double.class); + typeMap.put(DataType.CHARARRAY, String.class); + typeMap.put(DataType.TUPLE, Tuple.class); + typeMap.put(DataType.MAP, Map.class); + typeMap.put(DataType.BAG, DataBag.class); + } + + + private void die(String expectedType, Object o) throws IOException { + throw new IOException("Expected " + expectedType + ", got " + + o.getClass().getName()); + } + + + private String check(Byte type, Object o) throws IOException { + if (o == null) { + return ""; } - - - private String check(Byte type, Object o) throws IOException { - if (o == null) { - return ""; + if (check(typeMap.get(type), o)) { + if (type.equals(DataType.MAP)) { + Map m = (Map) o; + check(m); + } else if (type.equals(DataType.BAG)) { + DataBag bg = (DataBag) o; + for (Tuple tuple : bg) { + Map m = (Map) tuple.get(0); + check(m); } - if (check(typeMap.get(type), o)) { - if (type.equals(DataType.MAP)) { - Map m = (Map) o; - check(m); - } else if (type.equals(DataType.BAG)) { - DataBag bg = (DataBag) o; - for (Tuple tuple : bg) { - Map m = (Map) tuple.get(0); - check(m); - } - } else if (type.equals(DataType.TUPLE)) { - Tuple t = (Tuple) o; - if (!check(Integer.class, t.get(0)) || - !check(String.class, t.get(1)) || - !check(Double.class, t.get(2))) { - die("t:tuple(num:int,str:string,dbl:double)", t); - } - } - } else { - die(typeMap.get(type).getName(), o); + } else if (type.equals(DataType.TUPLE)) { + Tuple t = (Tuple) o; + if (!check(Integer.class, t.get(0)) || + !check(String.class, t.get(1)) || + !check(Double.class, t.get(2))) { + die("t:tuple(num:int,str:string,dbl:double)", t); } - return o.toString(); + } + } else { + die(typeMap.get(type).getName(), o); } - - /** - * @param m - * @throws IOException - */ - private void check(Map m) throws IOException { - for (Entry e : m.entrySet()) { - // just 
access key and value to ensure they are correct - if (!check(String.class, e.getKey())) { - die("String", e.getKey()); - } - if (!check(String.class, e.getValue())) { - die("String", e.getValue()); - } - } - + return o.toString(); + } + + /** + * @param m + * @throws IOException + */ + private void check(Map m) throws IOException { + for (Entry e : m.entrySet()) { + // just access key and value to ensure they are correct + if (!check(String.class, e.getKey())) { + die("String", e.getKey()); + } + if (!check(String.class, e.getValue())) { + die("String", e.getValue()); + } } - private boolean check(Class expected, Object actual) { - if (actual == null) { - return true; - } - return expected.isAssignableFrom(actual.getClass()); - } + } - Schema getSchemaFromString(String schemaString) throws Exception { - /** ByteArrayInputStream stream = new ByteArrayInputStream(schemaString.getBytes()) ; - QueryParser queryParser = new QueryParser(stream) ; - Schema schema = queryParser.TupleSchema() ; - Schema.setSchemaDefaultType(schema, org.apache.pig.data.DataType.BYTEARRAY); - return schema; - */ - return Utils.getSchemaFromString(schemaString); + private boolean check(Class expected, Object actual) { + if (actual == null) { + return true; } + return expected.isAssignableFrom(actual.getClass()); + } + + Schema getSchemaFromString(String schemaString) throws Exception { + /** ByteArrayInputStream stream = new ByteArrayInputStream(schemaString.getBytes()) ; + QueryParser queryParser = new QueryParser(stream) ; + Schema schema = queryParser.TupleSchema() ; + Schema.setSchemaDefaultType(schema, org.apache.pig.data.DataType.BYTEARRAY); + return schema; + */ + return Utils.getSchemaFromString(schemaString); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheckHive.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheckHive.java index 19a74e9..abe3133 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheckHive.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheckHive.java @@ -62,81 +62,81 @@ */ public final class HCatTypeCheckHive extends GenericUDF { - ObjectInspector[] argOIs; + ObjectInspector[] argOIs; - @Override - public Object evaluate(DeferredObject[] args) throws HiveException { - List row = new ArrayList(); - String typesStr = (String) getJavaObject(args[0].get(), argOIs[0], new ArrayList()); - String[] types = typesStr.split("\\+"); - for (int i = 0; i < types.length; i++) { - types[i] = types[i].toLowerCase(); - } - for (int i = 1; i < args.length; i++) { - ObjectInspector oi = argOIs[i]; - List categories = new ArrayList(); - Object o = getJavaObject(args[i].get(), oi, categories); - try { - if (o != null) { - Util.check(types[i - 1], o); - } - } catch (IOException e) { - throw new HiveException(e); - } - row.add(o == null ? "null" : o); - row.add(":" + (o == null ? 
"null" : o.getClass()) + ":" + categories); - } - return row.toString(); + @Override + public Object evaluate(DeferredObject[] args) throws HiveException { + List row = new ArrayList(); + String typesStr = (String) getJavaObject(args[0].get(), argOIs[0], new ArrayList()); + String[] types = typesStr.split("\\+"); + for (int i = 0; i < types.length; i++) { + types[i] = types[i].toLowerCase(); } - - private Object getJavaObject(Object o, ObjectInspector oi, List categories) { - if (categories != null) { - categories.add(oi.getCategory()); + for (int i = 1; i < args.length; i++) { + ObjectInspector oi = argOIs[i]; + List categories = new ArrayList(); + Object o = getJavaObject(args[i].get(), oi, categories); + try { + if (o != null) { + Util.check(types[i - 1], o); } - if (oi.getCategory() == ObjectInspector.Category.LIST) { - List l = ((ListObjectInspector) oi).getList(o); - List result = new ArrayList(); - ObjectInspector elemOI = ((ListObjectInspector) oi).getListElementObjectInspector(); - for (Object lo : l) { - result.add(getJavaObject(lo, elemOI, categories)); - } - return result; - } else if (oi.getCategory() == ObjectInspector.Category.MAP) { - Map m = ((MapObjectInspector) oi).getMap(o); - Map result = new HashMap(); - ObjectInspector koi = ((MapObjectInspector) oi).getMapKeyObjectInspector(); - ObjectInspector voi = ((MapObjectInspector) oi).getMapValueObjectInspector(); - for (Entry e : m.entrySet()) { - result.put((String) getJavaObject(e.getKey(), koi, null), - (String) getJavaObject(e.getValue(), voi, null)); - } - return result; - - } else if (oi.getCategory() == ObjectInspector.Category.STRUCT) { - List s = ((StructObjectInspector) oi).getStructFieldsDataAsList(o); - List sf = ((StructObjectInspector) oi).getAllStructFieldRefs(); - List result = new ArrayList(); - for (int i = 0; i < s.size(); i++) { - result.add(getJavaObject(s.get(i), sf.get(i).getFieldObjectInspector(), categories)); - } - return result; - } else if (oi.getCategory() == ObjectInspector.Category.PRIMITIVE) { - return ((PrimitiveObjectInspector) oi).getPrimitiveJavaObject(o); - } - throw new RuntimeException("Unexpected error!"); + } catch (IOException e) { + throw new HiveException(e); + } + row.add(o == null ? "null" : o); + row.add(":" + (o == null ? 
"null" : o.getClass()) + ":" + categories); } + return row.toString(); + } - @Override - public String getDisplayString(String[] arg0) { - return null; + private Object getJavaObject(Object o, ObjectInspector oi, List categories) { + if (categories != null) { + categories.add(oi.getCategory()); } + if (oi.getCategory() == ObjectInspector.Category.LIST) { + List l = ((ListObjectInspector) oi).getList(o); + List result = new ArrayList(); + ObjectInspector elemOI = ((ListObjectInspector) oi).getListElementObjectInspector(); + for (Object lo : l) { + result.add(getJavaObject(lo, elemOI, categories)); + } + return result; + } else if (oi.getCategory() == ObjectInspector.Category.MAP) { + Map m = ((MapObjectInspector) oi).getMap(o); + Map result = new HashMap(); + ObjectInspector koi = ((MapObjectInspector) oi).getMapKeyObjectInspector(); + ObjectInspector voi = ((MapObjectInspector) oi).getMapValueObjectInspector(); + for (Entry e : m.entrySet()) { + result.put((String) getJavaObject(e.getKey(), koi, null), + (String) getJavaObject(e.getValue(), voi, null)); + } + return result; - @Override - public ObjectInspector initialize(ObjectInspector[] argOIs) - throws UDFArgumentException { - this.argOIs = argOIs; - return ObjectInspectorFactory.getReflectionObjectInspector(String.class, - ObjectInspectorOptions.JAVA); + } else if (oi.getCategory() == ObjectInspector.Category.STRUCT) { + List s = ((StructObjectInspector) oi).getStructFieldsDataAsList(o); + List sf = ((StructObjectInspector) oi).getAllStructFieldRefs(); + List result = new ArrayList(); + for (int i = 0; i < s.size(); i++) { + result.add(getJavaObject(s.get(i), sf.get(i).getFieldObjectInspector(), categories)); + } + return result; + } else if (oi.getCategory() == ObjectInspector.Category.PRIMITIVE) { + return ((PrimitiveObjectInspector) oi).getPrimitiveJavaObject(o); } + throw new RuntimeException("Unexpected error!"); + } + + @Override + public String getDisplayString(String[] arg0) { + return null; + } + + @Override + public ObjectInspector initialize(ObjectInspector[] argOIs) + throws UDFArgumentException { + this.argOIs = argOIs; + return ObjectInspectorFactory.getReflectionObjectInspector(String.class, + ObjectInspectorOptions.JAVA); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadJson.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadJson.java index 059dcdd..9ecfa43 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadJson.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadJson.java @@ -51,62 +51,62 @@ */ public class ReadJson extends Configured implements Tool { - public static class Map - extends Mapper { + public static class Map + extends Mapper { - String s; - Integer i; - Double d; + String s; + Integer i; + Double d; - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - s = value.get(0) == null ? null : (String) value.get(0); - i = value.get(1) == null ? null : (Integer) value.get(1); - d = value.get(2) == null ? null : (Double) value.get(2); + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + s = value.get(0) == null ? null : (String) value.get(0); + i = value.get(1) == null ? 
null : (Integer) value.get(1); + d = value.get(2) == null ? null : (Double) value.get(2); - HCatRecord record = new DefaultHCatRecord(3); - record.set(0, s); - record.set(1, i); - record.set(2, d); + HCatRecord record = new DefaultHCatRecord(3); + record.set(0, s); + record.set(1, i); + record.set(2, d); - context.write(null, record); + context.write(null, record); - } } + } - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String serverUri = args[0]; - String tableName = args[1]; - String outputDir = args[2]; - String dbName = null; + String serverUri = args[0]; + String tableName = args[1]; + String outputDir = args[2]; + String dbName = null; - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "ReadJson"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "ReadJson"); + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(ReadJson.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(IntWritable.class); - job.setOutputValueClass(HCatRecord.class); - job.setNumReduceTasks(0); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - return (job.waitForCompletion(true) ? 0 : 1); - } + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + job.setJarByClass(ReadJson.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(IntWritable.class); + job.setOutputValueClass(HCatRecord.class); + job.setNumReduceTasks(0); + FileOutputFormat.setOutputPath(job, new Path(outputDir)); + return (job.waitForCompletion(true) ? 
0 : 1); + } - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new ReadJson(), args); - System.exit(exitCode); - } + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new ReadJson(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadRC.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadRC.java index 415bce9..b5c2fb7 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadRC.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadRC.java @@ -51,63 +51,63 @@ */ public class ReadRC extends Configured implements Tool { - public static class Map - extends Mapper { + public static class Map + extends Mapper { - String name; - int age; - double gpa; + String name; + int age; + double gpa; - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = (String) value.get(0); - age = (Integer) value.get(1); - gpa = (Double) value.get(2); - gpa = Math.floor(gpa) + 0.1; + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + name = (String) value.get(0); + age = (Integer) value.get(1); + gpa = (Double) value.get(2); + gpa = Math.floor(gpa) + 0.1; - HCatRecord record = new DefaultHCatRecord(3); - record.set(0, name); - record.set(1, age); - record.set(2, gpa); + HCatRecord record = new DefaultHCatRecord(3); + record.set(0, name); + record.set(1, age); + record.set(2, gpa); - context.write(null, record); + context.write(null, record); - } } + } - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String serverUri = args[0]; - String tableName = args[1]; - String outputDir = args[2]; - String dbName = null; + String serverUri = args[0]; + String tableName = args[1]; + String outputDir = args[2]; + String dbName = null; - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "ReadRC"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "ReadRC"); + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(ReadRC.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(IntWritable.class); - job.setOutputValueClass(HCatRecord.class); - job.setNumReduceTasks(0); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - return (job.waitForCompletion(true) ? 
0 : 1); - } + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + job.setJarByClass(ReadRC.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(IntWritable.class); + job.setOutputValueClass(HCatRecord.class); + job.setNumReduceTasks(0); + FileOutputFormat.setOutputPath(job, new Path(outputDir)); + return (job.waitForCompletion(true) ? 0 : 1); + } - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new ReadRC(), args); - System.exit(exitCode); - } + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new ReadRC(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadText.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadText.java index 497ffdb..dae7fa2 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadText.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadText.java @@ -51,74 +51,74 @@ */ public class ReadText extends Configured implements Tool { - public static class Map - extends Mapper { + public static class Map + extends Mapper { - byte t; - short si; - int i; - long b; - float f; - double d; - String s; + byte t; + short si; + int i; + long b; + float f; + double d; + String s; - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - t = (Byte) value.get(0); - si = (Short) value.get(1); - i = (Integer) value.get(2); - b = (Long) value.get(3); - f = (Float) value.get(4); - d = (Double) value.get(5); - s = (String) value.get(6); + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + t = (Byte) value.get(0); + si = (Short) value.get(1); + i = (Integer) value.get(2); + b = (Long) value.get(3); + f = (Float) value.get(4); + d = (Double) value.get(5); + s = (String) value.get(6); - HCatRecord record = new DefaultHCatRecord(7); - record.set(0, t); - record.set(1, si); - record.set(2, i); - record.set(3, b); - record.set(4, f); - record.set(5, d); - record.set(6, s); + HCatRecord record = new DefaultHCatRecord(7); + record.set(0, t); + record.set(1, si); + record.set(2, i); + record.set(3, b); + record.set(4, f); + record.set(5, d); + record.set(6, s); - context.write(null, record); + context.write(null, record); - } } + } - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String serverUri = args[0]; - String tableName = args[1]; - String outputDir = args[2]; - String dbName = null; + String serverUri = args[0]; + String tableName = args[1]; + String outputDir = args[2]; + String dbName = null; - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "ReadText"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat + String principalID = 
System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "ReadText"); + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(ReadText.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(IntWritable.class); - job.setOutputValueClass(HCatRecord.class); - job.setNumReduceTasks(0); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - return (job.waitForCompletion(true) ? 0 : 1); - } + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + job.setJarByClass(ReadText.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(IntWritable.class); + job.setOutputValueClass(HCatRecord.class); + job.setNumReduceTasks(0); + FileOutputFormat.setOutputPath(job, new Path(outputDir)); + return (job.waitForCompletion(true) ? 0 : 1); + } - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new ReadText(), args); - System.exit(exitCode); - } + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new ReadText(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadWrite.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadWrite.java index 0fefe33..e15d128 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadWrite.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadWrite.java @@ -51,62 +51,62 @@ */ public class ReadWrite extends Configured implements Tool { - public static class Map extends - Mapper { + public static class Map extends + Mapper { - String name; - int age; - double gpa; + String name; + int age; + double gpa; - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = (String) value.get(0); - age = (Integer) value.get(1); - gpa = (Double) value.get(2); - context.write(new Text(name), value); + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + name = (String) value.get(0); + age = (Integer) value.get(1); + gpa = (Double) value.get(2); + context.write(new Text(name), value); - } } + } - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + String dbName = null; - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "ReadWrite"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // 
initialize HCatOutputFormat + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "ReadWrite"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, null)); + // initialize HCatOutputFormat - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(ReadWrite.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(DefaultHCatRecord.class); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 0 : 1); - } + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(ReadWrite.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(DefaultHCatRecord.class); + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, null)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + + s); + HCatOutputFormat.setSchema(job, s); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 0 : 1); + } - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new ReadWrite(), args); - System.exit(exitCode); - } + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new ReadWrite(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SimpleRead.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SimpleRead.java index ef2d6ed..550fb46 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SimpleRead.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SimpleRead.java @@ -51,58 +51,58 @@ */ public class SimpleRead extends Configured implements Tool { - private static final String TABLE_NAME = "studenttab10k"; - private static final String TAB = "\t"; + private static final String TABLE_NAME = "studenttab10k"; + private static final String TAB = "\t"; - public static class Map - extends Mapper { + public static class Map + extends Mapper { - String name; - int age; - double gpa; + String name; + int age; + double gpa; - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = (String) value.get(0); - age = (Integer) value.get(1); - gpa = (Double) value.get(2); - context.write(new Text(name), new IntWritable(age)); + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + name = (String) value.get(0); + age = (Integer) value.get(1); + gpa = (Double) value.get(2); + context.write(new Text(name), new IntWritable(age)); - } } + } - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); + public int run(String[] args) throws Exception { + Configuration conf 
= getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String serverUri = args[0]; - String tableName = args[1]; - String outputDir = args[2]; - String dbName = null; + String serverUri = args[0]; + String tableName = args[1]; + String outputDir = args[2]; + String dbName = null; - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "SimpleRead"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "SimpleRead"); + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(SimpleRead.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(IntWritable.class); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - return (job.waitForCompletion(true) ? 0 : 1); - } + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + job.setJarByClass(SimpleRead.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(IntWritable.class); + FileOutputFormat.setOutputPath(job, new Path(outputDir)); + return (job.waitForCompletion(true) ? 0 : 1); + } - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new SimpleRead(), args); - System.exit(exitCode); - } + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new SimpleRead(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreComplex.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreComplex.java index 82749a8..579db17 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreComplex.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreComplex.java @@ -50,87 +50,87 @@ */ public class StoreComplex { - private static final String COMPLEX_TABLE_NAME = "complex"; - private static final String COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME = "complex_nopart_empty_initially"; + private static final String COMPLEX_TABLE_NAME = "complex"; + private static final String COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME = "complex_nopart_empty_initially"; - public static class ComplexMapper - extends Mapper { + public static class ComplexMapper + extends Mapper { - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - // just write out the value as-is - context.write(new IntWritable(0), value); + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + // just write out the value as-is + context.write(new IntWritable(0), value); - } } - - - public static void main(String[] args) throws Exception { - Configuration conf = new Configuration(); - args = new 
GenericOptionsParser(conf, args).getRemainingArgs(); - String[] otherArgs = new String[1]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - // generic options parser doesn't seem to work! - conf.set("tmpjars", args[i + 1]); - i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 1) { - usage(); - } - String serverUri = otherArgs[0]; - String tableName = COMPLEX_TABLE_NAME; - String dbName = "default"; - Map outputPartitionKvps = new HashMap(); - String outputTableName = null; - outputTableName = COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME; - // test with null or empty randomly - if (new Random().nextInt(2) == 0) { - System.err.println("INFO: output partition keys set to null for writing"); - outputPartitionKvps = null; - } - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "storecomplex"); - // initialize HCatInputFormat - - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - dbName, outputTableName, outputPartitionKvps)); - - - HCatSchema s = HCatInputFormat.getTableSchema(job); - HCatOutputFormat.setSchema(job, s); - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(HCatOutputFormat.class); - job.setJarByClass(StoreComplex.class); - job.setMapperClass(ComplexMapper.class); - job.setOutputKeyClass(IntWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - System.exit(job.waitForCompletion(true) ? 0 : 1); + } + + + public static void main(String[] args) throws Exception { + Configuration conf = new Configuration(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + String[] otherArgs = new String[1]; + int j = 0; + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-libjars")) { + // generic options parser doesn't seem to work! 
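The manual "tmpjars" handling just below is the workaround referred to in that comment. For comparison, the ToolRunner-based utilities elsewhere in this directory rely on GenericOptionsParser consuming -libjars itself; a minimal, hypothetical check (the class name LibjarsCheck is made up) that shows whether a -libjars value actually reached the job configuration could look like this:

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class LibjarsCheck extends Configured implements Tool {
  @Override
  public int run(String[] args) throws Exception {
    // ToolRunner has already passed the arguments through GenericOptionsParser,
    // so a -libjars value (if any) should show up as the "tmpjars" property.
    Configuration conf = getConf();
    System.err.println("tmpjars = " + conf.get("tmpjars"));
    System.err.println("remaining args = " + Arrays.toString(args));
    return 0;
  }

  public static void main(String[] args) throws Exception {
    // e.g. hadoop jar testudf.jar LibjarsCheck -libjars /path/to/hcatalog-core.jar
    System.exit(ToolRunner.run(new LibjarsCheck(), args));
  }
}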
+ conf.set("tmpjars", args[i + 1]); + i = i + 1; // skip it , the for loop will skip its value + } else { + otherArgs[j++] = args[i]; + } } - - - /** - * - */ - private static void usage() { - System.err.println("Usage: hadoop jar testudf.jar storecomplex <-libjars hive-hcat jar>\n" + - "The hcat jar location should be specified as file://\n"); - System.exit(2); - + if (otherArgs.length != 1) { + usage(); + } + String serverUri = otherArgs[0]; + String tableName = COMPLEX_TABLE_NAME; + String dbName = "default"; + Map outputPartitionKvps = new HashMap(); + String outputTableName = null; + outputTableName = COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME; + // test with null or empty randomly + if (new Random().nextInt(2) == 0) { + System.err.println("INFO: output partition keys set to null for writing"); + outputPartitionKvps = null; } + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "storecomplex"); + // initialize HCatInputFormat + + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + dbName, outputTableName, outputPartitionKvps)); + + + HCatSchema s = HCatInputFormat.getTableSchema(job); + HCatOutputFormat.setSchema(job, s); + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(HCatOutputFormat.class); + job.setJarByClass(StoreComplex.class); + job.setMapperClass(ComplexMapper.class); + job.setOutputKeyClass(IntWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + System.exit(job.waitForCompletion(true) ? 0 : 1); + } + + + /** + * + */ + private static void usage() { + System.err.println("Usage: hadoop jar testudf.jar storecomplex <-libjars hive-hcat jar>\n" + + "The hcat jar location should be specified as file://\n"); + System.exit(2); + + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreDemo.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreDemo.java index fdf642d..fb07561 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreDemo.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreDemo.java @@ -58,96 +58,96 @@ */ public class StoreDemo { - private static final String NUMBERS_PARTITIONED_TABLE_NAME = "demo_partitioned"; - private static final String NUMBERS_TABLE_NAME = "demo"; + private static final String NUMBERS_PARTITIONED_TABLE_NAME = "demo_partitioned"; + private static final String NUMBERS_TABLE_NAME = "demo"; - public static class SumMapper - extends Mapper { + public static class SumMapper + extends Mapper { - Integer intnum; + Integer intnum; - Double doublenum; + Double doublenum; - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - intnum = ((Integer) value.get(0)); - value.set(0, intnum + 20); - doublenum = ((Double) value.get(1)); - value.set(1, (Double) (doublenum + 20)); - context.write(new IntWritable(0), value); + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + intnum = ((Integer) value.get(0)); + value.set(0, intnum + 20); + doublenum = ((Double) value.get(1)); + 
value.set(1, (Double) (doublenum + 20)); + context.write(new IntWritable(0), value); - } } - - - public static void main(String[] args) throws Exception { - Configuration conf = new Configuration(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String[] otherArgs = new String[1]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - // generic options parser doesn't seem to work! - conf.set("tmpjars", args[i + 1]); - i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 1) { - usage(); - } - String serverUri = otherArgs[0]; - - String tableName = NUMBERS_TABLE_NAME; - String dbName = "default"; - Map outputPartitionKvps = new HashMap(); - String outputTableName = NUMBERS_PARTITIONED_TABLE_NAME; - outputPartitionKvps.put("datestamp", "20100102"); - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "storedemo"); - // initialize HCatInputFormat - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - dbName, outputTableName, outputPartitionKvps)); - // test with and without specifying schema randomly - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" + s); - HCatOutputFormat.setSchema(job, s); - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(HCatOutputFormat.class); - job.setJarByClass(StoreDemo.class); - job.setMapperClass(SumMapper.class); - job.setOutputKeyClass(IntWritable.class); - job.setNumReduceTasks(0); - job.setOutputValueClass(DefaultHCatRecord.class); - System.exit(job.waitForCompletion(true) ? 0 : 1); + } + + + public static void main(String[] args) throws Exception { + Configuration conf = new Configuration(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + String[] otherArgs = new String[1]; + int j = 0; + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-libjars")) { + // generic options parser doesn't seem to work! 
+ conf.set("tmpjars", args[i + 1]); + i = i + 1; // skip it , the for loop will skip its value + } else { + otherArgs[j++] = args[i]; + } } - - - /** - * - */ - private static void usage() { - System.err.println("Usage: hadoop jar storenumbers <-libjars hive-hcat jar>\n" + - "\tIf the second argument is \"part\" data is written to datestamp = '2010101' partition of " + - "the numbers_part_empty_initially table.\n\tIf the second argument is \"nopart\", data is written to " + - "the unpartitioned numbers_nopart_empty_initially table.\n\tIf the second argument is \"nopart_pig\", " + - "data is written to the unpartitioned numbers_nopart_pig_empty_initially table.\nt" + - "The hcat jar location should be specified as file://\n"); - System.exit(2); - + if (otherArgs.length != 1) { + usage(); } + String serverUri = otherArgs[0]; + + String tableName = NUMBERS_TABLE_NAME; + String dbName = "default"; + Map outputPartitionKvps = new HashMap(); + String outputTableName = NUMBERS_PARTITIONED_TABLE_NAME; + outputPartitionKvps.put("datestamp", "20100102"); + + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "storedemo"); + // initialize HCatInputFormat + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + dbName, outputTableName, outputPartitionKvps)); + // test with and without specifying schema randomly + HCatSchema s = HCatInputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + s); + HCatOutputFormat.setSchema(job, s); + + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(HCatOutputFormat.class); + job.setJarByClass(StoreDemo.class); + job.setMapperClass(SumMapper.class); + job.setOutputKeyClass(IntWritable.class); + job.setNumReduceTasks(0); + job.setOutputValueClass(DefaultHCatRecord.class); + System.exit(job.waitForCompletion(true) ? 
0 : 1); + } + + + /** + * + */ + private static void usage() { + System.err.println("Usage: hadoop jar storenumbers <-libjars hive-hcat jar>\n" + + "\tIf the second argument is \"part\" data is written to datestamp = '2010101' partition of " + + "the numbers_part_empty_initially table.\n\tIf the second argument is \"nopart\", data is written to " + + "the unpartitioned numbers_nopart_empty_initially table.\n\tIf the second argument is \"nopart_pig\", " + + "data is written to the unpartitioned numbers_nopart_pig_empty_initially table.\nt" + + "The hcat jar location should be specified as file://\n"); + System.exit(2); + + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreNumbers.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreNumbers.java index 3be6e70..5d9ef05 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreNumbers.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreNumbers.java @@ -62,172 +62,172 @@ */ public class StoreNumbers { - private static final String NUMBERS_PARTITIONED_TABLE_NAME = "numbers_part_empty_initially"; - private static final String NUMBERS_TABLE_NAME = "numbers"; - private static final String NUMBERS_NON_PARTITIONED_TABLE_NAME = "numbers_nopart_empty_initially"; - private static final String NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME = "numbers_nopart_pig_empty_initially"; - private static final String IS_PIG_NON_PART_TABLE = "is.pig.non.part.table"; - - public static class SumMapper - extends Mapper { - - Integer intnum1000; - // though id is given as a Short by hcat, the map will emit it as an - // IntWritable so we can just sum in the reduce - Short id; - - // though intnum5 is handed as a Byte by hcat, the map() will emit it as - // an IntWritable so we can just sum in the reduce - Byte intnum5; - Integer intnum100; - Integer intnum; - Long longnum; - Float floatnum; - Double doublenum; - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - boolean isnoPartPig = context.getConfiguration().getBoolean(IS_PIG_NON_PART_TABLE, false); - intnum1000 = ((Integer) value.get(0)); - id = ((Short) value.get(1)); - intnum5 = (((Byte) value.get(2))); - intnum100 = (((Integer) value.get(3))); - intnum = ((Integer) value.get(4)); - longnum = ((Long) value.get(5)); - floatnum = ((Float) value.get(6)); - doublenum = ((Double) value.get(7)); - HCatRecord output = new DefaultHCatRecord(8); - output.set(0, intnum1000 + 10); - if (isnoPartPig) { - output.set(1, ((int) (id + 10))); - } else { - output.set(1, ((short) (id + 10))); - } - if (isnoPartPig) { - output.set(2, (int) (intnum5 + 10)); - } else { - output.set(2, (byte) (intnum5 + 10)); - } - - output.set(3, intnum100 + 10); - output.set(4, intnum + 10); - output.set(5, (long) (longnum + 10)); - output.set(6, (float) (floatnum + 10)); - output.set(7, (double) (doublenum + 10)); - for (int i = 0; i < 8; i++) { - System.err.println("XXX: class:" + output.get(i).getClass()); - } - context.write(new IntWritable(0), output); + private static final String NUMBERS_PARTITIONED_TABLE_NAME = "numbers_part_empty_initially"; + private static final String NUMBERS_TABLE_NAME = "numbers"; + private static final String NUMBERS_NON_PARTITIONED_TABLE_NAME = "numbers_nopart_empty_initially"; + private static final String NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME = 
"numbers_nopart_pig_empty_initially"; + private static final String IS_PIG_NON_PART_TABLE = "is.pig.non.part.table"; + + public static class SumMapper + extends Mapper { + + Integer intnum1000; + // though id is given as a Short by hcat, the map will emit it as an + // IntWritable so we can just sum in the reduce + Short id; + + // though intnum5 is handed as a Byte by hcat, the map() will emit it as + // an IntWritable so we can just sum in the reduce + Byte intnum5; + Integer intnum100; + Integer intnum; + Long longnum; + Float floatnum; + Double doublenum; + + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + boolean isnoPartPig = context.getConfiguration().getBoolean(IS_PIG_NON_PART_TABLE, false); + intnum1000 = ((Integer) value.get(0)); + id = ((Short) value.get(1)); + intnum5 = (((Byte) value.get(2))); + intnum100 = (((Integer) value.get(3))); + intnum = ((Integer) value.get(4)); + longnum = ((Long) value.get(5)); + floatnum = ((Float) value.get(6)); + doublenum = ((Double) value.get(7)); + HCatRecord output = new DefaultHCatRecord(8); + output.set(0, intnum1000 + 10); + if (isnoPartPig) { + output.set(1, ((int) (id + 10))); + } else { + output.set(1, ((short) (id + 10))); + } + if (isnoPartPig) { + output.set(2, (int) (intnum5 + 10)); + } else { + output.set(2, (byte) (intnum5 + 10)); + } + + output.set(3, intnum100 + 10); + output.set(4, intnum + 10); + output.set(5, (long) (longnum + 10)); + output.set(6, (float) (floatnum + 10)); + output.set(7, (double) (doublenum + 10)); + for (int i = 0; i < 8; i++) { + System.err.println("XXX: class:" + output.get(i).getClass()); + } + context.write(new IntWritable(0), output); - } + } + } + + + public static void main(String[] args) throws Exception { + Configuration conf = new Configuration(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + String[] otherArgs = new String[2]; + int j = 0; + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-libjars")) { + // generic options parser doesn't seem to work! 
+ conf.set("tmpjars", args[i + 1]); + i = i + 1; // skip it , the for loop will skip its value + } else { + otherArgs[j++] = args[i]; + } + } + if (otherArgs.length != 2) { + usage(); + } + String serverUri = otherArgs[0]; + if (otherArgs[1] == null || ( + !otherArgs[1].equalsIgnoreCase("part") && !otherArgs[1].equalsIgnoreCase("nopart")) + && !otherArgs[1].equalsIgnoreCase("nopart_pig")) { + usage(); + } + boolean writeToPartitionedTable = (otherArgs[1].equalsIgnoreCase("part")); + boolean writeToNonPartPigTable = (otherArgs[1].equalsIgnoreCase("nopart_pig")); + String tableName = NUMBERS_TABLE_NAME; + String dbName = "default"; + Map outputPartitionKvps = new HashMap(); + String outputTableName = null; + conf.set(IS_PIG_NON_PART_TABLE, "false"); + if (writeToPartitionedTable) { + outputTableName = NUMBERS_PARTITIONED_TABLE_NAME; + outputPartitionKvps.put("datestamp", "20100101"); + } else { + if (writeToNonPartPigTable) { + conf.set(IS_PIG_NON_PART_TABLE, "true"); + outputTableName = NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME; + } else { + outputTableName = NUMBERS_NON_PARTITIONED_TABLE_NAME; + } + // test with null or empty randomly + if (new Random().nextInt(2) == 0) { + outputPartitionKvps = null; + } } - - public static void main(String[] args) throws Exception { - Configuration conf = new Configuration(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String[] otherArgs = new String[2]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - // generic options parser doesn't seem to work! - conf.set("tmpjars", args[i + 1]); - i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 2) { - usage(); - } - String serverUri = otherArgs[0]; - if (otherArgs[1] == null || ( - !otherArgs[1].equalsIgnoreCase("part") && !otherArgs[1].equalsIgnoreCase("nopart")) - && !otherArgs[1].equalsIgnoreCase("nopart_pig")) { - usage(); - } - boolean writeToPartitionedTable = (otherArgs[1].equalsIgnoreCase("part")); - boolean writeToNonPartPigTable = (otherArgs[1].equalsIgnoreCase("nopart_pig")); - String tableName = NUMBERS_TABLE_NAME; - String dbName = "default"; - Map outputPartitionKvps = new HashMap(); - String outputTableName = null; - conf.set(IS_PIG_NON_PART_TABLE, "false"); - if (writeToPartitionedTable) { - outputTableName = NUMBERS_PARTITIONED_TABLE_NAME; - outputPartitionKvps.put("datestamp", "20100101"); + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "storenumbers"); + + // initialize HCatInputFormat + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + dbName, outputTableName, outputPartitionKvps)); + // test with and without specifying schema randomly + HCatSchema s = HCatInputFormat.getTableSchema(job); + if (writeToNonPartPigTable) { + List newHfsList = new ArrayList(); + // change smallint and tinyint to int + for (HCatFieldSchema hfs : s.getFields()) { + if (hfs.getTypeString().equals("smallint")) { + newHfsList.add(new HCatFieldSchema(hfs.getName(), + HCatFieldSchema.Type.INT, hfs.getComment())); + } else if (hfs.getTypeString().equals("tinyint")) { + newHfsList.add(new HCatFieldSchema(hfs.getName(), + HCatFieldSchema.Type.INT, hfs.getComment())); } else { - if (writeToNonPartPigTable) { - 
conf.set(IS_PIG_NON_PART_TABLE, "true"); - outputTableName = NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME; - } else { - outputTableName = NUMBERS_NON_PARTITIONED_TABLE_NAME; - } - // test with null or empty randomly - if (new Random().nextInt(2) == 0) { - outputPartitionKvps = null; - } + newHfsList.add(hfs); } - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "storenumbers"); - - // initialize HCatInputFormat - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - dbName, outputTableName, outputPartitionKvps)); - // test with and without specifying schema randomly - HCatSchema s = HCatInputFormat.getTableSchema(job); - if (writeToNonPartPigTable) { - List newHfsList = new ArrayList(); - // change smallint and tinyint to int - for (HCatFieldSchema hfs : s.getFields()) { - if (hfs.getTypeString().equals("smallint")) { - newHfsList.add(new HCatFieldSchema(hfs.getName(), - HCatFieldSchema.Type.INT, hfs.getComment())); - } else if (hfs.getTypeString().equals("tinyint")) { - newHfsList.add(new HCatFieldSchema(hfs.getName(), - HCatFieldSchema.Type.INT, hfs.getComment())); - } else { - newHfsList.add(hfs); - } - } - s = new HCatSchema(newHfsList); - } - HCatOutputFormat.setSchema(job, s); - - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(HCatOutputFormat.class); - job.setJarByClass(StoreNumbers.class); - job.setMapperClass(SumMapper.class); - job.setOutputKeyClass(IntWritable.class); - job.setNumReduceTasks(0); - job.setOutputValueClass(DefaultHCatRecord.class); - System.exit(job.waitForCompletion(true) ? 0 : 1); - } - - - /** - * - */ - private static void usage() { - System.err.println("Usage: hadoop jar storenumbers <-libjars hive-hcat jar>\n" + - "\tIf the second argument is \"part\" data is written to datestamp = '2010101' partition of " + - "the numbers_part_empty_initially table.\n\tIf the second argument is \"nopart\", data is written to " + - "the unpartitioned numbers_nopart_empty_initially table.\n\tIf the second argument is \"nopart_pig\", " + - "data is written to the unpartitioned numbers_nopart_pig_empty_initially table.\nt" + - "The hcat jar location should be specified as file://\n"); - System.exit(2); - + } + s = new HCatSchema(newHfsList); } + HCatOutputFormat.setSchema(job, s); + + + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(HCatOutputFormat.class); + job.setJarByClass(StoreNumbers.class); + job.setMapperClass(SumMapper.class); + job.setOutputKeyClass(IntWritable.class); + job.setNumReduceTasks(0); + job.setOutputValueClass(DefaultHCatRecord.class); + System.exit(job.waitForCompletion(true) ? 
0 : 1); + } + + + /** + * + */ + private static void usage() { + System.err.println("Usage: hadoop jar storenumbers <-libjars hive-hcat jar>\n" + + "\tIf the second argument is \"part\" data is written to datestamp = '2010101' partition of " + + "the numbers_part_empty_initially table.\n\tIf the second argument is \"nopart\", data is written to " + + "the unpartitioned numbers_nopart_empty_initially table.\n\tIf the second argument is \"nopart_pig\", " + + "data is written to the unpartitioned numbers_nopart_pig_empty_initially table.\nt" + + "The hcat jar location should be specified as file://\n"); + System.exit(2); + + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SumNumbers.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SumNumbers.java index cfb138d..08725c4 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SumNumbers.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SumNumbers.java @@ -55,204 +55,204 @@ */ public class SumNumbers { - private static final String NUMBERS_TABLE_NAME = "numbers"; - private static final String TAB = "\t"; + private static final String NUMBERS_TABLE_NAME = "numbers"; + private static final String TAB = "\t"; - public static class SumMapper - extends Mapper { + public static class SumMapper + extends Mapper { - IntWritable intnum1000; - // though id is given as a Short by hcat, the map will emit it as an - // IntWritable so we can just sum in the reduce - IntWritable id; + IntWritable intnum1000; + // though id is given as a Short by hcat, the map will emit it as an + // IntWritable so we can just sum in the reduce + IntWritable id; - // though intnum5 is handed as a Byte by hcat, the map() will emit it as - // an IntWritable so we can just sum in the reduce - IntWritable intnum5; - IntWritable intnum100; - IntWritable intnum; - LongWritable longnum; - FloatWritable floatnum; - DoubleWritable doublenum; + // though intnum5 is handed as a Byte by hcat, the map() will emit it as + // an IntWritable so we can just sum in the reduce + IntWritable intnum5; + IntWritable intnum100; + IntWritable intnum; + LongWritable longnum; + FloatWritable floatnum; + DoubleWritable doublenum; - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - intnum1000 = new IntWritable((Integer) value.get(0)); - id = new IntWritable((Short) value.get(1)); - intnum5 = new IntWritable(((Byte) value.get(2))); - intnum100 = new IntWritable(((Integer) value.get(3))); - intnum = new IntWritable((Integer) value.get(4)); - longnum = new LongWritable((Long) value.get(5)); - floatnum = new FloatWritable((Float) value.get(6)); - doublenum = new DoubleWritable((Double) value.get(7)); - SumNumbers.ArrayWritable outputValue = new SumNumbers.ArrayWritable(id, - intnum5, intnum100, intnum, longnum, floatnum, doublenum); - context.write(intnum1000, outputValue); + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + intnum1000 = new IntWritable((Integer) value.get(0)); + id = new IntWritable((Short) value.get(1)); + intnum5 = new IntWritable(((Byte) value.get(2))); + intnum100 = new IntWritable(((Integer) value.get(3))); + intnum = new IntWritable((Integer) value.get(4)); + longnum = new LongWritable((Long) value.get(5)); + 
floatnum = new FloatWritable((Float) value.get(6)); + doublenum = new DoubleWritable((Double) value.get(7)); + SumNumbers.ArrayWritable outputValue = new SumNumbers.ArrayWritable(id, + intnum5, intnum100, intnum, longnum, floatnum, doublenum); + context.write(intnum1000, outputValue); - } } + } - public static class SumReducer extends Reducer { + public static class SumReducer extends Reducer { - LongWritable dummyLong = null; + LongWritable dummyLong = null; - @Override - protected void reduce(IntWritable key, java.lang.Iterable - values, org.apache.hadoop.mapreduce.Reducer.Context context) - throws IOException, InterruptedException { - String output = key.toString() + TAB; - Long sumid = 0l; - Long sumintnum5 = 0l; - Long sumintnum100 = 0l; - Long sumintnum = 0l; - Long sumlongnum = 0l; - Float sumfloatnum = 0.0f; - Double sumdoublenum = 0.0; - for (ArrayWritable value : values) { - sumid += value.id.get(); - sumintnum5 += value.intnum5.get(); - sumintnum100 += value.intnum100.get(); - sumintnum += value.intnum.get(); - sumlongnum += value.longnum.get(); - sumfloatnum += value.floatnum.get(); - sumdoublenum += value.doublenum.get(); - } - output += sumid + TAB; - output += sumintnum5 + TAB; - output += sumintnum100 + TAB; - output += sumintnum + TAB; - output += sumlongnum + TAB; - output += sumfloatnum + TAB; - output += sumdoublenum + TAB; - context.write(dummyLong, new Text(output)); - } + @Override + protected void reduce(IntWritable key, java.lang.Iterable + values, org.apache.hadoop.mapreduce.Reducer.Context context) + throws IOException, InterruptedException { + String output = key.toString() + TAB; + Long sumid = 0l; + Long sumintnum5 = 0l; + Long sumintnum100 = 0l; + Long sumintnum = 0l; + Long sumlongnum = 0l; + Float sumfloatnum = 0.0f; + Double sumdoublenum = 0.0; + for (ArrayWritable value : values) { + sumid += value.id.get(); + sumintnum5 += value.intnum5.get(); + sumintnum100 += value.intnum100.get(); + sumintnum += value.intnum.get(); + sumlongnum += value.longnum.get(); + sumfloatnum += value.floatnum.get(); + sumdoublenum += value.doublenum.get(); + } + output += sumid + TAB; + output += sumintnum5 + TAB; + output += sumintnum100 + TAB; + output += sumintnum + TAB; + output += sumlongnum + TAB; + output += sumfloatnum + TAB; + output += sumdoublenum + TAB; + context.write(dummyLong, new Text(output)); } + } - public static void main(String[] args) throws Exception { - Configuration conf = new Configuration(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String[] otherArgs = new String[4]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - // generic options parser doesn't seem to work! 
- conf.set("tmpjars", args[i + 1]); - i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 4) { - System.err.println("Usage: hadoop jar sumnumbers <-libjars hive-hcat jar>\n" + - "The argument controls the output delimiter.\n" + - "The hcat jar location should be specified as file://\n"); - System.exit(2); - } - String serverUri = otherArgs[0]; - String tableName = NUMBERS_TABLE_NAME; - String outputDir = otherArgs[1]; - String dbName = "default"; - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "sumnumbers"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(SumNumbers.class); - job.setMapperClass(SumMapper.class); - job.setReducerClass(SumReducer.class); - job.setMapOutputKeyClass(IntWritable.class); - job.setMapOutputValueClass(ArrayWritable.class); - job.setOutputKeyClass(LongWritable.class); - job.setOutputValueClass(Text.class); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - System.exit(job.waitForCompletion(true) ? 0 : 1); + public static void main(String[] args) throws Exception { + Configuration conf = new Configuration(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + String[] otherArgs = new String[4]; + int j = 0; + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-libjars")) { + // generic options parser doesn't seem to work! + conf.set("tmpjars", args[i + 1]); + i = i + 1; // skip it , the for loop will skip its value + } else { + otherArgs[j++] = args[i]; + } } + if (otherArgs.length != 4) { + System.err.println("Usage: hadoop jar sumnumbers <-libjars hive-hcat jar>\n" + + "The argument controls the output delimiter.\n" + + "The hcat jar location should be specified as file://\n"); + System.exit(2); + } + String serverUri = otherArgs[0]; + String tableName = NUMBERS_TABLE_NAME; + String outputDir = otherArgs[1]; + String dbName = "default"; + + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "sumnumbers"); + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat - public static class ArrayWritable implements Writable { + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + job.setJarByClass(SumNumbers.class); + job.setMapperClass(SumMapper.class); + job.setReducerClass(SumReducer.class); + job.setMapOutputKeyClass(IntWritable.class); + job.setMapOutputValueClass(ArrayWritable.class); + job.setOutputKeyClass(LongWritable.class); + job.setOutputValueClass(Text.class); + FileOutputFormat.setOutputPath(job, new Path(outputDir)); + System.exit(job.waitForCompletion(true) ? 
0 : 1); + } - // though id is given as a Short by hcat, the map will emit it as an - // IntWritable so we can just sum in the reduce - IntWritable id; + public static class ArrayWritable implements Writable { - // though intnum5 is handed as a Byte by hcat, the map() will emit it as - // an IntWritable so we can just sum in the reduce - IntWritable intnum5; + // though id is given as a Short by hcat, the map will emit it as an + // IntWritable so we can just sum in the reduce + IntWritable id; - IntWritable intnum100; - IntWritable intnum; - LongWritable longnum; - FloatWritable floatnum; - DoubleWritable doublenum; + // though intnum5 is handed as a Byte by hcat, the map() will emit it as + // an IntWritable so we can just sum in the reduce + IntWritable intnum5; - /** - * - */ - public ArrayWritable() { - id = new IntWritable(); - intnum5 = new IntWritable(); - intnum100 = new IntWritable(); - intnum = new IntWritable(); - longnum = new LongWritable(); - floatnum = new FloatWritable(); - doublenum = new DoubleWritable(); - } + IntWritable intnum100; + IntWritable intnum; + LongWritable longnum; + FloatWritable floatnum; + DoubleWritable doublenum; + /** + * + */ + public ArrayWritable() { + id = new IntWritable(); + intnum5 = new IntWritable(); + intnum100 = new IntWritable(); + intnum = new IntWritable(); + longnum = new LongWritable(); + floatnum = new FloatWritable(); + doublenum = new DoubleWritable(); + } - /** - * @param id - * @param intnum5 - * @param intnum100 - * @param intnum - * @param longnum - * @param floatnum - * @param doublenum - */ - public ArrayWritable(IntWritable id, IntWritable intnum5, - IntWritable intnum100, IntWritable intnum, LongWritable longnum, - FloatWritable floatnum, DoubleWritable doublenum) { - this.id = id; - this.intnum5 = intnum5; - this.intnum100 = intnum100; - this.intnum = intnum; - this.longnum = longnum; - this.floatnum = floatnum; - this.doublenum = doublenum; - } + /** + * @param id + * @param intnum5 + * @param intnum100 + * @param intnum + * @param longnum + * @param floatnum + * @param doublenum + */ + public ArrayWritable(IntWritable id, IntWritable intnum5, + IntWritable intnum100, IntWritable intnum, LongWritable longnum, + FloatWritable floatnum, DoubleWritable doublenum) { + this.id = id; + this.intnum5 = intnum5; + this.intnum100 = intnum100; + this.intnum = intnum; + this.longnum = longnum; + this.floatnum = floatnum; + this.doublenum = doublenum; + } - @Override - public void readFields(DataInput in) throws IOException { - id.readFields(in); - intnum5.readFields(in); - intnum100.readFields(in); - intnum.readFields(in); - longnum.readFields(in); - floatnum.readFields(in); - doublenum.readFields(in); - } - @Override - public void write(DataOutput out) throws IOException { - id.write(out); - intnum5.write(out); - intnum100.write(out); - intnum.write(out); - longnum.write(out); - floatnum.write(out); - doublenum.write(out); + @Override + public void readFields(DataInput in) throws IOException { + id.readFields(in); + intnum5.readFields(in); + intnum100.readFields(in); + intnum.readFields(in); + longnum.readFields(in); + floatnum.readFields(in); + doublenum.readFields(in); + } - } + @Override + public void write(DataOutput out) throws IOException { + id.write(out); + intnum5.write(out); + intnum100.write(out); + intnum.write(out); + longnum.write(out); + floatnum.write(out); + doublenum.write(out); } + + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/TypeDataCheck.java 
b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/TypeDataCheck.java index 27644f7..503b2ff 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/TypeDataCheck.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/TypeDataCheck.java @@ -58,126 +58,126 @@ */ public class TypeDataCheck implements Tool { - static String SCHEMA_KEY = "schema"; - static String DELIM = "delim"; - private static Configuration conf = new Configuration(); - - public static class TypeDataCheckMapper - extends Mapper { - - Long dummykey = null; - String[] types; - String delim = "\u0001"; - - @Override - protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - String typesStr = context.getConfiguration().get(SCHEMA_KEY); - delim = context.getConfiguration().get(DELIM); - if (delim.equals("tab")) { - delim = "\t"; - } else if (delim.equals("ctrla")) { - delim = "\u0001"; - } - types = typesStr.split("\\+"); - for (int i = 0; i < types.length; i++) { - types[i] = types[i].toLowerCase(); - } + static String SCHEMA_KEY = "schema"; + static String DELIM = "delim"; + private static Configuration conf = new Configuration(); + public static class TypeDataCheckMapper + extends Mapper { - } + Long dummykey = null; + String[] types; + String delim = "\u0001"; - String check(HCatRecord r) throws IOException { - String s = ""; - for (int i = 0; i < r.size(); i++) { - s += Util.check(types[i], r.get(i)); - if (i != r.size() - 1) { - s += delim; - } - } - return s; - } + @Override + protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + String typesStr = context.getConfiguration().get(SCHEMA_KEY); + delim = context.getConfiguration().get(DELIM); + if (delim.equals("tab")) { + delim = "\t"; + } else if (delim.equals("ctrla")) { + delim = "\u0001"; + } + types = typesStr.split("\\+"); + for (int i = 0; i < types.length; i++) { + types[i] = types[i].toLowerCase(); + } - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - context.write(dummykey, new Text(check(value))); - } - } - public static void main(String[] args) throws Exception { - TypeDataCheck self = new TypeDataCheck(); - System.exit(ToolRunner.run(conf, self, args)); } - public int run(String[] args) { - try { - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String[] otherArgs = new String[5]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - conf.set("tmpjars", args[i + 1]); - i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 5) { - System.err.println("Other args:" + Arrays.asList(otherArgs)); - System.err.println("Usage: hadoop jar testudf.jar typedatacheck " + - " " + - " <-libjars hive-hcat jar>\n" + - "The argument controls the output delimiter.\n" + - "The hcat jar location should be specified as file://\n"); - System.err.println(" The argument controls the output delimiter."); - System.exit(2); - } - String serverUri = otherArgs[0]; - String tableName = otherArgs[1]; - String schemaStr = otherArgs[2]; - String outputDir = otherArgs[3]; - String outputdelim = otherArgs[4]; - if (!outputdelim.equals("tab") && !outputdelim.equals("ctrla")) { - System.err.println("ERROR: Specify 'tab' or 'ctrla' for 
output delimiter"); - } - String dbName = "default"; - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) { - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - } - Job job = new Job(conf, "typedatacheck"); - // initialize HCatInputFormat - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - job.getConfiguration().set(SCHEMA_KEY, schemaStr); - job.getConfiguration().set(DELIM, outputdelim); - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(TypeDataCheck.class); - job.setMapperClass(TypeDataCheckMapper.class); - job.setNumReduceTasks(0); - job.setOutputKeyClass(Long.class); - job.setOutputValueClass(Text.class); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - System.exit(job.waitForCompletion(true) ? 0 : 1); - return 0; - } catch (Exception e) { - throw new RuntimeException(e); + String check(HCatRecord r) throws IOException { + String s = ""; + for (int i = 0; i < r.size(); i++) { + s += Util.check(types[i], r.get(i)); + if (i != r.size() - 1) { + s += delim; } + } + return s; } @Override - public Configuration getConf() { - return conf; + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + context.write(dummykey, new Text(check(value))); } - - @Override - public void setConf(Configuration conf) { - TypeDataCheck.conf = conf; + } + + public static void main(String[] args) throws Exception { + TypeDataCheck self = new TypeDataCheck(); + System.exit(ToolRunner.run(conf, self, args)); + } + + public int run(String[] args) { + try { + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + String[] otherArgs = new String[5]; + int j = 0; + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-libjars")) { + conf.set("tmpjars", args[i + 1]); + i = i + 1; // skip it , the for loop will skip its value + } else { + otherArgs[j++] = args[i]; + } + } + if (otherArgs.length != 5) { + System.err.println("Other args:" + Arrays.asList(otherArgs)); + System.err.println("Usage: hadoop jar testudf.jar typedatacheck " + + " " + + " <-libjars hive-hcat jar>\n" + + "The argument controls the output delimiter.\n" + + "The hcat jar location should be specified as file://\n"); + System.err.println(" The argument controls the output delimiter."); + System.exit(2); + } + String serverUri = otherArgs[0]; + String tableName = otherArgs[1]; + String schemaStr = otherArgs[2]; + String outputDir = otherArgs[3]; + String outputdelim = otherArgs[4]; + if (!outputdelim.equals("tab") && !outputdelim.equals("ctrla")) { + System.err.println("ERROR: Specify 'tab' or 'ctrla' for output delimiter"); + } + String dbName = "default"; + + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) { + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + } + Job job = new Job(conf, "typedatacheck"); + // initialize HCatInputFormat + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + job.getConfiguration().set(SCHEMA_KEY, schemaStr); + job.getConfiguration().set(DELIM, outputdelim); + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + job.setJarByClass(TypeDataCheck.class); + 
job.setMapperClass(TypeDataCheckMapper.class); + job.setNumReduceTasks(0); + job.setOutputKeyClass(Long.class); + job.setOutputValueClass(Text.class); + FileOutputFormat.setOutputPath(job, new Path(outputDir)); + System.exit(job.waitForCompletion(true) ? 0 : 1); + return 0; + } catch (Exception e) { + throw new RuntimeException(e); } + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void setConf(Configuration conf) { + TypeDataCheck.conf = conf; + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/Util.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/Util.java index 97104b8..5c4630e 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/Util.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/Util.java @@ -27,80 +27,80 @@ public class Util { - static Map> typeMap = new HashMap>(); + static Map> typeMap = new HashMap>(); - static { - typeMap.put("tinyint", Byte.class); - typeMap.put("smallint", Short.class); - typeMap.put("int", Integer.class); - typeMap.put("bigint", Long.class); - typeMap.put("float", Float.class); - typeMap.put("double", Double.class); - typeMap.put("string", String.class); - typeMap.put("boolean", Boolean.class); - typeMap.put("struct", List.class); - typeMap.put("map", Map.class); - typeMap.put("array>", List.class); - } + static { + typeMap.put("tinyint", Byte.class); + typeMap.put("smallint", Short.class); + typeMap.put("int", Integer.class); + typeMap.put("bigint", Long.class); + typeMap.put("float", Float.class); + typeMap.put("double", Double.class); + typeMap.put("string", String.class); + typeMap.put("boolean", Boolean.class); + typeMap.put("struct", List.class); + typeMap.put("map", Map.class); + typeMap.put("array>", List.class); + } - public static void die(String expectedType, Object o) throws IOException { - throw new IOException("Expected " + expectedType + ", got " + - o.getClass().getName()); - } + public static void die(String expectedType, Object o) throws IOException { + throw new IOException("Expected " + expectedType + ", got " + + o.getClass().getName()); + } - public static String check(String type, Object o) throws IOException { - if (o == null) { - return "null"; - } - if (check(typeMap.get(type), o)) { - if (type.equals("map")) { - Map m = (Map) o; - check(m); - } else if (type.equals("array>")) { - List> listOfMaps = (List>) o; - for (Map m : listOfMaps) { - check(m); - } - } else if (type.equals("struct")) { - List l = (List) o; - if (!check(Integer.class, l.get(0)) || - !check(String.class, l.get(1)) || - !check(Double.class, l.get(2))) { - die("struct", l); - } - } - } else { - die(typeMap.get(type).getName(), o); - } - return o.toString(); + public static String check(String type, Object o) throws IOException { + if (o == null) { + return "null"; } - - /** - * @param m - * @throws IOException - */ - public static void check(Map m) throws IOException { - if (m == null) { - return; + if (check(typeMap.get(type), o)) { + if (type.equals("map")) { + Map m = (Map) o; + check(m); + } else if (type.equals("array>")) { + List> listOfMaps = (List>) o; + for (Map m : listOfMaps) { + check(m); } - for (Entry e : m.entrySet()) { - // just access key and value to ensure they are correct - if (!check(String.class, e.getKey())) { - die("String", e.getKey()); - } - if (!check(String.class, e.getValue())) { - die("String", e.getValue()); - } + } else if (type.equals("struct")) { + List 
l = (List) o; + if (!check(Integer.class, l.get(0)) || + !check(String.class, l.get(1)) || + !check(Double.class, l.get(2))) { + die("struct", l); } + } + } else { + die(typeMap.get(type).getName(), o); + } + return o.toString(); + } + /** + * @param m + * @throws IOException + */ + public static void check(Map m) throws IOException { + if (m == null) { + return; + } + for (Entry e : m.entrySet()) { + // just access key and value to ensure they are correct + if (!check(String.class, e.getKey())) { + die("String", e.getKey()); + } + if (!check(String.class, e.getValue())) { + die("String", e.getValue()); + } } - public static boolean check(Class expected, Object actual) { - if (actual == null) { - return true; - } - return expected.isAssignableFrom(actual.getClass()); + } + + public static boolean check(Class expected, Object actual) { + if (actual == null) { + return true; } + return expected.isAssignableFrom(actual.getClass()); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteJson.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteJson.java index b60a511..a54cc0a 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteJson.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteJson.java @@ -50,69 +50,69 @@ */ public class WriteJson extends Configured implements Tool { - public static class Map extends - Mapper { + public static class Map extends + Mapper { - String s; - Integer i; - Double d; + String s; + Integer i; + Double d; - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - s = value.get(0) == null ? null : (String) value.get(0); - i = value.get(1) == null ? null : (Integer) value.get(1); - d = value.get(2) == null ? null : (Double) value.get(2); + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + s = value.get(0) == null ? null : (String) value.get(0); + i = value.get(1) == null ? null : (Integer) value.get(1); + d = value.get(2) == null ? 
null : (Double) value.get(2); - HCatRecord record = new DefaultHCatRecord(5); - record.set(0, s); - record.set(1, i); - record.set(2, d); + HCatRecord record = new DefaultHCatRecord(5); + record.set(0, s); + record.set(1, i); + record.set(2, d); - context.write(null, record); + context.write(null, record); - } } + } - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + String dbName = null; - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "WriteJson"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // initialize HCatOutputFormat + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "WriteJson"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, null)); + // initialize HCatOutputFormat - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(WriteJson.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setNumReduceTasks(0); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 0 : 1); - } + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(WriteJson.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setNumReduceTasks(0); + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, null)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + + s); + HCatOutputFormat.setSchema(job, s); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 
0 : 1); + } - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new WriteJson(), args); - System.exit(exitCode); - } + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new WriteJson(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteRC.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteRC.java index 1c72872..4e90b31 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteRC.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteRC.java @@ -50,71 +50,71 @@ */ public class WriteRC extends Configured implements Tool { - public static class Map extends - Mapper { + public static class Map extends + Mapper { - String name; - Integer age; - Double gpa; + String name; + Integer age; + Double gpa; - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = value.get(0) == null ? null : (String) value.get(0); - age = value.get(1) == null ? null : (Integer) value.get(1); - gpa = value.get(2) == null ? null : (Double) value.get(2); + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + name = value.get(0) == null ? null : (String) value.get(0); + age = value.get(1) == null ? null : (Integer) value.get(1); + gpa = value.get(2) == null ? null : (Double) value.get(2); - if (gpa != null) gpa = Math.floor(gpa) + 0.1; + if (gpa != null) gpa = Math.floor(gpa) + 0.1; - HCatRecord record = new DefaultHCatRecord(5); - record.set(0, name); - record.set(1, age); - record.set(2, gpa); + HCatRecord record = new DefaultHCatRecord(5); + record.set(0, name); + record.set(1, age); + record.set(2, gpa); - context.write(null, record); + context.write(null, record); - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "WriteRC"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(WriteRC.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setNumReduceTasks(0); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 
0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new WriteRC(), args); - System.exit(exitCode); } + } + + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + String dbName = null; + + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "WriteRC"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, null)); + // initialize HCatOutputFormat + + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(WriteRC.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setNumReduceTasks(0); + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, null)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + + s); + HCatOutputFormat.setSchema(job, s); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new WriteRC(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteText.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteText.java index 43767a2..e1102bc 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteText.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteText.java @@ -50,81 +50,81 @@ */ public class WriteText extends Configured implements Tool { - public static class Map extends - Mapper { + public static class Map extends + Mapper { - byte t; - short si; - int i; - long b; - float f; - double d; - String s; + byte t; + short si; + int i; + long b; + float f; + double d; + String s; - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - t = (Byte) value.get(0); - si = (Short) value.get(1); - i = (Integer) value.get(2); - b = (Long) value.get(3); - f = (Float) value.get(4); - d = (Double) value.get(5); - s = (String) value.get(6); + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + t = (Byte) value.get(0); + si = (Short) value.get(1); + i = (Integer) value.get(2); + b = (Long) value.get(3); + f = (Float) value.get(4); + d = (Double) value.get(5); + s = (String) value.get(6); - HCatRecord record = new DefaultHCatRecord(7); - record.set(0, t); - record.set(1, si); - record.set(2, i); - record.set(3, b); - record.set(4, f); - record.set(5, d); - record.set(6, s); + HCatRecord record = new DefaultHCatRecord(7); + record.set(0, t); + record.set(1, si); + record.set(2, i); + record.set(3, b); + record.set(4, f); + record.set(5, d); + record.set(6, s); - context.write(null, record); + context.write(null, record); - } } + } - public int run(String[] args) throws Exception { - Configuration conf = 
getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + String dbName = null; - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "WriteText"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // initialize HCatOutputFormat + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "WriteText"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, null)); + // initialize HCatOutputFormat - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(WriteText.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setNumReduceTasks(0); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 0 : 1); - } + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(WriteText.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setNumReduceTasks(0); + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, null)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + + s); + HCatOutputFormat.setSchema(job, s); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 
0 : 1); + } - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new WriteText(), args); - System.exit(exitCode); - } + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new WriteText(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteTextPartitioned.java b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteTextPartitioned.java index b7e1549..8fd6607 100644 --- a/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteTextPartitioned.java +++ b/hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteTextPartitioned.java @@ -54,80 +54,80 @@ */ public class WriteTextPartitioned extends Configured implements Tool { - static String filter = null; + static String filter = null; - public static class Map extends - Mapper { + public static class Map extends + Mapper { - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - String name = (String) value.get(0); - int age = (Integer) value.get(1); - String ds = (String) value.get(3); + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + String name = (String) value.get(0); + int age = (Integer) value.get(1); + String ds = (String) value.get(3); - HCatRecord record = (filter == null ? new DefaultHCatRecord(3) : new DefaultHCatRecord(2)); - record.set(0, name); - record.set(1, age); - if (filter == null) record.set(2, ds); + HCatRecord record = (filter == null ? 
new DefaultHCatRecord(3) : new DefaultHCatRecord(2)); + record.set(0, name); + record.set(1, age); + if (filter == null) record.set(2, ds); - context.write(null, record); + context.write(null, record); - } } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - if (args.length > 3) filter = args[3]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "WriteTextPartitioned"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, filter)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(WriteTextPartitioned.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setNumReduceTasks(0); - - java.util.Map partitionVals = null; - if (filter != null) { - String[] s = filter.split("="); - String val = s[1].replace('"', ' ').trim(); - partitionVals = new HashMap(1); - partitionVals.put(s[0], val); - } - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, partitionVals)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - // Build the schema for this table, which is slightly different than the - // schema for the input table - List fss = new ArrayList(3); - fss.add(s.get(0)); - fss.add(s.get(1)); - fss.add(s.get(3)); - HCatOutputFormat.setSchema(job, new HCatSchema(fss)); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 
0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new WriteTextPartitioned(), args); - System.exit(exitCode); + } + + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + if (args.length > 3) filter = args[3]; + String dbName = null; + + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "WriteTextPartitioned"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, filter)); + // initialize HCatOutputFormat + + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(WriteTextPartitioned.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setNumReduceTasks(0); + + java.util.Map partitionVals = null; + if (filter != null) { + String[] s = filter.split("="); + String val = s[1].replace('"', ' ').trim(); + partitionVals = new HashMap(1); + partitionVals.put(s[0], val); } + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, partitionVals)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + // Build the schema for this table, which is slightly different than the + // schema for the input table + List fss = new ArrayList(3); + fss.add(s.get(0)); + fss.add(s.get(1)); + fss.add(s.get(3)); + HCatOutputFormat.setSchema(job, new HCatSchema(fss)); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 
0 : 1); + } + + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new WriteTextPartitioned(), args); + System.exit(exitCode); + } } diff --git a/hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java b/hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java index a5d8213..ae35f12 100644 --- a/hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java +++ b/hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java @@ -35,382 +35,382 @@ import java.util.BitSet; public class StoreFamilyRevision implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("StoreFamilyRevision"); + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("StoreFamilyRevision"); - private static final org.apache.thrift.protocol.TField REVISION_FIELD_DESC = new org.apache.thrift.protocol.TField("revision", org.apache.thrift.protocol.TType.I64, (short) 1); - private static final org.apache.thrift.protocol.TField TIMESTAMP_FIELD_DESC = new org.apache.thrift.protocol.TField("timestamp", org.apache.thrift.protocol.TType.I64, (short) 2); + private static final org.apache.thrift.protocol.TField REVISION_FIELD_DESC = new org.apache.thrift.protocol.TField("revision", org.apache.thrift.protocol.TType.I64, (short) 1); + private static final org.apache.thrift.protocol.TField TIMESTAMP_FIELD_DESC = new org.apache.thrift.protocol.TField("timestamp", org.apache.thrift.protocol.TType.I64, (short) 2); - public long revision; // required - public long timestamp; // required + public long revision; // required + public long timestamp; // required - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REVISION((short) 1, "revision"), - TIMESTAMP((short) 2, "timestamp"); + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + REVISION((short) 1, "revision"), + TIMESTAMP((short) 2, "timestamp"); - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch (fieldId) { - case 1: // REVISION - return REVISION; - case 2: // TIMESTAMP - return TIMESTAMP; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __REVISION_ISSET_ID = 0; - private static final int __TIMESTAMP_ISSET_ID = 1; - private BitSet __isset_bit_vector = new BitSet(2); - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + private static final Map byName = new HashMap(); static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REVISION, new org.apache.thrift.meta_data.FieldMetaData("revision", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.TIMESTAMP, new org.apache.thrift.meta_data.FieldMetaData("timestamp", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StoreFamilyRevision.class, metaDataMap); + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } } - public StoreFamilyRevision() { + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch (fieldId) { + case 1: // REVISION + return REVISION; + case 2: // TIMESTAMP + return TIMESTAMP; + default: + return null; + } } - public StoreFamilyRevision( - long revision, - long timestamp) { - this(); - this.revision = revision; - setRevisionIsSet(true); - this.timestamp = timestamp; - setTimestampIsSet(true); + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; } /** - * Performs a deep copy on other. + * Find the _Fields constant that matches name, or null if its not found. 
*/ - public StoreFamilyRevision(StoreFamilyRevision other) { - __isset_bit_vector.clear(); - __isset_bit_vector.or(other.__isset_bit_vector); - this.revision = other.revision; - this.timestamp = other.timestamp; + public static _Fields findByName(String name) { + return byName.get(name); } - public StoreFamilyRevision deepCopy() { - return new StoreFamilyRevision(this); - } + private final short _thriftId; + private final String _fieldName; - @Override - public void clear() { - setRevisionIsSet(false); - this.revision = 0; - setTimestampIsSet(false); - this.timestamp = 0; + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; } - public long getRevision() { - return this.revision; + public short getThriftFieldId() { + return _thriftId; } - public StoreFamilyRevision setRevision(long revision) { - this.revision = revision; - setRevisionIsSet(true); - return this; + public String getFieldName() { + return _fieldName; } + } + + // isset id assignments + private static final int __REVISION_ISSET_ID = 0; + private static final int __TIMESTAMP_ISSET_ID = 1; + private BitSet __isset_bit_vector = new BitSet(2); + + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.REVISION, new org.apache.thrift.meta_data.FieldMetaData("revision", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.TIMESTAMP, new org.apache.thrift.meta_data.FieldMetaData("timestamp", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StoreFamilyRevision.class, metaDataMap); + } + + public StoreFamilyRevision() { + } + + public StoreFamilyRevision( + long revision, + long timestamp) { + this(); + this.revision = revision; + setRevisionIsSet(true); + this.timestamp = timestamp; + setTimestampIsSet(true); + } + + /** + * Performs a deep copy on other. 
+ */ + public StoreFamilyRevision(StoreFamilyRevision other) { + __isset_bit_vector.clear(); + __isset_bit_vector.or(other.__isset_bit_vector); + this.revision = other.revision; + this.timestamp = other.timestamp; + } + + public StoreFamilyRevision deepCopy() { + return new StoreFamilyRevision(this); + } + + @Override + public void clear() { + setRevisionIsSet(false); + this.revision = 0; + setTimestampIsSet(false); + this.timestamp = 0; + } + + public long getRevision() { + return this.revision; + } + + public StoreFamilyRevision setRevision(long revision) { + this.revision = revision; + setRevisionIsSet(true); + return this; + } + + public void unsetRevision() { + __isset_bit_vector.clear(__REVISION_ISSET_ID); + } + + /** Returns true if field revision is set (has been assigned a value) and false otherwise */ + public boolean isSetRevision() { + return __isset_bit_vector.get(__REVISION_ISSET_ID); + } + + public void setRevisionIsSet(boolean value) { + __isset_bit_vector.set(__REVISION_ISSET_ID, value); + } + + public long getTimestamp() { + return this.timestamp; + } + + public StoreFamilyRevision setTimestamp(long timestamp) { + this.timestamp = timestamp; + setTimestampIsSet(true); + return this; + } + + public void unsetTimestamp() { + __isset_bit_vector.clear(__TIMESTAMP_ISSET_ID); + } + + /** Returns true if field timestamp is set (has been assigned a value) and false otherwise */ + public boolean isSetTimestamp() { + return __isset_bit_vector.get(__TIMESTAMP_ISSET_ID); + } + + public void setTimestampIsSet(boolean value) { + __isset_bit_vector.set(__TIMESTAMP_ISSET_ID, value); + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case REVISION: + if (value == null) { + unsetRevision(); + } else { + setRevision((Long) value); + } + break; + + case TIMESTAMP: + if (value == null) { + unsetTimestamp(); + } else { + setTimestamp((Long) value); + } + break; - public void unsetRevision() { - __isset_bit_vector.clear(__REVISION_ISSET_ID); } + } - /** Returns true if field revision is set (has been assigned a value) and false otherwise */ - public boolean isSetRevision() { - return __isset_bit_vector.get(__REVISION_ISSET_ID); - } + public Object getFieldValue(_Fields field) { + switch (field) { + case REVISION: + return Long.valueOf(getRevision()); - public void setRevisionIsSet(boolean value) { - __isset_bit_vector.set(__REVISION_ISSET_ID, value); - } + case TIMESTAMP: + return Long.valueOf(getTimestamp()); - public long getTimestamp() { - return this.timestamp; } + throw new IllegalStateException(); + } - public StoreFamilyRevision setTimestamp(long timestamp) { - this.timestamp = timestamp; - setTimestampIsSet(true); - return this; + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); } - public void unsetTimestamp() { - __isset_bit_vector.clear(__TIMESTAMP_ISSET_ID); + switch (field) { + case REVISION: + return isSetRevision(); + case TIMESTAMP: + return isSetTimestamp(); } - - /** Returns true if field timestamp is set (has been assigned a value) and false otherwise */ - public boolean isSetTimestamp() { - return __isset_bit_vector.get(__TIMESTAMP_ISSET_ID); - } - - public void setTimestampIsSet(boolean value) { - __isset_bit_vector.set(__TIMESTAMP_ISSET_ID, value); + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + 
if (that instanceof StoreFamilyRevision) + return this.equals((StoreFamilyRevision) that); + return false; + } + + public boolean equals(StoreFamilyRevision that) { + if (that == null) + return false; + + boolean this_present_revision = true; + boolean that_present_revision = true; + if (this_present_revision || that_present_revision) { + if (!(this_present_revision && that_present_revision)) + return false; + if (this.revision != that.revision) + return false; } - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REVISION: - if (value == null) { - unsetRevision(); - } else { - setRevision((Long) value); - } - break; - - case TIMESTAMP: - if (value == null) { - unsetTimestamp(); - } else { - setTimestamp((Long) value); - } - break; - - } + boolean this_present_timestamp = true; + boolean that_present_timestamp = true; + if (this_present_timestamp || that_present_timestamp) { + if (!(this_present_timestamp && that_present_timestamp)) + return false; + if (this.timestamp != that.timestamp) + return false; } - public Object getFieldValue(_Fields field) { - switch (field) { - case REVISION: - return Long.valueOf(getRevision()); + return true; + } - case TIMESTAMP: - return Long.valueOf(getTimestamp()); + @Override + public int hashCode() { + return 0; + } - } - throw new IllegalStateException(); + public int compareTo(StoreFamilyRevision other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); } - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } + int lastComparison = 0; + StoreFamilyRevision typedOther = (StoreFamilyRevision) other; - switch (field) { - case REVISION: - return isSetRevision(); - case TIMESTAMP: - return isSetTimestamp(); - } - throw new IllegalStateException(); + lastComparison = Boolean.valueOf(isSetRevision()).compareTo(typedOther.isSetRevision()); + if (lastComparison != 0) { + return lastComparison; } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof StoreFamilyRevision) - return this.equals((StoreFamilyRevision) that); - return false; + if (isSetRevision()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.revision, typedOther.revision); + if (lastComparison != 0) { + return lastComparison; + } } - - public boolean equals(StoreFamilyRevision that) { - if (that == null) - return false; - - boolean this_present_revision = true; - boolean that_present_revision = true; - if (this_present_revision || that_present_revision) { - if (!(this_present_revision && that_present_revision)) - return false; - if (this.revision != that.revision) - return false; - } - - boolean this_present_timestamp = true; - boolean that_present_timestamp = true; - if (this_present_timestamp || that_present_timestamp) { - if (!(this_present_timestamp && that_present_timestamp)) - return false; - if (this.timestamp != that.timestamp) - return false; - } - - return true; + lastComparison = Boolean.valueOf(isSetTimestamp()).compareTo(typedOther.isSetTimestamp()); + if (lastComparison != 0) { + return lastComparison; } - - @Override - public int hashCode() { - return 0; + if (isSetTimestamp()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.timestamp, typedOther.timestamp); + if (lastComparison != 0) { + return lastComparison; + } } - - public int 
compareTo(StoreFamilyRevision other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - StoreFamilyRevision typedOther = (StoreFamilyRevision) other; - - lastComparison = Boolean.valueOf(isSetRevision()).compareTo(typedOther.isSetRevision()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRevision()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.revision, typedOther.revision); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTimestamp()).compareTo(typedOther.isSetTimestamp()); - if (lastComparison != 0) { - return lastComparison; + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField field; + iprot.readStructBegin(); + while (true) { + field = iprot.readFieldBegin(); + if (field.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (field.id) { + case 1: // REVISION + if (field.type == org.apache.thrift.protocol.TType.I64) { + this.revision = iprot.readI64(); + setRevisionIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); } - if (isSetTimestamp()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.timestamp, typedOther.timestamp); - if (lastComparison != 0) { - return lastComparison; - } + break; + case 2: // TIMESTAMP + if (field.type == org.apache.thrift.protocol.TType.I64) { + this.timestamp = iprot.readI64(); + setTimestampIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); } - return 0; + break; + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); + } + iprot.readFieldEnd(); } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField field; - iprot.readStructBegin(); - while (true) { - field = iprot.readFieldBegin(); - if (field.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (field.id) { - case 1: // REVISION - if (field.type == org.apache.thrift.protocol.TType.I64) { - this.revision = iprot.readI64(); - setRevisionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - break; - case 2: // TIMESTAMP - if (field.type == org.apache.thrift.protocol.TType.I64) { - this.timestamp = iprot.readI64(); - setTimestampIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - - // check for required fields of primitive type, which can't be checked in the validate method - validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(REVISION_FIELD_DESC); - oprot.writeI64(this.revision); - oprot.writeFieldEnd(); - oprot.writeFieldBegin(TIMESTAMP_FIELD_DESC); - oprot.writeI64(this.timestamp); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - @Override - public String toString() { - StringBuilder sb = 
new StringBuilder("StoreFamilyRevision("); - boolean first = true; - - sb.append("revision:"); - sb.append(this.revision); - first = false; - if (!first) sb.append(", "); - sb.append("timestamp:"); - sb.append(this.timestamp); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } + iprot.readStructEnd(); + + // check for required fields of primitive type, which can't be checked in the validate method + validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + validate(); + + oprot.writeStructBegin(STRUCT_DESC); + oprot.writeFieldBegin(REVISION_FIELD_DESC); + oprot.writeI64(this.revision); + oprot.writeFieldEnd(); + oprot.writeFieldBegin(TIMESTAMP_FIELD_DESC); + oprot.writeI64(this.timestamp); + oprot.writeFieldEnd(); + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("StoreFamilyRevision("); + boolean first = true; + + sb.append("revision:"); + sb.append(this.revision); + first = false; + if (!first) sb.append(", "); + sb.append("timestamp:"); + sb.append(this.timestamp); + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bit_vector = new BitSet(1); - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
+ __isset_bit_vector = new BitSet(1); + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); } + } } diff --git a/hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java b/hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java index 0f661cb..bc4718e 100644 --- a/hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java +++ b/hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java @@ -36,334 +36,334 @@ import java.util.Map; public class StoreFamilyRevisionList implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("StoreFamilyRevisionList"); + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("StoreFamilyRevisionList"); - private static final org.apache.thrift.protocol.TField REVISION_LIST_FIELD_DESC = new org.apache.thrift.protocol.TField("revisionList", org.apache.thrift.protocol.TType.LIST, (short) 1); + private static final org.apache.thrift.protocol.TField REVISION_LIST_FIELD_DESC = new org.apache.thrift.protocol.TField("revisionList", org.apache.thrift.protocol.TType.LIST, (short) 1); - public List revisionList; // required + public List revisionList; // required - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REVISION_LIST((short) 1, "revisionList"); + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + REVISION_LIST((short) 1, "revisionList"); - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch (fieldId) { - case 1: // REVISION_LIST - return REVISION_LIST; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + private static final Map byName = new HashMap(); static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REVISION_LIST, new org.apache.thrift.meta_data.FieldMetaData("revisionList", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, StoreFamilyRevision.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StoreFamilyRevisionList.class, metaDataMap); + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } } - public StoreFamilyRevisionList() { + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch (fieldId) { + case 1: // REVISION_LIST + return REVISION_LIST; + default: + return null; + } } - public StoreFamilyRevisionList( - List revisionList) { - this(); - this.revisionList = revisionList; + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; } /** - * Performs a deep copy on other. + * Find the _Fields constant that matches name, or null if its not found. */ - public StoreFamilyRevisionList(StoreFamilyRevisionList other) { - if (other.isSetRevisionList()) { - List __this__revisionList = new ArrayList(); - for (StoreFamilyRevision other_element : other.revisionList) { - __this__revisionList.add(new StoreFamilyRevision(other_element)); - } - this.revisionList = __this__revisionList; - } + public static _Fields findByName(String name) { + return byName.get(name); } - public StoreFamilyRevisionList deepCopy() { - return new StoreFamilyRevisionList(this); - } + private final short _thriftId; + private final String _fieldName; - @Override - public void clear() { - this.revisionList = null; + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; } - public int getRevisionListSize() { - return (this.revisionList == null) ? 0 : this.revisionList.size(); + public short getThriftFieldId() { + return _thriftId; } - public java.util.Iterator getRevisionListIterator() { - return (this.revisionList == null) ? 
null : this.revisionList.iterator(); + public String getFieldName() { + return _fieldName; } - - public void addToRevisionList(StoreFamilyRevision elem) { - if (this.revisionList == null) { - this.revisionList = new ArrayList(); - } - this.revisionList.add(elem); + } + + // isset id assignments + + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.REVISION_LIST, new org.apache.thrift.meta_data.FieldMetaData("revisionList", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, StoreFamilyRevision.class)))); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StoreFamilyRevisionList.class, metaDataMap); + } + + public StoreFamilyRevisionList() { + } + + public StoreFamilyRevisionList( + List revisionList) { + this(); + this.revisionList = revisionList; + } + + /** + * Performs a deep copy on other. + */ + public StoreFamilyRevisionList(StoreFamilyRevisionList other) { + if (other.isSetRevisionList()) { + List __this__revisionList = new ArrayList(); + for (StoreFamilyRevision other_element : other.revisionList) { + __this__revisionList.add(new StoreFamilyRevision(other_element)); + } + this.revisionList = __this__revisionList; } + } - public List getRevisionList() { - return this.revisionList; - } + public StoreFamilyRevisionList deepCopy() { + return new StoreFamilyRevisionList(this); + } - public StoreFamilyRevisionList setRevisionList(List revisionList) { - this.revisionList = revisionList; - return this; - } + @Override + public void clear() { + this.revisionList = null; + } - public void unsetRevisionList() { - this.revisionList = null; - } + public int getRevisionListSize() { + return (this.revisionList == null) ? 0 : this.revisionList.size(); + } - /** Returns true if field revisionList is set (has been assigned a value) and false otherwise */ - public boolean isSetRevisionList() { - return this.revisionList != null; - } + public java.util.Iterator getRevisionListIterator() { + return (this.revisionList == null) ? 
null : this.revisionList.iterator(); + } - public void setRevisionListIsSet(boolean value) { - if (!value) { - this.revisionList = null; - } + public void addToRevisionList(StoreFamilyRevision elem) { + if (this.revisionList == null) { + this.revisionList = new ArrayList(); } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REVISION_LIST: - if (value == null) { - unsetRevisionList(); - } else { - setRevisionList((List) value); - } - break; - - } + this.revisionList.add(elem); + } + + public List getRevisionList() { + return this.revisionList; + } + + public StoreFamilyRevisionList setRevisionList(List revisionList) { + this.revisionList = revisionList; + return this; + } + + public void unsetRevisionList() { + this.revisionList = null; + } + + /** Returns true if field revisionList is set (has been assigned a value) and false otherwise */ + public boolean isSetRevisionList() { + return this.revisionList != null; + } + + public void setRevisionListIsSet(boolean value) { + if (!value) { + this.revisionList = null; } + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case REVISION_LIST: + if (value == null) { + unsetRevisionList(); + } else { + setRevisionList((List) value); + } + break; - public Object getFieldValue(_Fields field) { - switch (field) { - case REVISION_LIST: - return getRevisionList(); - - } - throw new IllegalStateException(); } + } - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } + public Object getFieldValue(_Fields field) { + switch (field) { + case REVISION_LIST: + return getRevisionList(); - switch (field) { - case REVISION_LIST: - return isSetRevisionList(); - } - throw new IllegalStateException(); } + throw new IllegalStateException(); + } - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof StoreFamilyRevisionList) - return this.equals((StoreFamilyRevisionList) that); - return false; + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); } - public boolean equals(StoreFamilyRevisionList that) { - if (that == null) - return false; - - boolean this_present_revisionList = true && this.isSetRevisionList(); - boolean that_present_revisionList = true && that.isSetRevisionList(); - if (this_present_revisionList || that_present_revisionList) { - if (!(this_present_revisionList && that_present_revisionList)) - return false; - if (!this.revisionList.equals(that.revisionList)) - return false; - } - - return true; + switch (field) { + case REVISION_LIST: + return isSetRevisionList(); } - - @Override - public int hashCode() { - return 0; + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof StoreFamilyRevisionList) + return this.equals((StoreFamilyRevisionList) that); + return false; + } + + public boolean equals(StoreFamilyRevisionList that) { + if (that == null) + return false; + + boolean this_present_revisionList = true && this.isSetRevisionList(); + boolean that_present_revisionList = true && that.isSetRevisionList(); + if (this_present_revisionList || that_present_revisionList) { + if (!(this_present_revisionList && 
that_present_revisionList)) + return false; + if (!this.revisionList.equals(that.revisionList)) + return false; } - public int compareTo(StoreFamilyRevisionList other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } + return true; + } - int lastComparison = 0; - StoreFamilyRevisionList typedOther = (StoreFamilyRevisionList) other; + @Override + public int hashCode() { + return 0; + } - lastComparison = Boolean.valueOf(isSetRevisionList()).compareTo(typedOther.isSetRevisionList()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRevisionList()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.revisionList, typedOther.revisionList); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); + public int compareTo(StoreFamilyRevisionList other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); } - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField field; - iprot.readStructBegin(); - while (true) { - field = iprot.readFieldBegin(); - if (field.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (field.id) { - case 1: // REVISION_LIST - if (field.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list0 = iprot.readListBegin(); - this.revisionList = new ArrayList(_list0.size); - for (int _i1 = 0; _i1 < _list0.size; ++_i1) { - StoreFamilyRevision _elem2; // required - _elem2 = new StoreFamilyRevision(); - _elem2.read(iprot); - this.revisionList.add(_elem2); - } - iprot.readListEnd(); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); + int lastComparison = 0; + StoreFamilyRevisionList typedOther = (StoreFamilyRevisionList) other; - // check for required fields of primitive type, which can't be checked in the validate method - validate(); + lastComparison = Boolean.valueOf(isSetRevisionList()).compareTo(typedOther.isSetRevisionList()); + if (lastComparison != 0) { + return lastComparison; } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (this.revisionList != null) { - oprot.writeFieldBegin(REVISION_LIST_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, this.revisionList.size())); - for (StoreFamilyRevision _iter3 : this.revisionList) { - _iter3.write(oprot); - } - oprot.writeListEnd(); + if (isSetRevisionList()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.revisionList, typedOther.revisionList); + if (lastComparison != 0) { + return lastComparison; + } + } + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField field; + iprot.readStructBegin(); + while (true) { + field = iprot.readFieldBegin(); + if (field.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (field.id) { + case 1: // REVISION_LIST 
+ if (field.type == org.apache.thrift.protocol.TType.LIST) { + { + org.apache.thrift.protocol.TList _list0 = iprot.readListBegin(); + this.revisionList = new ArrayList(_list0.size); + for (int _i1 = 0; _i1 < _list0.size; ++_i1) { + StoreFamilyRevision _elem2; // required + _elem2 = new StoreFamilyRevision(); + _elem2.read(iprot); + this.revisionList.add(_elem2); } - oprot.writeFieldEnd(); + iprot.readListEnd(); + } + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); } - oprot.writeFieldStop(); - oprot.writeStructEnd(); + break; + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); + } + iprot.readFieldEnd(); } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("StoreFamilyRevisionList("); - boolean first = true; - - sb.append("revisionList:"); - if (this.revisionList == null) { - sb.append("null"); - } else { - sb.append(this.revisionList); + iprot.readStructEnd(); + + // check for required fields of primitive type, which can't be checked in the validate method + validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + validate(); + + oprot.writeStructBegin(STRUCT_DESC); + if (this.revisionList != null) { + oprot.writeFieldBegin(REVISION_LIST_FIELD_DESC); + { + oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, this.revisionList.size())); + for (StoreFamilyRevision _iter3 : this.revisionList) { + _iter3.write(oprot); } - first = false; - sb.append(")"); - return sb.toString(); + oprot.writeListEnd(); + } + oprot.writeFieldEnd(); } - - public void validate() throws org.apache.thrift.TException { - // check for required fields + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("StoreFamilyRevisionList("); + boolean first = true; + + sb.append("revisionList:"); + if (this.revisionList == null) { + sb.append("null"); + } else { + sb.append(this.revisionList); } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); } + } - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); } + } } diff --git 
a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseAuthorizationProvider.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseAuthorizationProvider.java index ee80389..97ffcff 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseAuthorizationProvider.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseAuthorizationProvider.java @@ -37,108 +37,108 @@ */ class HBaseAuthorizationProvider implements HiveAuthorizationProvider { - @Override - public Configuration getConf() { - return null; - } + @Override + public Configuration getConf() { + return null; + } - @Override - public void setConf(Configuration conf) { - } + @Override + public void setConf(Configuration conf) { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #init(org.apache.hadoop.conf.Configuration) - */ - @Override - public void init(Configuration conf) throws HiveException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #init(org.apache.hadoop.conf.Configuration) + */ + @Override + public void init(Configuration conf) throws HiveException { + } - @Override - public HiveAuthenticationProvider getAuthenticator() { - return null; - } + @Override + public HiveAuthenticationProvider getAuthenticator() { + return null; + } - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - } + @Override + public void setAuthenticator(HiveAuthenticationProvider authenticator) { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.metastore.api.Database, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.metastore.api.Database, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Database db, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * 
#authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Table, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Table table, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Partition, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Partition, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Partition part, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - } + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Table, + * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java index 8cfedfd..c304e48 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java @@ -37,40 +37,40 @@ import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; public class HBaseBaseOutputFormat implements OutputFormat, Put>, - HiveOutputFormat, Put> { + 
HiveOutputFormat, Put> { - @Override - public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( - JobConf jc, Path finalOutPath, - Class valueClass, boolean isCompressed, - Properties tableProperties, Progressable progress) - throws IOException { - throw new UnsupportedOperationException("Not implemented"); - } + @Override + public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( + JobConf jc, Path finalOutPath, + Class valueClass, boolean isCompressed, + Properties tableProperties, Progressable progress) + throws IOException { + throw new UnsupportedOperationException("Not implemented"); + } - @Override - public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { - OutputFormat, Put> outputFormat = getOutputFormat(job); - outputFormat.checkOutputSpecs(ignored, job); - } + @Override + public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { + OutputFormat, Put> outputFormat = getOutputFormat(job); + outputFormat.checkOutputSpecs(ignored, job); + } - @Override - public RecordWriter, Put> getRecordWriter(FileSystem ignored, - JobConf job, String name, Progressable progress) throws IOException { - OutputFormat, Put> outputFormat = getOutputFormat(job); - return outputFormat.getRecordWriter(ignored, job, name, progress); - } + @Override + public RecordWriter, Put> getRecordWriter(FileSystem ignored, + JobConf job, String name, Progressable progress) throws IOException { + OutputFormat, Put> outputFormat = getOutputFormat(job); + return outputFormat.getRecordWriter(ignored, job, name, progress); + } - private OutputFormat, Put> getOutputFormat(JobConf job) - throws IOException { - String outputInfo = job.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); - OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(outputInfo); - OutputFormat, Put> outputFormat = null; - if (HBaseHCatStorageHandler.isBulkMode(outputJobInfo)) { - outputFormat = new HBaseBulkOutputFormat(); - } else { - outputFormat = new HBaseDirectOutputFormat(); - } - return outputFormat; + private OutputFormat, Put> getOutputFormat(JobConf job) + throws IOException { + String outputInfo = job.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(outputInfo); + OutputFormat, Put> outputFormat = null; + if (HBaseHCatStorageHandler.isBulkMode(outputJobInfo)) { + outputFormat = new HBaseBulkOutputFormat(); + } else { + outputFormat = new HBaseDirectOutputFormat(); } + return outputFormat; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java index 4a188e0..b54dc1f 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java @@ -51,171 +51,171 @@ */ class HBaseBulkOutputFormat extends HBaseBaseOutputFormat { - private final static ImmutableBytesWritable EMPTY_LIST = new ImmutableBytesWritable( - new byte[0]); - private SequenceFileOutputFormat, Put> baseOutputFormat; - - public HBaseBulkOutputFormat() { - baseOutputFormat = new SequenceFileOutputFormat, Put>(); + private final static ImmutableBytesWritable EMPTY_LIST = new ImmutableBytesWritable( + new byte[0]); + private SequenceFileOutputFormat, Put> baseOutputFormat; + + public HBaseBulkOutputFormat() { + 
baseOutputFormat = new SequenceFileOutputFormat, Put>(); + } + + @Override + public void checkOutputSpecs(FileSystem ignored, JobConf job) + throws IOException { + baseOutputFormat.checkOutputSpecs(ignored, job); + HBaseUtil.addHBaseDelegationToken(job); + addJTDelegationToken(job); + } + + @Override + public RecordWriter, Put> getRecordWriter( + FileSystem ignored, JobConf job, String name, Progressable progress) + throws IOException { + job.setOutputKeyClass(ImmutableBytesWritable.class); + job.setOutputValueClass(Put.class); + long version = HBaseRevisionManagerUtil.getOutputRevision(job); + return new HBaseBulkRecordWriter(baseOutputFormat.getRecordWriter( + ignored, job, name, progress), version); + } + + private void addJTDelegationToken(JobConf job) throws IOException { + // Get jobTracker delegation token if security is enabled + // we need to launch the ImportSequenceFile job + if (User.isSecurityEnabled()) { + JobClient jobClient = new JobClient(new JobConf(job)); + try { + job.getCredentials().addToken(new Text("my mr token"), + jobClient.getDelegationToken(null)); + } catch (InterruptedException e) { + throw new IOException("Error while getting JT delegation token", e); + } } + } - @Override - public void checkOutputSpecs(FileSystem ignored, JobConf job) - throws IOException { - baseOutputFormat.checkOutputSpecs(ignored, job); - HBaseUtil.addHBaseDelegationToken(job); - addJTDelegationToken(job); - } + private static class HBaseBulkRecordWriter implements + RecordWriter, Put> { - @Override - public RecordWriter, Put> getRecordWriter( - FileSystem ignored, JobConf job, String name, Progressable progress) - throws IOException { - job.setOutputKeyClass(ImmutableBytesWritable.class); - job.setOutputValueClass(Put.class); - long version = HBaseRevisionManagerUtil.getOutputRevision(job); - return new HBaseBulkRecordWriter(baseOutputFormat.getRecordWriter( - ignored, job, name, progress), version); + private RecordWriter, Put> baseWriter; + private final Long outputVersion; + + public HBaseBulkRecordWriter( + RecordWriter, Put> baseWriter, + Long outputVersion) { + this.baseWriter = baseWriter; + this.outputVersion = outputVersion; } - private void addJTDelegationToken(JobConf job) throws IOException { - // Get jobTracker delegation token if security is enabled - // we need to launch the ImportSequenceFile job - if (User.isSecurityEnabled()) { - JobClient jobClient = new JobClient(new JobConf(job)); - try { - job.getCredentials().addToken(new Text("my mr token"), - jobClient.getDelegationToken(null)); - } catch (InterruptedException e) { - throw new IOException("Error while getting JT delegation token", e); - } + @Override + public void write(WritableComparable key, Put value) + throws IOException { + Put put = value; + if (outputVersion != null) { + put = new Put(value.getRow(), outputVersion.longValue()); + for (List row : value.getFamilyMap().values()) { + for (KeyValue el : row) { + put.add(el.getFamily(), el.getQualifier(), el.getValue()); + } } + } + // we ignore the key + baseWriter.write(EMPTY_LIST, put); } - private static class HBaseBulkRecordWriter implements - RecordWriter, Put> { - - private RecordWriter, Put> baseWriter; - private final Long outputVersion; + @Override + public void close(Reporter reporter) throws IOException { + baseWriter.close(reporter); + } + } - public HBaseBulkRecordWriter( - RecordWriter, Put> baseWriter, - Long outputVersion) { - this.baseWriter = baseWriter; - this.outputVersion = outputVersion; - } + public static class HBaseBulkOutputCommitter 
extends OutputCommitter { - @Override - public void write(WritableComparable key, Put value) - throws IOException { - Put put = value; - if (outputVersion != null) { - put = new Put(value.getRow(), outputVersion.longValue()); - for (List row : value.getFamilyMap().values()) { - for (KeyValue el : row) { - put.add(el.getFamily(), el.getQualifier(), el.getValue()); - } - } - } - // we ignore the key - baseWriter.write(EMPTY_LIST, put); - } + private final OutputCommitter baseOutputCommitter; - @Override - public void close(Reporter reporter) throws IOException { - baseWriter.close(reporter); - } + public HBaseBulkOutputCommitter() { + baseOutputCommitter = new FileOutputCommitter(); } - public static class HBaseBulkOutputCommitter extends OutputCommitter { - - private final OutputCommitter baseOutputCommitter; - - public HBaseBulkOutputCommitter() { - baseOutputCommitter = new FileOutputCommitter(); - } + @Override + public void abortTask(TaskAttemptContext taskContext) + throws IOException { + baseOutputCommitter.abortTask(taskContext); + } - @Override - public void abortTask(TaskAttemptContext taskContext) - throws IOException { - baseOutputCommitter.abortTask(taskContext); - } + @Override + public void commitTask(TaskAttemptContext taskContext) + throws IOException { + // baseOutputCommitter.commitTask(taskContext); + } - @Override - public void commitTask(TaskAttemptContext taskContext) - throws IOException { - // baseOutputCommitter.commitTask(taskContext); - } + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) + throws IOException { + return baseOutputCommitter.needsTaskCommit(taskContext); + } - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) - throws IOException { - return baseOutputCommitter.needsTaskCommit(taskContext); - } + @Override + public void setupJob(JobContext jobContext) throws IOException { + baseOutputCommitter.setupJob(jobContext); + } - @Override - public void setupJob(JobContext jobContext) throws IOException { - baseOutputCommitter.setupJob(jobContext); - } + @Override + public void setupTask(TaskAttemptContext taskContext) + throws IOException { + baseOutputCommitter.setupTask(taskContext); + } - @Override - public void setupTask(TaskAttemptContext taskContext) - throws IOException { - baseOutputCommitter.setupTask(taskContext); - } + @Override + public void abortJob(JobContext jobContext, int status) + throws IOException { + baseOutputCommitter.abortJob(jobContext, status); + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil + .getOpenedRevisionManager(jobContext.getConfiguration()); + rm.abortWriteTransaction(HBaseRevisionManagerUtil + .getWriteTransaction(jobContext.getConfiguration())); + } finally { + cleanIntermediate(jobContext); + if (rm != null) + rm.close(); + } + } - @Override - public void abortJob(JobContext jobContext, int status) - throws IOException { - baseOutputCommitter.abortJob(jobContext, status); - RevisionManager rm = null; - try { - rm = HBaseRevisionManagerUtil - .getOpenedRevisionManager(jobContext.getConfiguration()); - rm.abortWriteTransaction(HBaseRevisionManagerUtil - .getWriteTransaction(jobContext.getConfiguration())); - } finally { - cleanIntermediate(jobContext); - if (rm != null) - rm.close(); - } + @Override + public void commitJob(JobContext jobContext) throws IOException { + baseOutputCommitter.commitJob(jobContext); + RevisionManager rm = null; + try { + Configuration conf = jobContext.getConfiguration(); + Path srcPath = 
FileOutputFormat.getOutputPath(jobContext.getJobConf()); + if (!FileSystem.get(conf).exists(srcPath)) { + throw new IOException("Failed to bulk import hfiles. " + + "Intermediate data directory is cleaned up or missing. " + + "Please look at the bulk import job if it exists for failure reason"); } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - baseOutputCommitter.commitJob(jobContext); - RevisionManager rm = null; - try { - Configuration conf = jobContext.getConfiguration(); - Path srcPath = FileOutputFormat.getOutputPath(jobContext.getJobConf()); - if (!FileSystem.get(conf).exists(srcPath)) { - throw new IOException("Failed to bulk import hfiles. " + - "Intermediate data directory is cleaned up or missing. " + - "Please look at the bulk import job if it exists for failure reason"); - } - Path destPath = new Path(srcPath.getParent(), srcPath.getName() + "_hfiles"); - boolean success = ImportSequenceFile.runJob(jobContext, - conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY), - srcPath, - destPath); - if (!success) { - cleanIntermediate(jobContext); - throw new IOException("Failed to bulk import hfiles." + - " Please look at the bulk import job for failure reason"); - } - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(conf)); - cleanIntermediate(jobContext); - } finally { - if (rm != null) - rm.close(); - } + Path destPath = new Path(srcPath.getParent(), srcPath.getName() + "_hfiles"); + boolean success = ImportSequenceFile.runJob(jobContext, + conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY), + srcPath, + destPath); + if (!success) { + cleanIntermediate(jobContext); + throw new IOException("Failed to bulk import hfiles." 
+ + " Please look at the bulk import job for failure reason"); } + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(conf)); + cleanIntermediate(jobContext); + } finally { + if (rm != null) + rm.close(); + } + } - private void cleanIntermediate(JobContext jobContext) - throws IOException { - FileSystem fs = FileSystem.get(jobContext.getConfiguration()); - fs.delete(FileOutputFormat.getOutputPath(jobContext.getJobConf()), true); - } + private void cleanIntermediate(JobContext jobContext) + throws IOException { + FileSystem fs = FileSystem.get(jobContext.getConfiguration()); + fs.delete(FileOutputFormat.getOutputPath(jobContext.getJobConf()), true); } + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java index 066419a..242b546 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java @@ -26,16 +26,16 @@ */ class HBaseConstants { - /** key used to store write transaction object */ - public static final String PROPERTY_WRITE_TXN_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.writeTxn"; + /** key used to store write transaction object */ + public static final String PROPERTY_WRITE_TXN_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.writeTxn"; - /** key used to define the name of the table to write to */ - public static final String PROPERTY_OUTPUT_TABLE_NAME_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName"; + /** key used to define the name of the table to write to */ + public static final String PROPERTY_OUTPUT_TABLE_NAME_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName"; - /** key used to define whether bulk storage output format will be used or not */ - public static final String PROPERTY_BULK_OUTPUT_MODE_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.output.bulkMode"; + /** key used to define whether bulk storage output format will be used or not */ + public static final String PROPERTY_BULK_OUTPUT_MODE_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.output.bulkMode"; - /** key used to define the hbase table snapshot. */ - public static final String PROPERTY_TABLE_SNAPSHOT_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + "hbase.table.snapshot"; + /** key used to define the hbase table snapshot. 
*/ + public static final String PROPERTY_TABLE_SNAPSHOT_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + "hbase.table.snapshot"; } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java index b7537d4..2b31af3 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java @@ -46,122 +46,122 @@ */ class HBaseDirectOutputFormat extends HBaseBaseOutputFormat { - private TableOutputFormat outputFormat; - - public HBaseDirectOutputFormat() { - this.outputFormat = new TableOutputFormat(); + private TableOutputFormat outputFormat; + + public HBaseDirectOutputFormat() { + this.outputFormat = new TableOutputFormat(); + } + + @Override + public RecordWriter, Put> getRecordWriter(FileSystem ignored, + JobConf job, String name, Progressable progress) + throws IOException { + long version = HBaseRevisionManagerUtil.getOutputRevision(job); + return new HBaseDirectRecordWriter(outputFormat.getRecordWriter(ignored, job, name, + progress), version); + } + + @Override + public void checkOutputSpecs(FileSystem ignored, JobConf job) + throws IOException { + outputFormat.checkOutputSpecs(ignored, job); + HBaseUtil.addHBaseDelegationToken(job); + } + + private static class HBaseDirectRecordWriter implements + RecordWriter, Put> { + + private RecordWriter, Put> baseWriter; + private final Long outputVersion; + + public HBaseDirectRecordWriter( + RecordWriter, Put> baseWriter, + Long outputVersion) { + this.baseWriter = baseWriter; + this.outputVersion = outputVersion; } @Override - public RecordWriter, Put> getRecordWriter(FileSystem ignored, - JobConf job, String name, Progressable progress) - throws IOException { - long version = HBaseRevisionManagerUtil.getOutputRevision(job); - return new HBaseDirectRecordWriter(outputFormat.getRecordWriter(ignored, job, name, - progress), version); + public void write(WritableComparable key, Put value) + throws IOException { + Put put = value; + if (outputVersion != null) { + put = new Put(value.getRow(), outputVersion.longValue()); + for (List row : value.getFamilyMap().values()) { + for (KeyValue el : row) { + put.add(el.getFamily(), el.getQualifier(), el.getValue()); + } + } + } + baseWriter.write(key, put); } @Override - public void checkOutputSpecs(FileSystem ignored, JobConf job) - throws IOException { - outputFormat.checkOutputSpecs(ignored, job); - HBaseUtil.addHBaseDelegationToken(job); + public void close(Reporter reporter) throws IOException { + baseWriter.close(reporter); } - private static class HBaseDirectRecordWriter implements - RecordWriter, Put> { - - private RecordWriter, Put> baseWriter; - private final Long outputVersion; - - public HBaseDirectRecordWriter( - RecordWriter, Put> baseWriter, - Long outputVersion) { - this.baseWriter = baseWriter; - this.outputVersion = outputVersion; - } - - @Override - public void write(WritableComparable key, Put value) - throws IOException { - Put put = value; - if (outputVersion != null) { - put = new Put(value.getRow(), outputVersion.longValue()); - for (List row : value.getFamilyMap().values()) { - for (KeyValue el : row) { - put.add(el.getFamily(), el.getQualifier(), el.getValue()); - } - } - } - baseWriter.write(key, put); - } + } - @Override - public void close(Reporter reporter) throws IOException { - 
baseWriter.close(reporter); - } + public static class HBaseDirectOutputCommitter extends OutputCommitter { + public HBaseDirectOutputCommitter() throws IOException { } - public static class HBaseDirectOutputCommitter extends OutputCommitter { - - public HBaseDirectOutputCommitter() throws IOException { - } - - @Override - public void abortTask(TaskAttemptContext taskContext) - throws IOException { - } + @Override + public void abortTask(TaskAttemptContext taskContext) + throws IOException { + } - @Override - public void commitTask(TaskAttemptContext taskContext) - throws IOException { - } + @Override + public void commitTask(TaskAttemptContext taskContext) + throws IOException { + } - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) - throws IOException { - return false; - } + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) + throws IOException { + return false; + } - @Override - public void setupJob(JobContext jobContext) throws IOException { - } + @Override + public void setupJob(JobContext jobContext) throws IOException { + } - @Override - public void setupTask(TaskAttemptContext taskContext) - throws IOException { - } + @Override + public void setupTask(TaskAttemptContext taskContext) + throws IOException { + } - @Override - public void abortJob(JobContext jobContext, int status) - throws IOException { - super.abortJob(jobContext, status); - RevisionManager rm = null; - try { - rm = HBaseRevisionManagerUtil - .getOpenedRevisionManager(jobContext.getConfiguration()); - Transaction writeTransaction = HBaseRevisionManagerUtil - .getWriteTransaction(jobContext.getConfiguration()); - rm.abortWriteTransaction(writeTransaction); - } finally { - if (rm != null) - rm.close(); - } - } + @Override + public void abortJob(JobContext jobContext, int status) + throws IOException { + super.abortJob(jobContext, status); + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil + .getOpenedRevisionManager(jobContext.getConfiguration()); + Transaction writeTransaction = HBaseRevisionManagerUtil + .getWriteTransaction(jobContext.getConfiguration()); + rm.abortWriteTransaction(writeTransaction); + } finally { + if (rm != null) + rm.close(); + } + } - @Override - public void commitJob(JobContext jobContext) throws IOException { - RevisionManager rm = null; - try { - rm = HBaseRevisionManagerUtil - .getOpenedRevisionManager(jobContext.getConfiguration()); - rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(jobContext - .getConfiguration())); - } finally { - if (rm != null) - rm.close(); - } - } + @Override + public void commitJob(JobContext jobContext) throws IOException { + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil + .getOpenedRevisionManager(jobContext.getConfiguration()); + rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(jobContext + .getConfiguration())); + } finally { + if (rm != null) + rm.close(); + } } + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java index 6644d26..bbb1605 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java @@ -80,531 +80,531 @@ */ public class HBaseHCatStorageHandler extends HCatStorageHandler implements HiveMetaHook, Configurable { - 
public final static String DEFAULT_PREFIX = "default."; - private final static String PROPERTY_INT_OUTPUT_LOCATION = "hcat.hbase.mapreduce.intermediateOutputLocation"; - - private Configuration hbaseConf; - private Configuration jobConf; - private HBaseAdmin admin; - - @Override - public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { - // Populate jobProperties with input table name, table columns, RM snapshot, - // hbase-default.xml and hbase-site.xml - Map tableJobProperties = tableDesc.getJobProperties(); - String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_JOB_INFO); - try { - InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); - HCatTableInfo tableInfo = inputJobInfo.getTableInfo(); - String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo); - jobProperties.put(TableInputFormat.INPUT_TABLE, qualifiedTableName); - - Configuration jobConf = getJobConf(); - addResources(jobConf, jobProperties); - JobConf copyOfConf = new JobConf(jobConf); - HBaseConfiguration.addHbaseResources(copyOfConf); - //Getting hbase delegation token in getInputSplits does not work with PIG. So need to - //do it here - if (jobConf instanceof JobConf) { //Should be the case - HBaseUtil.addHBaseDelegationToken(copyOfConf); - ((JobConf) jobConf).getCredentials().addAll(copyOfConf.getCredentials()); - } - - String outputSchema = jobConf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); - jobProperties.put(TableInputFormat.SCAN_COLUMNS, getScanColumns(tableInfo, outputSchema)); - - String serSnapshot = (String) inputJobInfo.getProperties().get( - HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); - if (serSnapshot == null) { - HCatTableSnapshot snapshot = - HBaseRevisionManagerUtil.createSnapshot( - RevisionManagerConfiguration.create(copyOfConf), - qualifiedTableName, tableInfo); - jobProperties.put(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, - HCatUtil.serialize(snapshot)); - } - - //This adds it directly to the jobConf. 
Setting in jobProperties does not get propagated - //to JobConf as of now as the jobProperties is maintained per partition - //TODO: Remove when HCAT-308 is fixed - addOutputDependencyJars(jobConf); - jobProperties.put("tmpjars", jobConf.get("tmpjars")); - - } catch (IOException e) { - throw new IllegalStateException("Error while configuring job properties", e); - } - } - - @Override - public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { - // Populate jobProperties with output table name, hbase-default.xml, hbase-site.xml, OutputJobInfo - // Populate RM transaction in OutputJobInfo - // In case of bulk mode, populate intermediate output location - Map tableJobProperties = tableDesc.getJobProperties(); - String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); - try { - OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString); - HCatTableInfo tableInfo = outputJobInfo.getTableInfo(); - String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo); - jobProperties.put(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, qualifiedTableName); - jobProperties.put(TableOutputFormat.OUTPUT_TABLE, qualifiedTableName); - - Configuration jobConf = getJobConf(); - addResources(jobConf, jobProperties); - - Configuration copyOfConf = new Configuration(jobConf); - HBaseConfiguration.addHbaseResources(copyOfConf); - - String txnString = outputJobInfo.getProperties().getProperty( - HBaseConstants.PROPERTY_WRITE_TXN_KEY); - Transaction txn = null; - if (txnString == null) { - txn = HBaseRevisionManagerUtil.beginWriteTransaction(qualifiedTableName, tableInfo, - RevisionManagerConfiguration.create(copyOfConf)); - String serializedTxn = HCatUtil.serialize(txn); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, - serializedTxn); - } else { - txn = (Transaction) HCatUtil.deserialize(txnString); - } - if (isBulkMode(outputJobInfo)) { - String tableLocation = tableInfo.getTableLocation(); - String location = new Path(tableLocation, "REVISION_" + txn.getRevisionNumber()) - .toString(); - outputJobInfo.getProperties().setProperty(PROPERTY_INT_OUTPUT_LOCATION, location); - // We are writing out an intermediate sequenceFile hence - // location is not passed in OutputJobInfo.getLocation() - // TODO replace this with a mapreduce constant when available - jobProperties.put("mapred.output.dir", location); - jobProperties.put("mapred.output.committer.class", HBaseBulkOutputCommitter.class.getName()); - } else { - jobProperties.put("mapred.output.committer.class", HBaseDirectOutputCommitter.class.getName()); - } - - jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); - addOutputDependencyJars(jobConf); - jobProperties.put("tmpjars", jobConf.get("tmpjars")); - - } catch (IOException e) { - throw new IllegalStateException("Error while configuring job properties", e); - } + public final static String DEFAULT_PREFIX = "default."; + private final static String PROPERTY_INT_OUTPUT_LOCATION = "hcat.hbase.mapreduce.intermediateOutputLocation"; + + private Configuration hbaseConf; + private Configuration jobConf; + private HBaseAdmin admin; + + @Override + public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { + // Populate jobProperties with input table name, table columns, RM snapshot, + // hbase-default.xml and hbase-site.xml + Map tableJobProperties = tableDesc.getJobProperties(); + String jobString = 
tableJobProperties.get(HCatConstants.HCAT_KEY_JOB_INFO); + try { + InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); + HCatTableInfo tableInfo = inputJobInfo.getTableInfo(); + String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo); + jobProperties.put(TableInputFormat.INPUT_TABLE, qualifiedTableName); + + Configuration jobConf = getJobConf(); + addResources(jobConf, jobProperties); + JobConf copyOfConf = new JobConf(jobConf); + HBaseConfiguration.addHbaseResources(copyOfConf); + //Getting hbase delegation token in getInputSplits does not work with PIG. So need to + //do it here + if (jobConf instanceof JobConf) { //Should be the case + HBaseUtil.addHBaseDelegationToken(copyOfConf); + ((JobConf) jobConf).getCredentials().addAll(copyOfConf.getCredentials()); + } + + String outputSchema = jobConf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); + jobProperties.put(TableInputFormat.SCAN_COLUMNS, getScanColumns(tableInfo, outputSchema)); + + String serSnapshot = (String) inputJobInfo.getProperties().get( + HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); + if (serSnapshot == null) { + HCatTableSnapshot snapshot = + HBaseRevisionManagerUtil.createSnapshot( + RevisionManagerConfiguration.create(copyOfConf), + qualifiedTableName, tableInfo); + jobProperties.put(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, + HCatUtil.serialize(snapshot)); + } + + //This adds it directly to the jobConf. Setting in jobProperties does not get propagated + //to JobConf as of now as the jobProperties is maintained per partition + //TODO: Remove when HCAT-308 is fixed + addOutputDependencyJars(jobConf); + jobProperties.put("tmpjars", jobConf.get("tmpjars")); + + } catch (IOException e) { + throw new IllegalStateException("Error while configuring job properties", e); } - - /* - * @return instance of HiveAuthorizationProvider - * - * @throws HiveException - * - * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler# - * getAuthorizationProvider() - */ - @Override - public HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException { - - HBaseAuthorizationProvider hbaseAuth = new HBaseAuthorizationProvider(); - hbaseAuth.init(getConf()); - return hbaseAuth; + } + + @Override + public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { + // Populate jobProperties with output table name, hbase-default.xml, hbase-site.xml, OutputJobInfo + // Populate RM transaction in OutputJobInfo + // In case of bulk mode, populate intermediate output location + Map tableJobProperties = tableDesc.getJobProperties(); + String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + try { + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString); + HCatTableInfo tableInfo = outputJobInfo.getTableInfo(); + String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo); + jobProperties.put(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, qualifiedTableName); + jobProperties.put(TableOutputFormat.OUTPUT_TABLE, qualifiedTableName); + + Configuration jobConf = getJobConf(); + addResources(jobConf, jobProperties); + + Configuration copyOfConf = new Configuration(jobConf); + HBaseConfiguration.addHbaseResources(copyOfConf); + + String txnString = outputJobInfo.getProperties().getProperty( + HBaseConstants.PROPERTY_WRITE_TXN_KEY); + Transaction txn = null; + if (txnString == null) { + txn = HBaseRevisionManagerUtil.beginWriteTransaction(qualifiedTableName, tableInfo, + 
RevisionManagerConfiguration.create(copyOfConf)); + String serializedTxn = HCatUtil.serialize(txn); + outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, + serializedTxn); + } else { + txn = (Transaction) HCatUtil.deserialize(txnString); + } + if (isBulkMode(outputJobInfo)) { + String tableLocation = tableInfo.getTableLocation(); + String location = new Path(tableLocation, "REVISION_" + txn.getRevisionNumber()) + .toString(); + outputJobInfo.getProperties().setProperty(PROPERTY_INT_OUTPUT_LOCATION, location); + // We are writing out an intermediate sequenceFile hence + // location is not passed in OutputJobInfo.getLocation() + // TODO replace this with a mapreduce constant when available + jobProperties.put("mapred.output.dir", location); + jobProperties.put("mapred.output.committer.class", HBaseBulkOutputCommitter.class.getName()); + } else { + jobProperties.put("mapred.output.committer.class", HBaseDirectOutputCommitter.class.getName()); + } + + jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); + addOutputDependencyJars(jobConf); + jobProperties.put("tmpjars", jobConf.get("tmpjars")); + + } catch (IOException e) { + throw new IllegalStateException("Error while configuring job properties", e); } - - /* - * @param table - * - * @throws MetaException - * - * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler - * #commitCreateTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void commitCreateTable(Table table) throws MetaException { - } - - /* - * @param instance of table - * - * @param deleteData - * - * @throws MetaException - * - * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler - * #commitDropTable(org.apache.hadoop.hive.metastore.api.Table, boolean) - */ - @Override - public void commitDropTable(Table tbl, boolean deleteData) - throws MetaException { - checkDeleteTable(tbl); - - } - - /* - * @param instance of table - * - * @throws MetaException - * - * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler - * #preCreateTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void preCreateTable(Table tbl) throws MetaException { - boolean isExternal = MetaStoreUtils.isExternalTable(tbl); - - hbaseConf = getConf(); - - if (tbl.getSd().getLocation() != null) { - throw new MetaException("LOCATION may not be specified for HBase."); - } - - try { - String tableName = getFullyQualifiedHBaseTableName(tbl); - String hbaseColumnsMapping = tbl.getParameters().get( - HBaseSerDe.HBASE_COLUMNS_MAPPING); - - if (hbaseColumnsMapping == null) { - throw new MetaException( - "No hbase.columns.mapping defined in table" - + " properties."); - } - - List hbaseColumnFamilies = new ArrayList(); - List hbaseColumnQualifiers = new ArrayList(); - List hbaseColumnFamiliesBytes = new ArrayList(); - int iKey = HBaseUtil.parseColumnMapping(hbaseColumnsMapping, - hbaseColumnFamilies, hbaseColumnFamiliesBytes, - hbaseColumnQualifiers, null); - - HTableDescriptor tableDesc; - Set uniqueColumnFamilies = new HashSet(); - if (!getHBaseAdmin().tableExists(tableName)) { - // if it is not an external table then create one - if (!isExternal) { - // Create the column descriptors - tableDesc = new HTableDescriptor(tableName); - uniqueColumnFamilies.addAll(hbaseColumnFamilies); - uniqueColumnFamilies.remove(hbaseColumnFamilies.get(iKey)); - - for (String columnFamily : uniqueColumnFamilies) { - HColumnDescriptor familyDesc = new HColumnDescriptor(Bytes - .toBytes(columnFamily)); 
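A self-contained sketch of the output wiring that configureOutputJobProperties() above sets up: bulk mode points the job at an intermediate REVISION_<n> directory and installs the bulk committer, while direct mode keeps the table as the target and installs the direct committer. The method, map, and hard-coded committer class names below are illustrative stand-ins (the patch itself uses HBaseBulkOutputCommitter.class.getName() and HBaseDirectOutputCommitter.class.getName()).

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.fs.Path;

// Illustrative only: jobProperties stands in for the map populated by the
// storage handler; the fully qualified committer names are assumed from the
// class nesting shown in this patch.
public class OutputModeWiringSketch {
  static void wire(boolean bulkMode, String tableLocation, long revisionNumber,
           Map<String, String> jobProperties) {
    if (bulkMode) {
      String location =
        new Path(tableLocation, "REVISION_" + revisionNumber).toString();
      jobProperties.put("mapred.output.dir", location);
      jobProperties.put("mapred.output.committer.class",
        "org.apache.hcatalog.hbase.HBaseBulkOutputFormat$HBaseBulkOutputCommitter");
    } else {
      jobProperties.put("mapred.output.committer.class",
        "org.apache.hcatalog.hbase.HBaseDirectOutputFormat$HBaseDirectOutputCommitter");
    }
  }

  public static void main(String[] args) {
    Map<String, String> props = new HashMap<String, String>();
    wire(true, "/warehouse/db/table", 7, props);
    System.out.println(props);
  }
}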
- familyDesc.setMaxVersions(Integer.MAX_VALUE); - tableDesc.addFamily(familyDesc); - } - - getHBaseAdmin().createTable(tableDesc); - } else { - // an external table - throw new MetaException("HBase table " + tableName - + " doesn't exist while the table is " - + "declared as an external table."); - } - - } else { - if (!isExternal) { - throw new MetaException("Table " + tableName - + " already exists within HBase." - + " Use CREATE EXTERNAL TABLE instead to" - + " register it in HCatalog."); - } - // make sure the schema mapping is right - tableDesc = getHBaseAdmin().getTableDescriptor( - Bytes.toBytes(tableName)); - - for (int i = 0; i < hbaseColumnFamilies.size(); i++) { - if (i == iKey) { - continue; - } - - if (!tableDesc.hasFamily(hbaseColumnFamiliesBytes.get(i))) { - throw new MetaException("Column Family " - + hbaseColumnFamilies.get(i) - + " is not defined in hbase table " + tableName); - } - } - } - - // ensure the table is online - new HTable(hbaseConf, tableDesc.getName()); - - //Set up table in revision manager. - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); - rm.createTable(tableName, new ArrayList(uniqueColumnFamilies)); - - } catch (MasterNotRunningException mnre) { - throw new MetaException(StringUtils.stringifyException(mnre)); - } catch (IOException ie) { - throw new MetaException(StringUtils.stringifyException(ie)); - } catch (IllegalArgumentException iae) { - throw new MetaException(StringUtils.stringifyException(iae)); - } - - } - - /* - * @param table - * - * @throws MetaException - * - * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler - * #preDropTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void preDropTable(Table table) throws MetaException { - } - - /* - * @param table - * - * @throws MetaException - * - * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler - * #rollbackCreateTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void rollbackCreateTable(Table table) throws MetaException { - checkDeleteTable(table); - } - - /* - * @param table - * - * @throws MetaException - * - * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler - * #rollbackDropTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void rollbackDropTable(Table table) throws MetaException { + } + + /* + * @return instance of HiveAuthorizationProvider + * + * @throws HiveException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler# + * getAuthorizationProvider() + */ + @Override + public HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException { + + HBaseAuthorizationProvider hbaseAuth = new HBaseAuthorizationProvider(); + hbaseAuth.init(getConf()); + return hbaseAuth; + } + + /* + * @param table + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #commitCreateTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void commitCreateTable(Table table) throws MetaException { + } + + /* + * @param instance of table + * + * @param deleteData + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #commitDropTable(org.apache.hadoop.hive.metastore.api.Table, boolean) + */ + @Override + public void commitDropTable(Table tbl, boolean deleteData) + throws MetaException { + checkDeleteTable(tbl); + + } + + /* + * @param instance of table + * + * @throws MetaException + * + * @see 
org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #preCreateTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void preCreateTable(Table tbl) throws MetaException { + boolean isExternal = MetaStoreUtils.isExternalTable(tbl); + + hbaseConf = getConf(); + + if (tbl.getSd().getLocation() != null) { + throw new MetaException("LOCATION may not be specified for HBase."); } - /* - * @return instance of HiveMetaHook - * - * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler#getMetaHook() - */ - @Override - public HiveMetaHook getMetaHook() { - return this; - } - - private HBaseAdmin getHBaseAdmin() throws MetaException { - try { - if (admin == null) { - admin = new HBaseAdmin(this.getConf()); - } - return admin; - } catch (MasterNotRunningException mnre) { - throw new MetaException(StringUtils.stringifyException(mnre)); - } catch (ZooKeeperConnectionException zkce) { - throw new MetaException(StringUtils.stringifyException(zkce)); + try { + String tableName = getFullyQualifiedHBaseTableName(tbl); + String hbaseColumnsMapping = tbl.getParameters().get( + HBaseSerDe.HBASE_COLUMNS_MAPPING); + + if (hbaseColumnsMapping == null) { + throw new MetaException( + "No hbase.columns.mapping defined in table" + + " properties."); + } + + List hbaseColumnFamilies = new ArrayList(); + List hbaseColumnQualifiers = new ArrayList(); + List hbaseColumnFamiliesBytes = new ArrayList(); + int iKey = HBaseUtil.parseColumnMapping(hbaseColumnsMapping, + hbaseColumnFamilies, hbaseColumnFamiliesBytes, + hbaseColumnQualifiers, null); + + HTableDescriptor tableDesc; + Set uniqueColumnFamilies = new HashSet(); + if (!getHBaseAdmin().tableExists(tableName)) { + // if it is not an external table then create one + if (!isExternal) { + // Create the column descriptors + tableDesc = new HTableDescriptor(tableName); + uniqueColumnFamilies.addAll(hbaseColumnFamilies); + uniqueColumnFamilies.remove(hbaseColumnFamilies.get(iKey)); + + for (String columnFamily : uniqueColumnFamilies) { + HColumnDescriptor familyDesc = new HColumnDescriptor(Bytes + .toBytes(columnFamily)); + familyDesc.setMaxVersions(Integer.MAX_VALUE); + tableDesc.addFamily(familyDesc); + } + + getHBaseAdmin().createTable(tableDesc); + } else { + // an external table + throw new MetaException("HBase table " + tableName + + " doesn't exist while the table is " + + "declared as an external table."); } - } - private String getFullyQualifiedHBaseTableName(Table tbl) { - String tableName = tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME); - if (tableName == null) { - tableName = tbl.getSd().getSerdeInfo().getParameters() - .get(HBaseSerDe.HBASE_TABLE_NAME); - } - if (tableName == null) { - if (tbl.getDbName().equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { - tableName = tbl.getTableName(); - } else { - tableName = tbl.getDbName() + "." + tbl.getTableName(); - } - tableName = tableName.toLowerCase(); + } else { + if (!isExternal) { + throw new MetaException("Table " + tableName + + " already exists within HBase." 
+ + " Use CREATE EXTERNAL TABLE instead to" + + " register it in HCatalog."); } - return tableName; - } - - static String getFullyQualifiedHBaseTableName(HCatTableInfo tableInfo) { - String qualifiedName = tableInfo.getStorerInfo().getProperties() - .getProperty(HBaseSerDe.HBASE_TABLE_NAME); - if (qualifiedName == null) { - String databaseName = tableInfo.getDatabaseName(); - String tableName = tableInfo.getTableName(); - if ((databaseName == null) - || (databaseName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME))) { - qualifiedName = tableName; - } else { - qualifiedName = databaseName + "." + tableName; - } - qualifiedName = qualifiedName.toLowerCase(); + // make sure the schema mapping is right + tableDesc = getHBaseAdmin().getTableDescriptor( + Bytes.toBytes(tableName)); + + for (int i = 0; i < hbaseColumnFamilies.size(); i++) { + if (i == iKey) { + continue; + } + + if (!tableDesc.hasFamily(hbaseColumnFamiliesBytes.get(i))) { + throw new MetaException("Column Family " + + hbaseColumnFamilies.get(i) + + " is not defined in hbase table " + tableName); + } } - return qualifiedName; - } + } - @Override - public Class getInputFormatClass() { - return HBaseInputFormat.class; - } + // ensure the table is online + new HTable(hbaseConf, tableDesc.getName()); - @Override - public Class getOutputFormatClass() { - return HBaseBaseOutputFormat.class; - } + //Set up table in revision manager. + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); + rm.createTable(tableName, new ArrayList(uniqueColumnFamilies)); - /* - * @return subclass of SerDe - * - * @throws UnsupportedOperationException - * - * @see - * org.apache.hive.hcatalog.storagehandler.HCatStorageHandler#getSerDeClass() - */ - @Override - public Class getSerDeClass() - throws UnsupportedOperationException { - return HBaseSerDe.class; + } catch (MasterNotRunningException mnre) { + throw new MetaException(StringUtils.stringifyException(mnre)); + } catch (IOException ie) { + throw new MetaException(StringUtils.stringifyException(ie)); + } catch (IllegalArgumentException iae) { + throw new MetaException(StringUtils.stringifyException(iae)); } - public Configuration getJobConf() { - return jobConf; + } + + /* + * @param table + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #preDropTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void preDropTable(Table table) throws MetaException { + } + + /* + * @param table + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #rollbackCreateTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void rollbackCreateTable(Table table) throws MetaException { + checkDeleteTable(table); + } + + /* + * @param table + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #rollbackDropTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void rollbackDropTable(Table table) throws MetaException { + } + + /* + * @return instance of HiveMetaHook + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler#getMetaHook() + */ + @Override + public HiveMetaHook getMetaHook() { + return this; + } + + private HBaseAdmin getHBaseAdmin() throws MetaException { + try { + if (admin == null) { + admin = new HBaseAdmin(this.getConf()); + } + return admin; + } catch (MasterNotRunningException mnre) { + throw new MetaException(StringUtils.stringifyException(mnre)); + 
} catch (ZooKeeperConnectionException zkce) { + throw new MetaException(StringUtils.stringifyException(zkce)); } + } - @Override - public Configuration getConf() { - - if (hbaseConf == null) { - hbaseConf = HBaseConfiguration.create(); - } - return hbaseConf; + private String getFullyQualifiedHBaseTableName(Table tbl) { + String tableName = tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME); + if (tableName == null) { + tableName = tbl.getSd().getSerdeInfo().getParameters() + .get(HBaseSerDe.HBASE_TABLE_NAME); } - - @Override - public void setConf(Configuration conf) { - //setConf is called both during DDL operations and mapred read/write jobs. - //Creating a copy of conf for DDL and adding hbase-default and hbase-site.xml to it. - //For jobs, maintaining a reference instead of cloning as we need to - // 1) add hbase delegation token to the Credentials. - // 2) set tmpjars on it. Putting in jobProperties does not get propagated to JobConf - // in case of InputFormat as they are maintained per partition. - //Not adding hbase-default.xml and hbase-site.xml to jobConf as it will override any - //hbase properties set in the JobConf by the user. In configureInputJobProperties and - //configureOutputJobProperties, we take care of adding the default properties - //that are not already present. TODO: Change to a copy for jobs after HCAT-308 is fixed. - jobConf = conf; - hbaseConf = RevisionManagerConfiguration.create(HBaseConfiguration.create(conf)); + if (tableName == null) { + if (tbl.getDbName().equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { + tableName = tbl.getTableName(); + } else { + tableName = tbl.getDbName() + "." + tbl.getTableName(); + } + tableName = tableName.toLowerCase(); } - - private void checkDeleteTable(Table table) throws MetaException { - boolean isExternal = MetaStoreUtils.isExternalTable(table); - String tableName = getFullyQualifiedHBaseTableName(table); - RevisionManager rm = null; - try { - if (!isExternal && getHBaseAdmin().tableExists(tableName)) { - // we have created an HBase table, so we delete it to roll back; - if (getHBaseAdmin().isTableEnabled(tableName)) { - getHBaseAdmin().disableTable(tableName); - } - getHBaseAdmin().deleteTable(tableName); - - //Drop table in revision manager. - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); - rm.dropTable(tableName); - } - } catch (IOException ie) { - throw new MetaException(StringUtils.stringifyException(ie)); - } finally { - HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); - } + return tableName; + } + + static String getFullyQualifiedHBaseTableName(HCatTableInfo tableInfo) { + String qualifiedName = tableInfo.getStorerInfo().getProperties() + .getProperty(HBaseSerDe.HBASE_TABLE_NAME); + if (qualifiedName == null) { + String databaseName = tableInfo.getDatabaseName(); + String tableName = tableInfo.getTableName(); + if ((databaseName == null) + || (databaseName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME))) { + qualifiedName = tableName; + } else { + qualifiedName = databaseName + "." + tableName; + } + qualifiedName = qualifiedName.toLowerCase(); } - - /** - * Helper method for users to add the required depedency jars to distributed cache. 
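The naming rule in getFullyQualifiedHBaseTableName(Table) above, restated as a standalone sketch (method and argument names are illustrative): an explicit hbase.table.name property wins; otherwise a table in the default database maps to its bare name and any other table maps to "<db>.<table>", lower-cased.

// Illustrative only; mirrors getFullyQualifiedHBaseTableName(Table) in this patch.
public class HBaseTableNameSketch {
  static String hbaseTableName(String explicitName, String dbName, String tableName) {
    if (explicitName != null) {
      return explicitName; // hbase.table.name from table or SerDe parameters
    }
    String name = "default".equals(dbName) // MetaStoreUtils.DEFAULT_DATABASE_NAME
      ? tableName
      : dbName + "." + tableName;
    return name.toLowerCase();
  }

  public static void main(String[] args) {
    System.out.println(hbaseTableName(null, "default", "Orders"));           // orders
    System.out.println(hbaseTableName(null, "sales", "Orders"));             // sales.orders
    System.out.println(hbaseTableName("my_hbase_table", "sales", "Orders")); // my_hbase_table
  }
}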
- * @param conf - * @throws IOException - */ - private void addOutputDependencyJars(Configuration conf) throws IOException { - TableMapReduceUtil.addDependencyJars(conf, - //ZK - ZooKeeper.class, - //HBase - HTable.class, - //Hive - HiveException.class, - //HCatalog jar - HCatOutputFormat.class, - //hcat hbase storage handler jar - HBaseHCatStorageHandler.class, - //hive hbase storage handler jar - HBaseSerDe.class, - //hive jar - Table.class, - //libthrift jar - TBase.class, - //hbase jar - Bytes.class, - //thrift-fb303 .jar - FacebookBase.class, - //guava jar - ThreadFactoryBuilder.class); + return qualifiedName; + } + + @Override + public Class getInputFormatClass() { + return HBaseInputFormat.class; + } + + @Override + public Class getOutputFormatClass() { + return HBaseBaseOutputFormat.class; + } + + /* + * @return subclass of SerDe + * + * @throws UnsupportedOperationException + * + * @see + * org.apache.hive.hcatalog.storagehandler.HCatStorageHandler#getSerDeClass() + */ + @Override + public Class getSerDeClass() + throws UnsupportedOperationException { + return HBaseSerDe.class; + } + + public Configuration getJobConf() { + return jobConf; + } + + @Override + public Configuration getConf() { + + if (hbaseConf == null) { + hbaseConf = HBaseConfiguration.create(); } - - /** - * Utility method to add hbase-default.xml and hbase-site.xml properties to a new map - * if they are not already present in the jobConf. - * @param jobConf Job configuration - * @param newJobProperties Map to which new properties should be added - */ - private void addResources(Configuration jobConf, - Map newJobProperties) { - Configuration conf = new Configuration(false); - HBaseConfiguration.addHbaseResources(conf); - RevisionManagerConfiguration.addResources(conf); - for (Entry entry : conf) { - if (jobConf.get(entry.getKey()) == null) - newJobProperties.put(entry.getKey(), entry.getValue()); + return hbaseConf; + } + + @Override + public void setConf(Configuration conf) { + //setConf is called both during DDL operations and mapred read/write jobs. + //Creating a copy of conf for DDL and adding hbase-default and hbase-site.xml to it. + //For jobs, maintaining a reference instead of cloning as we need to + // 1) add hbase delegation token to the Credentials. + // 2) set tmpjars on it. Putting in jobProperties does not get propagated to JobConf + // in case of InputFormat as they are maintained per partition. + //Not adding hbase-default.xml and hbase-site.xml to jobConf as it will override any + //hbase properties set in the JobConf by the user. In configureInputJobProperties and + //configureOutputJobProperties, we take care of adding the default properties + //that are not already present. TODO: Change to a copy for jobs after HCAT-308 is fixed. + jobConf = conf; + hbaseConf = RevisionManagerConfiguration.create(HBaseConfiguration.create(conf)); + } + + private void checkDeleteTable(Table table) throws MetaException { + boolean isExternal = MetaStoreUtils.isExternalTable(table); + String tableName = getFullyQualifiedHBaseTableName(table); + RevisionManager rm = null; + try { + if (!isExternal && getHBaseAdmin().tableExists(tableName)) { + // we have created an HBase table, so we delete it to roll back; + if (getHBaseAdmin().isTableEnabled(tableName)) { + getHBaseAdmin().disableTable(tableName); } + getHBaseAdmin().deleteTable(tableName); + + //Drop table in revision manager. 
+ rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); + rm.dropTable(tableName); + } + } catch (IOException ie) { + throw new MetaException(StringUtils.stringifyException(ie)); + } finally { + HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); } - - public static boolean isBulkMode(OutputJobInfo outputJobInfo) { - //Default is false - String bulkMode = outputJobInfo.getTableInfo().getStorerInfo().getProperties() - .getProperty(HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY, - "false"); - return "true".equals(bulkMode); + } + + /** + * Helper method for users to add the required depedency jars to distributed cache. + * @param conf + * @throws IOException + */ + private void addOutputDependencyJars(Configuration conf) throws IOException { + TableMapReduceUtil.addDependencyJars(conf, + //ZK + ZooKeeper.class, + //HBase + HTable.class, + //Hive + HiveException.class, + //HCatalog jar + HCatOutputFormat.class, + //hcat hbase storage handler jar + HBaseHCatStorageHandler.class, + //hive hbase storage handler jar + HBaseSerDe.class, + //hive jar + Table.class, + //libthrift jar + TBase.class, + //hbase jar + Bytes.class, + //thrift-fb303 .jar + FacebookBase.class, + //guava jar + ThreadFactoryBuilder.class); + } + + /** + * Utility method to add hbase-default.xml and hbase-site.xml properties to a new map + * if they are not already present in the jobConf. + * @param jobConf Job configuration + * @param newJobProperties Map to which new properties should be added + */ + private void addResources(Configuration jobConf, + Map newJobProperties) { + Configuration conf = new Configuration(false); + HBaseConfiguration.addHbaseResources(conf); + RevisionManagerConfiguration.addResources(conf); + for (Entry entry : conf) { + if (jobConf.get(entry.getKey()) == null) + newJobProperties.put(entry.getKey(), entry.getValue()); } - - private String getScanColumns(HCatTableInfo tableInfo, String outputColSchema) throws IOException { - StringBuilder builder = new StringBuilder(); - String hbaseColumnMapping = tableInfo.getStorerInfo().getProperties() - .getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING); - if (outputColSchema == null) { - String[] splits = hbaseColumnMapping.split("[,]"); - for (int i = 0; i < splits.length; i++) { - if (!splits[i].equals(HBaseSerDe.HBASE_KEY_COL)) - builder.append(splits[i]).append(" "); - } - } else { - HCatSchema outputSchema = (HCatSchema) HCatUtil.deserialize(outputColSchema); - HCatSchema tableSchema = tableInfo.getDataColumns(); - List outputFieldNames = outputSchema.getFieldNames(); - List outputColumnMapping = new ArrayList(); - for (String fieldName : outputFieldNames) { - int position = tableSchema.getPosition(fieldName); - outputColumnMapping.add(position); - } - List columnFamilies = new ArrayList(); - List columnQualifiers = new ArrayList(); - HBaseUtil.parseColumnMapping(hbaseColumnMapping, columnFamilies, null, - columnQualifiers, null); - for (int i = 0; i < outputColumnMapping.size(); i++) { - int cfIndex = outputColumnMapping.get(i); - String cf = columnFamilies.get(cfIndex); - // We skip the key column. 
- if (cf.equals(HBaseSerDe.HBASE_KEY_COL) == false) { - String qualifier = columnQualifiers.get(i); - builder.append(cf); - builder.append(":"); - if (qualifier != null) { - builder.append(qualifier); - } - builder.append(" "); - } - } + } + + public static boolean isBulkMode(OutputJobInfo outputJobInfo) { + //Default is false + String bulkMode = outputJobInfo.getTableInfo().getStorerInfo().getProperties() + .getProperty(HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY, + "false"); + return "true".equals(bulkMode); + } + + private String getScanColumns(HCatTableInfo tableInfo, String outputColSchema) throws IOException { + StringBuilder builder = new StringBuilder(); + String hbaseColumnMapping = tableInfo.getStorerInfo().getProperties() + .getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING); + if (outputColSchema == null) { + String[] splits = hbaseColumnMapping.split("[,]"); + for (int i = 0; i < splits.length; i++) { + if (!splits[i].equals(HBaseSerDe.HBASE_KEY_COL)) + builder.append(splits[i]).append(" "); + } + } else { + HCatSchema outputSchema = (HCatSchema) HCatUtil.deserialize(outputColSchema); + HCatSchema tableSchema = tableInfo.getDataColumns(); + List outputFieldNames = outputSchema.getFieldNames(); + List outputColumnMapping = new ArrayList(); + for (String fieldName : outputFieldNames) { + int position = tableSchema.getPosition(fieldName); + outputColumnMapping.add(position); + } + List columnFamilies = new ArrayList(); + List columnQualifiers = new ArrayList(); + HBaseUtil.parseColumnMapping(hbaseColumnMapping, columnFamilies, null, + columnQualifiers, null); + for (int i = 0; i < outputColumnMapping.size(); i++) { + int cfIndex = outputColumnMapping.get(i); + String cf = columnFamilies.get(cfIndex); + // We skip the key column. + if (cf.equals(HBaseSerDe.HBASE_KEY_COL) == false) { + String qualifier = columnQualifiers.get(i); + builder.append(cf); + builder.append(":"); + if (qualifier != null) { + builder.append(qualifier); + } + builder.append(" "); } - //Remove the extra space delimiter - builder.deleteCharAt(builder.length() - 1); - return builder.toString(); + } } + //Remove the extra space delimiter + builder.deleteCharAt(builder.length() - 1); + return builder.toString(); + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java index 8c0816c..3fe468c 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java @@ -43,84 +43,84 @@ */ class HBaseInputFormat implements InputFormat { - private final TableInputFormat inputFormat; + private final TableInputFormat inputFormat; - public HBaseInputFormat() { - inputFormat = new TableInputFormat(); - } + public HBaseInputFormat() { + inputFormat = new TableInputFormat(); + } - /* - * @param instance of InputSplit - * - * @param instance of TaskAttemptContext - * - * @return RecordReader - * - * @throws IOException - * - * @throws InterruptedException - * - * @see - * org.apache.hadoop.mapreduce.InputFormat#createRecordReader(org.apache - * .hadoop.mapreduce.InputSplit, - * org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public RecordReader getRecordReader( - InputSplit split, JobConf job, Reporter reporter) - throws IOException { - String jobString = job.get(HCatConstants.HCAT_KEY_JOB_INFO); - InputJobInfo inputJobInfo = (InputJobInfo) 
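getScanColumns() above turns the Hive hbase.columns.mapping value into the space-separated column list expected by TableInputFormat.SCAN_COLUMNS, skipping the :key entry that maps the row key. A minimal sketch of the no-projection case (class and method names are illustrative; the projected-schema branch additionally filters by the requested output columns):

// Illustrative only; mirrors the outputColSchema == null branch of getScanColumns().
public class ScanColumnsSketch {
  static String scanColumns(String hbaseColumnsMapping) {
    StringBuilder builder = new StringBuilder();
    for (String col : hbaseColumnsMapping.split(",")) {
      if (!col.equals(":key")) { // HBaseSerDe.HBASE_KEY_COL
        builder.append(col).append(" ");
      }
    }
    return builder.toString().trim();
  }

  public static void main(String[] args) {
    // prints "info:name info:age"
    System.out.println(scanColumns(":key,info:name,info:age"));
  }
}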
HCatUtil.deserialize(jobString); + /* + * @param instance of InputSplit + * + * @param instance of TaskAttemptContext + * + * @return RecordReader + * + * @throws IOException + * + * @throws InterruptedException + * + * @see + * org.apache.hadoop.mapreduce.InputFormat#createRecordReader(org.apache + * .hadoop.mapreduce.InputSplit, + * org.apache.hadoop.mapreduce.TaskAttemptContext) + */ + @Override + public RecordReader getRecordReader( + InputSplit split, JobConf job, Reporter reporter) + throws IOException { + String jobString = job.get(HCatConstants.HCAT_KEY_JOB_INFO); + InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); - String tableName = job.get(TableInputFormat.INPUT_TABLE); - TableSplit tSplit = (TableSplit) split; - HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(inputJobInfo, job); - inputFormat.setConf(job); - Scan inputScan = inputFormat.getScan(); - // TODO: Make the caching configurable by the user - inputScan.setCaching(200); - inputScan.setCacheBlocks(false); - Scan sc = new Scan(inputScan); - sc.setStartRow(tSplit.getStartRow()); - sc.setStopRow(tSplit.getEndRow()); - recordReader.setScan(sc); - recordReader.setHTable(new HTable(job, tableName)); - recordReader.init(); - return recordReader; - } + String tableName = job.get(TableInputFormat.INPUT_TABLE); + TableSplit tSplit = (TableSplit) split; + HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(inputJobInfo, job); + inputFormat.setConf(job); + Scan inputScan = inputFormat.getScan(); + // TODO: Make the caching configurable by the user + inputScan.setCaching(200); + inputScan.setCacheBlocks(false); + Scan sc = new Scan(inputScan); + sc.setStartRow(tSplit.getStartRow()); + sc.setStopRow(tSplit.getEndRow()); + recordReader.setScan(sc); + recordReader.setHTable(new HTable(job, tableName)); + recordReader.init(); + return recordReader; + } - /* - * @param jobContext - * - * @return List of InputSplit - * - * @throws IOException - * - * @throws InterruptedException - * - * @see - * org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce - * .JobContext) - */ - @Override - public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits) - throws IOException { - inputFormat.setConf(job); - return convertSplits(inputFormat.getSplits(HCatMapRedUtil.createJobContext(job, null, - Reporter.NULL))); - } + /* + * @param jobContext + * + * @return List of InputSplit + * + * @throws IOException + * + * @throws InterruptedException + * + * @see + * org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce + * .JobContext) + */ + @Override + public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits) + throws IOException { + inputFormat.setConf(job); + return convertSplits(inputFormat.getSplits(HCatMapRedUtil.createJobContext(job, null, + Reporter.NULL))); + } - private InputSplit[] convertSplits(List splits) { - InputSplit[] converted = new InputSplit[splits.size()]; - for (int i = 0; i < splits.size(); i++) { - org.apache.hadoop.hbase.mapreduce.TableSplit tableSplit = - (org.apache.hadoop.hbase.mapreduce.TableSplit) splits.get(i); - TableSplit newTableSplit = new TableSplit(tableSplit.getTableName(), - tableSplit.getStartRow(), - tableSplit.getEndRow(), tableSplit.getRegionLocation()); - converted[i] = newTableSplit; - } - return converted; + private InputSplit[] convertSplits(List splits) { + InputSplit[] converted = new InputSplit[splits.size()]; + for (int i = 0; i < splits.size(); 
i++) { + org.apache.hadoop.hbase.mapreduce.TableSplit tableSplit = + (org.apache.hadoop.hbase.mapreduce.TableSplit) splits.get(i); + TableSplit newTableSplit = new TableSplit(tableSplit.getTableName(), + tableSplit.getStartRow(), + tableSplit.getEndRow(), tableSplit.getRegionLocation()); + converted[i] = newTableSplit; } + return converted; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java index 02d99df..ba6271c 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java @@ -51,207 +51,207 @@ */ class HBaseRevisionManagerUtil { - private final static Logger LOG = LoggerFactory.getLogger(HBaseRevisionManagerUtil.class); - - private HBaseRevisionManagerUtil() { - } - - /** - * Creates the latest snapshot of the table. - * - * @param jobConf The job configuration. - * @param hbaseTableName The fully qualified name of the HBase table. - * @param tableInfo HCat table information - * @return An instance of HCatTableSnapshot - * @throws IOException Signals that an I/O exception has occurred. - */ - static HCatTableSnapshot createSnapshot(Configuration jobConf, - String hbaseTableName, HCatTableInfo tableInfo) throws IOException { - - RevisionManager rm = null; - TableSnapshot snpt; - try { - rm = getOpenedRevisionManager(jobConf); - snpt = rm.createSnapshot(hbaseTableName); - } finally { - closeRevisionManagerQuietly(rm); - } - - HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(snpt, tableInfo); - return hcatSnapshot; + private final static Logger LOG = LoggerFactory.getLogger(HBaseRevisionManagerUtil.class); + + private HBaseRevisionManagerUtil() { + } + + /** + * Creates the latest snapshot of the table. + * + * @param jobConf The job configuration. + * @param hbaseTableName The fully qualified name of the HBase table. + * @param tableInfo HCat table information + * @return An instance of HCatTableSnapshot + * @throws IOException Signals that an I/O exception has occurred. + */ + static HCatTableSnapshot createSnapshot(Configuration jobConf, + String hbaseTableName, HCatTableInfo tableInfo) throws IOException { + + RevisionManager rm = null; + TableSnapshot snpt; + try { + rm = getOpenedRevisionManager(jobConf); + snpt = rm.createSnapshot(hbaseTableName); + } finally { + closeRevisionManagerQuietly(rm); } - /** - * Creates the snapshot using the revision specified by the user. - * - * @param jobConf The job configuration. - * @param tableName The fully qualified name of the table whose snapshot is being taken. - * @param revision The revision number to use for the snapshot. - * @return An instance of HCatTableSnapshot. - * @throws IOException Signals that an I/O exception has occurred. - */ - static HCatTableSnapshot createSnapshot(Configuration jobConf, - String tableName, long revision) - throws IOException { - - TableSnapshot snpt; - RevisionManager rm = null; - try { - rm = getOpenedRevisionManager(jobConf); - snpt = rm.createSnapshot(tableName, revision); - } finally { - closeRevisionManagerQuietly(rm); - } - - String inputJobString = jobConf.get(HCatConstants.HCAT_KEY_JOB_INFO); - if (inputJobString == null) { - throw new IOException( - "InputJobInfo information not found in JobContext. 
" - + "HCatInputFormat.setInput() not called?"); - } - InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(inputJobString); - HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil - .convertSnapshot(snpt, inputInfo.getTableInfo()); - - return hcatSnapshot; + HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(snpt, tableInfo); + return hcatSnapshot; + } + + /** + * Creates the snapshot using the revision specified by the user. + * + * @param jobConf The job configuration. + * @param tableName The fully qualified name of the table whose snapshot is being taken. + * @param revision The revision number to use for the snapshot. + * @return An instance of HCatTableSnapshot. + * @throws IOException Signals that an I/O exception has occurred. + */ + static HCatTableSnapshot createSnapshot(Configuration jobConf, + String tableName, long revision) + throws IOException { + + TableSnapshot snpt; + RevisionManager rm = null; + try { + rm = getOpenedRevisionManager(jobConf); + snpt = rm.createSnapshot(tableName, revision); + } finally { + closeRevisionManagerQuietly(rm); } - /** - * Gets an instance of revision manager which is opened. - * - * @param jobConf The job configuration. - * @return RevisionManager An instance of revision manager. - * @throws IOException - */ - static RevisionManager getOpenedRevisionManager(Configuration jobConf) throws IOException { - return RevisionManagerFactory.getOpenedRevisionManager(jobConf); + String inputJobString = jobConf.get(HCatConstants.HCAT_KEY_JOB_INFO); + if (inputJobString == null) { + throw new IOException( + "InputJobInfo information not found in JobContext. " + + "HCatInputFormat.setInput() not called?"); } - - static void closeRevisionManagerQuietly(RevisionManager rm) { - if (rm != null) { - try { - rm.close(); - } catch (IOException e) { - LOG.warn("Error while trying to close revision manager", e); - } - } - } - - - static HCatTableSnapshot convertSnapshot(TableSnapshot hbaseSnapshot, - HCatTableInfo hcatTableInfo) throws IOException { - - HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); - Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); - HashMap revisionMap = new HashMap(); - - for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { - if (hcatHbaseColMap.containsKey(fSchema.getName())) { - String colFamily = hcatHbaseColMap.get(fSchema.getName()); - long revisionID = hbaseSnapshot.getRevision(colFamily); - revisionMap.put(fSchema.getName(), revisionID); - } - } - - HCatTableSnapshot hcatSnapshot = new HCatTableSnapshot( - hcatTableInfo.getDatabaseName(), hcatTableInfo.getTableName(), revisionMap, hbaseSnapshot.getLatestRevision()); - return hcatSnapshot; + InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(inputJobString); + HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil + .convertSnapshot(snpt, inputInfo.getTableInfo()); + + return hcatSnapshot; + } + + /** + * Gets an instance of revision manager which is opened. + * + * @param jobConf The job configuration. + * @return RevisionManager An instance of revision manager. 
+ * @throws IOException + */ + static RevisionManager getOpenedRevisionManager(Configuration jobConf) throws IOException { + return RevisionManagerFactory.getOpenedRevisionManager(jobConf); + } + + static void closeRevisionManagerQuietly(RevisionManager rm) { + if (rm != null) { + try { + rm.close(); + } catch (IOException e) { + LOG.warn("Error while trying to close revision manager", e); + } } + } - static TableSnapshot convertSnapshot(HCatTableSnapshot hcatSnapshot, - HCatTableInfo hcatTableInfo) throws IOException { - HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); - Map revisionMap = new HashMap(); - Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); - for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { - String colFamily = hcatHbaseColMap.get(fSchema.getName()); - if (hcatSnapshot.containsColumn(fSchema.getName())) { - long revision = hcatSnapshot.getRevision(fSchema.getName()); - revisionMap.put(colFamily, revision); - } - } + static HCatTableSnapshot convertSnapshot(TableSnapshot hbaseSnapshot, + HCatTableInfo hcatTableInfo) throws IOException { - String fullyQualifiedName = hcatSnapshot.getDatabaseName() + "." - + hcatSnapshot.getTableName(); - return new TableSnapshot(fullyQualifiedName, revisionMap, hcatSnapshot.getLatestRevision()); + HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); + Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); + HashMap revisionMap = new HashMap(); + for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { + if (hcatHbaseColMap.containsKey(fSchema.getName())) { + String colFamily = hcatHbaseColMap.get(fSchema.getName()); + long revisionID = hbaseSnapshot.getRevision(colFamily); + revisionMap.put(fSchema.getName(), revisionID); + } } - /** - * Begins a transaction in the revision manager for the given table. 
- * @param qualifiedTableName Name of the table - * @param tableInfo HCat Table information - * @param jobConf Job Configuration - * @return The new transaction in revision manager - * @throws IOException - */ - static Transaction beginWriteTransaction(String qualifiedTableName, - HCatTableInfo tableInfo, Configuration jobConf) throws IOException { - Transaction txn; - RevisionManager rm = null; - try { - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(jobConf); - String hBaseColumns = tableInfo.getStorerInfo().getProperties() - .getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING); - String[] splits = hBaseColumns.split("[,:]"); - Set families = new HashSet(); - for (int i = 0; i < splits.length; i += 2) { - if (!splits[i].isEmpty()) - families.add(splits[i]); - } - txn = rm.beginWriteTransaction(qualifiedTableName, new ArrayList(families)); - } finally { - HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); - } - return txn; + HCatTableSnapshot hcatSnapshot = new HCatTableSnapshot( + hcatTableInfo.getDatabaseName(), hcatTableInfo.getTableName(), revisionMap, hbaseSnapshot.getLatestRevision()); + return hcatSnapshot; + } + + static TableSnapshot convertSnapshot(HCatTableSnapshot hcatSnapshot, + HCatTableInfo hcatTableInfo) throws IOException { + + HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); + Map revisionMap = new HashMap(); + Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); + for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { + String colFamily = hcatHbaseColMap.get(fSchema.getName()); + if (hcatSnapshot.containsColumn(fSchema.getName())) { + long revision = hcatSnapshot.getRevision(fSchema.getName()); + revisionMap.put(colFamily, revision); + } } - static Transaction getWriteTransaction(Configuration conf) throws IOException { - OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - return (Transaction) HCatUtil.deserialize(outputJobInfo.getProperties() - .getProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); + String fullyQualifiedName = hcatSnapshot.getDatabaseName() + "." + + hcatSnapshot.getTableName(); + return new TableSnapshot(fullyQualifiedName, revisionMap, hcatSnapshot.getLatestRevision()); + + } + + /** + * Begins a transaction in the revision manager for the given table. 
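// A small, self-contained sketch (not part of the patch) of the family extraction that
// beginWriteTransaction() below performs on the hbase.columns.mapping property: the
// mapping is split on both ',' and ':' and every other token is a column family; the
// ":key" entry contributes an empty token that is skipped. The mapping string is a
// made-up example, and the stride-2 walk relies on each entry holding exactly one ':',
// which HBaseUtil.parseColumnMapping() enforces elsewhere.
import java.util.HashSet;
import java.util.Set;

public class FamilyExtractionSketch {
  public static void main(String[] args) {
    String hBaseColumns = ":key,info:name,stats:count";
    String[] splits = hBaseColumns.split("[,:]");
    Set<String> families = new HashSet<String>();
    for (int i = 0; i < splits.length; i += 2) {
      if (!splits[i].isEmpty()) {
        families.add(splits[i]);
      }
    }
    System.out.println(families); // prints [info, stats] (set order may vary)
  }
}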
+ * @param qualifiedTableName Name of the table + * @param tableInfo HCat Table information + * @param jobConf Job Configuration + * @return The new transaction in revision manager + * @throws IOException + */ + static Transaction beginWriteTransaction(String qualifiedTableName, + HCatTableInfo tableInfo, Configuration jobConf) throws IOException { + Transaction txn; + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(jobConf); + String hBaseColumns = tableInfo.getStorerInfo().getProperties() + .getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING); + String[] splits = hBaseColumns.split("[,:]"); + Set families = new HashSet(); + for (int i = 0; i < splits.length; i += 2) { + if (!splits[i].isEmpty()) + families.add(splits[i]); + } + txn = rm.beginWriteTransaction(qualifiedTableName, new ArrayList(families)); + } finally { + HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); } - - static void setWriteTransaction(Configuration conf, Transaction txn) throws IOException { - OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, HCatUtil.serialize(txn)); - conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); + return txn; + } + + static Transaction getWriteTransaction(Configuration conf) throws IOException { + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + return (Transaction) HCatUtil.deserialize(outputJobInfo.getProperties() + .getProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); + } + + static void setWriteTransaction(Configuration conf, Transaction txn) throws IOException { + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, HCatUtil.serialize(txn)); + conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); + } + + /** + * Get the Revision number that will be assigned to this job's output data + * @param conf configuration of the job + * @return the revision number used + * @throws IOException + */ + static long getOutputRevision(Configuration conf) throws IOException { + return getWriteTransaction(conf).getRevisionNumber(); + } + + private static Map getHCatHBaseColumnMapping(HCatTableInfo hcatTableInfo) + throws IOException { + + HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); + StorerInfo storeInfo = hcatTableInfo.getStorerInfo(); + String hbaseColumnMapping = storeInfo.getProperties().getProperty( + HBaseSerDe.HBASE_COLUMNS_MAPPING); + + Map hcatHbaseColMap = new HashMap(); + List columnFamilies = new ArrayList(); + List columnQualifiers = new ArrayList(); + HBaseUtil.parseColumnMapping(hbaseColumnMapping, columnFamilies, + null, columnQualifiers, null); + + for (HCatFieldSchema column : hcatTableSchema.getFields()) { + int fieldPos = hcatTableSchema.getPosition(column.getName()); + String colFamily = columnFamilies.get(fieldPos); + if (colFamily.equals(HBaseSerDe.HBASE_KEY_COL) == false) { + hcatHbaseColMap.put(column.getName(), colFamily); + } } - /** - * Get the Revision number that will be assigned to this job's output data - * @param conf configuration of the job - * @return the revision number used - * @throws IOException - */ - static long getOutputRevision(Configuration conf) throws IOException { - return 
getWriteTransaction(conf).getRevisionNumber(); - } - - private static Map getHCatHBaseColumnMapping(HCatTableInfo hcatTableInfo) - throws IOException { - - HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); - StorerInfo storeInfo = hcatTableInfo.getStorerInfo(); - String hbaseColumnMapping = storeInfo.getProperties().getProperty( - HBaseSerDe.HBASE_COLUMNS_MAPPING); - - Map hcatHbaseColMap = new HashMap(); - List columnFamilies = new ArrayList(); - List columnQualifiers = new ArrayList(); - HBaseUtil.parseColumnMapping(hbaseColumnMapping, columnFamilies, - null, columnQualifiers, null); - - for (HCatFieldSchema column : hcatTableSchema.getFields()) { - int fieldPos = hcatTableSchema.getPosition(column.getName()); - String colFamily = columnFamilies.get(fieldPos); - if (colFamily.equals(HBaseSerDe.HBASE_KEY_COL) == false) { - hcatHbaseColMap.put(column.getName(), colFamily); - } - } - - return hcatHbaseColMap; - } + return hcatHbaseColMap; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseUtil.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseUtil.java index 27e165f..da5e5bb 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseUtil.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseUtil.java @@ -29,131 +29,131 @@ class HBaseUtil { - private HBaseUtil() { + private HBaseUtil() { + } + + /** + * Parses the HBase columns mapping to identify the column families, qualifiers + * and also caches the byte arrays corresponding to them. One of the HCat table + * columns maps to the HBase row key, by default the first column. + * + * @param columnMapping - the column mapping specification to be parsed + * @param colFamilies - the list of HBase column family names + * @param colFamiliesBytes - the corresponding byte array + * @param colQualifiers - the list of HBase column qualifier names + * @param colQualifiersBytes - the corresponding byte array + * @return the row key index in the column names list + * @throws IOException + */ + static int parseColumnMapping( + String columnMapping, + List colFamilies, + List colFamiliesBytes, + List colQualifiers, + List colQualifiersBytes) throws IOException { + + int rowKeyIndex = -1; + + if (colFamilies == null || colQualifiers == null) { + throw new IllegalArgumentException("Error: caller must pass in lists for the column families " + + "and qualifiers."); } - /** - * Parses the HBase columns mapping to identify the column families, qualifiers - * and also caches the byte arrays corresponding to them. One of the HCat table - * columns maps to the HBase row key, by default the first column. 
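// A minimal sketch (not part of the patch) of what parseColumnMapping(), shown below,
// produces for a typical mapping. The mapping string ":key,info:name,stats:" is a
// made-up example; the method is package-private and declares IOException, so the call
// only compiles inside org.apache.hcatalog.hbase and in a method that handles it.
List<String> families = new ArrayList<String>();
List<String> qualifiers = new ArrayList<String>();
int rowKeyIndex = HBaseUtil.parseColumnMapping(":key,info:name,stats:",
    families, null, qualifiers, null);
// rowKeyIndex == 0
// families   == [":key", "info", "stats"]
// qualifiers == [null, "name", null]  (a family listed without a qualifier maps to null)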
- * - * @param columnMapping - the column mapping specification to be parsed - * @param colFamilies - the list of HBase column family names - * @param colFamiliesBytes - the corresponding byte array - * @param colQualifiers - the list of HBase column qualifier names - * @param colQualifiersBytes - the corresponding byte array - * @return the row key index in the column names list - * @throws IOException - */ - static int parseColumnMapping( - String columnMapping, - List colFamilies, - List colFamiliesBytes, - List colQualifiers, - List colQualifiersBytes) throws IOException { - - int rowKeyIndex = -1; - - if (colFamilies == null || colQualifiers == null) { - throw new IllegalArgumentException("Error: caller must pass in lists for the column families " + - "and qualifiers."); - } - - colFamilies.clear(); - colQualifiers.clear(); + colFamilies.clear(); + colQualifiers.clear(); - if (columnMapping == null) { - throw new IllegalArgumentException("Error: hbase.columns.mapping missing for this HBase table."); - } - - if (columnMapping.equals("") || columnMapping.equals(HBaseSerDe.HBASE_KEY_COL)) { - throw new IllegalArgumentException("Error: hbase.columns.mapping specifies only the HBase table" - + " row key. A valid Hive-HBase table must specify at least one additional column."); - } + if (columnMapping == null) { + throw new IllegalArgumentException("Error: hbase.columns.mapping missing for this HBase table."); + } - String[] mapping = columnMapping.split(","); - - for (int i = 0; i < mapping.length; i++) { - String elem = mapping[i]; - int idxFirst = elem.indexOf(":"); - int idxLast = elem.lastIndexOf(":"); - - if (idxFirst < 0 || !(idxFirst == idxLast)) { - throw new IllegalArgumentException("Error: the HBase columns mapping contains a badly formed " + - "column family, column qualifier specification."); - } - - if (elem.equals(HBaseSerDe.HBASE_KEY_COL)) { - rowKeyIndex = i; - colFamilies.add(elem); - colQualifiers.add(null); - } else { - String[] parts = elem.split(":"); - assert (parts.length > 0 && parts.length <= 2); - colFamilies.add(parts[0]); - - if (parts.length == 2) { - colQualifiers.add(parts[1]); - } else { - colQualifiers.add(null); - } - } - } + if (columnMapping.equals("") || columnMapping.equals(HBaseSerDe.HBASE_KEY_COL)) { + throw new IllegalArgumentException("Error: hbase.columns.mapping specifies only the HBase table" + + " row key. 
A valid Hive-HBase table must specify at least one additional column."); + } - if (rowKeyIndex == -1) { - colFamilies.add(0, HBaseSerDe.HBASE_KEY_COL); - colQualifiers.add(0, null); - rowKeyIndex = 0; + String[] mapping = columnMapping.split(","); + + for (int i = 0; i < mapping.length; i++) { + String elem = mapping[i]; + int idxFirst = elem.indexOf(":"); + int idxLast = elem.lastIndexOf(":"); + + if (idxFirst < 0 || !(idxFirst == idxLast)) { + throw new IllegalArgumentException("Error: the HBase columns mapping contains a badly formed " + + "column family, column qualifier specification."); + } + + if (elem.equals(HBaseSerDe.HBASE_KEY_COL)) { + rowKeyIndex = i; + colFamilies.add(elem); + colQualifiers.add(null); + } else { + String[] parts = elem.split(":"); + assert (parts.length > 0 && parts.length <= 2); + colFamilies.add(parts[0]); + + if (parts.length == 2) { + colQualifiers.add(parts[1]); + } else { + colQualifiers.add(null); } + } + } - if (colFamilies.size() != colQualifiers.size()) { - throw new IOException("Error in parsing the hbase columns mapping."); - } + if (rowKeyIndex == -1) { + colFamilies.add(0, HBaseSerDe.HBASE_KEY_COL); + colQualifiers.add(0, null); + rowKeyIndex = 0; + } - // populate the corresponding byte [] if the client has passed in a non-null list - if (colFamiliesBytes != null) { - colFamiliesBytes.clear(); + if (colFamilies.size() != colQualifiers.size()) { + throw new IOException("Error in parsing the hbase columns mapping."); + } - for (String fam : colFamilies) { - colFamiliesBytes.add(Bytes.toBytes(fam)); - } - } + // populate the corresponding byte [] if the client has passed in a non-null list + if (colFamiliesBytes != null) { + colFamiliesBytes.clear(); - if (colQualifiersBytes != null) { - colQualifiersBytes.clear(); + for (String fam : colFamilies) { + colFamiliesBytes.add(Bytes.toBytes(fam)); + } + } - for (String qual : colQualifiers) { - if (qual == null) { - colQualifiersBytes.add(null); - } else { - colQualifiersBytes.add(Bytes.toBytes(qual)); - } - } - } + if (colQualifiersBytes != null) { + colQualifiersBytes.clear(); - if (colFamiliesBytes != null && colQualifiersBytes != null) { - if (colFamiliesBytes.size() != colQualifiersBytes.size()) { - throw new IOException("Error in caching the bytes for the hbase column families " + - "and qualifiers."); - } + for (String qual : colQualifiers) { + if (qual == null) { + colQualifiersBytes.add(null); + } else { + colQualifiersBytes.add(Bytes.toBytes(qual)); } + } + } - return rowKeyIndex; + if (colFamiliesBytes != null && colQualifiersBytes != null) { + if (colFamiliesBytes.size() != colQualifiersBytes.size()) { + throw new IOException("Error in caching the bytes for the hbase column families " + + "and qualifiers."); + } } - /** - * Get delegation token from hbase and add it to JobConf - * @param job - * @throws IOException - */ - static void addHBaseDelegationToken(JobConf job) throws IOException { - if (User.isHBaseSecurityEnabled(job)) { - try { - User.getCurrent().obtainAuthTokenForJob(job); - } catch (InterruptedException e) { - throw new IOException("Error while obtaining hbase delegation token", e); - } - } + return rowKeyIndex; + } + + /** + * Get delegation token from hbase and add it to JobConf + * @param job + * @throws IOException + */ + static void addHBaseDelegationToken(JobConf job) throws IOException { + if (User.isHBaseSecurityEnabled(job)) { + try { + User.getCurrent().obtainAuthTokenForJob(job); + } catch (InterruptedException e) { + throw new IOException("Error while obtaining 
hbase delegation token", e); + } } + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HCatTableSnapshot.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HCatTableSnapshot.java index 61a439e..147141e 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HCatTableSnapshot.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HCatTableSnapshot.java @@ -30,63 +30,63 @@ */ public class HCatTableSnapshot implements Serializable { - private static final long serialVersionUID = 1L; - private String tableName; - private String databaseName; - private Map columnMap; - private long latestRevision; + private static final long serialVersionUID = 1L; + private String tableName; + private String databaseName; + private Map columnMap; + private long latestRevision; - HCatTableSnapshot(String databaseName, String tableName, Map columnMap, long latestRevision) { - this.tableName = tableName; - this.databaseName = databaseName; - this.columnMap = columnMap; - this.latestRevision = latestRevision; - } + HCatTableSnapshot(String databaseName, String tableName, Map columnMap, long latestRevision) { + this.tableName = tableName; + this.databaseName = databaseName; + this.columnMap = columnMap; + this.latestRevision = latestRevision; + } - /** - * @return The name of the table in the snapshot. - */ - public String getTableName() { - return this.tableName; - } + /** + * @return The name of the table in the snapshot. + */ + public String getTableName() { + return this.tableName; + } - /** - * @return The name of the database to which the table snapshot belongs. - */ - public String getDatabaseName() { - return this.databaseName; - } + /** + * @return The name of the database to which the table snapshot belongs. + */ + public String getDatabaseName() { + return this.databaseName; + } - /** - * @return The revision number of a column in a snapshot. - */ - long getRevision(String column) { - if (columnMap.containsKey(column)) - return this.columnMap.get(column); - return latestRevision; - } + /** + * @return The revision number of a column in a snapshot. + */ + long getRevision(String column) { + if (columnMap.containsKey(column)) + return this.columnMap.get(column); + return latestRevision; + } - /** - * The method checks if the snapshot contains information about a data column. - * - * @param column The data column of the table - * @return true, if successful - */ - boolean containsColumn(String column) { - return this.columnMap.containsKey(column); - } + /** + * The method checks if the snapshot contains information about a data column. 
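// A small sketch (not part of the patch) of the revision lookup: a column that is not in
// the snapshot's column map falls back to the latest committed revision. The column names
// and revision numbers are made up; the constructor and getRevision() are package-private.
Map<String, Long> columnMap = new HashMap<String, Long>();
columnMap.put("name", 7L);
HCatTableSnapshot snap = new HCatTableSnapshot("default", "mytable", columnMap, 10L);
long tracked = snap.getRevision("name");   // 7, taken from the column map
long fallback = snap.getRevision("other"); // 10, i.e. getLatestRevision()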
+ * + * @param column The data column of the table + * @return true, if successful + */ + boolean containsColumn(String column) { + return this.columnMap.containsKey(column); + } - /** - * @return latest committed revision when snapshot was taken - */ - long getLatestRevision() { - return latestRevision; - } + /** + * @return latest committed revision when snapshot was taken + */ + long getLatestRevision() { + return latestRevision; + } - @Override - public String toString() { - String snapshot = " Database Name: " + this.databaseName + " Table Name : " + tableName + - "Latest Revision: " + latestRevision + " Column revision : " + columnMap.toString(); - return snapshot; - } + @Override + public String toString() { + String snapshot = " Database Name: " + this.databaseName + " Table Name : " + tableName + + "Latest Revision: " + latestRevision + " Column revision : " + columnMap.toString(); + return snapshot; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java index 4f0e30c..7d44334 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java @@ -52,204 +52,204 @@ */ class HbaseSnapshotRecordReader implements RecordReader { - static final Logger LOG = LoggerFactory.getLogger(HbaseSnapshotRecordReader.class); - private final InputJobInfo inpJobInfo; - private final Configuration conf; - private final int maxRevisions = 1; - private ResultScanner scanner; - private Scan scan; - private HTable htable; - private TableSnapshot snapshot; - private Iterator resultItr; - private Set allAbortedTransactions; - private DataOutputBuffer valueOut = new DataOutputBuffer(); - private DataInputBuffer valueIn = new DataInputBuffer(); - - HbaseSnapshotRecordReader(InputJobInfo inputJobInfo, Configuration conf) throws IOException { - this.inpJobInfo = inputJobInfo; - this.conf = conf; - String snapshotString = conf.get(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); - HCatTableSnapshot hcatSnapshot = (HCatTableSnapshot) HCatUtil - .deserialize(snapshotString); - this.snapshot = HBaseRevisionManagerUtil.convertSnapshot(hcatSnapshot, - inpJobInfo.getTableInfo()); + static final Logger LOG = LoggerFactory.getLogger(HbaseSnapshotRecordReader.class); + private final InputJobInfo inpJobInfo; + private final Configuration conf; + private final int maxRevisions = 1; + private ResultScanner scanner; + private Scan scan; + private HTable htable; + private TableSnapshot snapshot; + private Iterator resultItr; + private Set allAbortedTransactions; + private DataOutputBuffer valueOut = new DataOutputBuffer(); + private DataInputBuffer valueIn = new DataInputBuffer(); + + HbaseSnapshotRecordReader(InputJobInfo inputJobInfo, Configuration conf) throws IOException { + this.inpJobInfo = inputJobInfo; + this.conf = conf; + String snapshotString = conf.get(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); + HCatTableSnapshot hcatSnapshot = (HCatTableSnapshot) HCatUtil + .deserialize(snapshotString); + this.snapshot = HBaseRevisionManagerUtil.convertSnapshot(hcatSnapshot, + inpJobInfo.getTableInfo()); + } + + public void init() throws IOException { + restart(scan.getStartRow()); + } + + public void restart(byte[] firstRow) throws IOException { + allAbortedTransactions = 
getAbortedTransactions(Bytes.toString(htable.getTableName()), scan); + long maxValidRevision = getMaximumRevision(scan, snapshot); + while (allAbortedTransactions.contains(maxValidRevision)) { + maxValidRevision--; } - - public void init() throws IOException { - restart(scan.getStartRow()); - } - - public void restart(byte[] firstRow) throws IOException { - allAbortedTransactions = getAbortedTransactions(Bytes.toString(htable.getTableName()), scan); - long maxValidRevision = getMaximumRevision(scan, snapshot); - while (allAbortedTransactions.contains(maxValidRevision)) { - maxValidRevision--; + Scan newScan = new Scan(scan); + newScan.setStartRow(firstRow); + //TODO: See if filters in 0.92 can be used to optimize the scan + //TODO: Consider create a custom snapshot filter + //TODO: Make min revision a constant in RM + newScan.setTimeRange(0, maxValidRevision + 1); + newScan.setMaxVersions(); + this.scanner = this.htable.getScanner(newScan); + resultItr = this.scanner.iterator(); + } + + private Set getAbortedTransactions(String tableName, Scan scan) throws IOException { + Set abortedTransactions = new HashSet(); + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + byte[][] families = scan.getFamilies(); + for (byte[] familyKey : families) { + String family = Bytes.toString(familyKey); + List abortedWriteTransactions = rm.getAbortedWriteTransactions( + tableName, family); + if (abortedWriteTransactions != null) { + for (FamilyRevision revision : abortedWriteTransactions) { + abortedTransactions.add(revision.getRevision()); + } } - Scan newScan = new Scan(scan); - newScan.setStartRow(firstRow); - //TODO: See if filters in 0.92 can be used to optimize the scan - //TODO: Consider create a custom snapshot filter - //TODO: Make min revision a constant in RM - newScan.setTimeRange(0, maxValidRevision + 1); - newScan.setMaxVersions(); - this.scanner = this.htable.getScanner(newScan); - resultItr = this.scanner.iterator(); + } + return abortedTransactions; + } finally { + HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); } - - private Set getAbortedTransactions(String tableName, Scan scan) throws IOException { - Set abortedTransactions = new HashSet(); - RevisionManager rm = null; - try { - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - byte[][] families = scan.getFamilies(); - for (byte[] familyKey : families) { - String family = Bytes.toString(familyKey); - List abortedWriteTransactions = rm.getAbortedWriteTransactions( - tableName, family); - if (abortedWriteTransactions != null) { - for (FamilyRevision revision : abortedWriteTransactions) { - abortedTransactions.add(revision.getRevision()); - } - } - } - return abortedTransactions; - } finally { - HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); - } + } + + private long getMaximumRevision(Scan scan, TableSnapshot snapshot) { + long maxRevision = 0; + byte[][] families = scan.getFamilies(); + for (byte[] familyKey : families) { + String family = Bytes.toString(familyKey); + long revision = snapshot.getRevision(family); + if (revision > maxRevision) + maxRevision = revision; } - - private long getMaximumRevision(Scan scan, TableSnapshot snapshot) { - long maxRevision = 0; - byte[][] families = scan.getFamilies(); - for (byte[] familyKey : families) { - String family = Bytes.toString(familyKey); - long revision = snapshot.getRevision(family); - if (revision > maxRevision) - maxRevision = revision; + return maxRevision; + } + + /* + * @param htable The HTable ( of 
HBase) to use for the record reader. + * + */ + public void setHTable(HTable htable) { + this.htable = htable; + } + + /* + * @param scan The scan to be used for reading records. + * + */ + public void setScan(Scan scan) { + this.scan = scan; + } + + @Override + public ImmutableBytesWritable createKey() { + return new ImmutableBytesWritable(); + } + + @Override + public Result createValue() { + return new Result(); + } + + @Override + public long getPos() { + // This should be the ordinal tuple in the range; + // not clear how to calculate... + return 0; + } + + @Override + public float getProgress() throws IOException { + // Depends on the total number of tuples + return 0; + } + + @Override + public boolean next(ImmutableBytesWritable key, Result value) throws IOException { + if (this.resultItr == null) { + LOG.warn("The HBase result iterator is found null. It is possible" + + " that the record reader has already been closed."); + } else { + while (resultItr.hasNext()) { + Result temp = resultItr.next(); + Result hbaseRow = prepareResult(temp.list()); + if (hbaseRow != null) { + // Update key and value. Currently no way to avoid serialization/de-serialization + // as no setters are available. + key.set(hbaseRow.getRow()); + valueOut.reset(); + hbaseRow.write(valueOut); + valueIn.reset(valueOut.getData(), valueOut.getLength()); + value.readFields(valueIn); + return true; } - return maxRevision; - } - - /* - * @param htable The HTable ( of HBase) to use for the record reader. - * - */ - public void setHTable(HTable htable) { - this.htable = htable; - } - /* - * @param scan The scan to be used for reading records. - * - */ - public void setScan(Scan scan) { - this.scan = scan; + } } - - @Override - public ImmutableBytesWritable createKey() { - return new ImmutableBytesWritable(); - } - - @Override - public Result createValue() { - return new Result(); + return false; + } + + private Result prepareResult(List keyvalues) { + + List finalKeyVals = new ArrayList(); + Map> qualValMap = new HashMap>(); + for (KeyValue kv : keyvalues) { + byte[] cf = kv.getFamily(); + byte[] qualifier = kv.getQualifier(); + String key = Bytes.toString(cf) + ":" + Bytes.toString(qualifier); + List kvs; + if (qualValMap.containsKey(key)) { + kvs = qualValMap.get(key); + } else { + kvs = new ArrayList(); + } + + String family = Bytes.toString(kv.getFamily()); + //Ignore aborted transactions + if (allAbortedTransactions.contains(kv.getTimestamp())) { + continue; + } + + long desiredTS = snapshot.getRevision(family); + if (kv.getTimestamp() <= desiredTS) { + kvs.add(kv); + } + qualValMap.put(key, kvs); } - @Override - public long getPos() { - // This should be the ordinal tuple in the range; - // not clear how to calculate... - return 0; - } - - @Override - public float getProgress() throws IOException { - // Depends on the total number of tuples - return 0; - } - - @Override - public boolean next(ImmutableBytesWritable key, Result value) throws IOException { - if (this.resultItr == null) { - LOG.warn("The HBase result iterator is found null. It is possible" - + " that the record reader has already been closed."); - } else { - while (resultItr.hasNext()) { - Result temp = resultItr.next(); - Result hbaseRow = prepareResult(temp.list()); - if (hbaseRow != null) { - // Update key and value. Currently no way to avoid serialization/de-serialization - // as no setters are available. 
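// A minimal sketch (not part of the patch) of the per-cell rule that prepareResult()
// applies when assembling a row: a KeyValue is visible only if its timestamp is not an
// aborted revision and does not exceed the snapshot revision of its column family; of the
// versions that survive, at most maxRevisions (currently 1) per family:qualifier are kept.
// "kv", "allAbortedTransactions" and "snapshot" are assumed to be in scope, as in the
// reader above.
boolean visible = !allAbortedTransactions.contains(kv.getTimestamp())
    && kv.getTimestamp() <= snapshot.getRevision(Bytes.toString(kv.getFamily()));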
- key.set(hbaseRow.getRow()); - valueOut.reset(); - hbaseRow.write(valueOut); - valueIn.reset(valueOut.getData(), valueOut.getLength()); - value.readFields(valueIn); - return true; - } - - } - } - return false; - } - - private Result prepareResult(List keyvalues) { - - List finalKeyVals = new ArrayList(); - Map> qualValMap = new HashMap>(); - for (KeyValue kv : keyvalues) { - byte[] cf = kv.getFamily(); - byte[] qualifier = kv.getQualifier(); - String key = Bytes.toString(cf) + ":" + Bytes.toString(qualifier); - List kvs; - if (qualValMap.containsKey(key)) { - kvs = qualValMap.get(key); - } else { - kvs = new ArrayList(); - } - - String family = Bytes.toString(kv.getFamily()); - //Ignore aborted transactions - if (allAbortedTransactions.contains(kv.getTimestamp())) { - continue; - } - - long desiredTS = snapshot.getRevision(family); - if (kv.getTimestamp() <= desiredTS) { - kvs.add(kv); - } - qualValMap.put(key, kvs); - } - - Set keys = qualValMap.keySet(); - for (String cf : keys) { - List kvs = qualValMap.get(cf); - if (maxRevisions <= kvs.size()) { - for (int i = 0; i < maxRevisions; i++) { - finalKeyVals.add(kvs.get(i)); - } - } else { - finalKeyVals.addAll(kvs); - } - } - - if (finalKeyVals.size() == 0) { - return null; - } else { - KeyValue[] kvArray = new KeyValue[finalKeyVals.size()]; - finalKeyVals.toArray(kvArray); - return new Result(kvArray); + Set keys = qualValMap.keySet(); + for (String cf : keys) { + List kvs = qualValMap.get(cf); + if (maxRevisions <= kvs.size()) { + for (int i = 0; i < maxRevisions; i++) { + finalKeyVals.add(kvs.get(i)); } + } else { + finalKeyVals.addAll(kvs); + } } - /* - * @see org.apache.hadoop.hbase.mapred.TableRecordReader#close() - */ - @Override - public void close() { - this.resultItr = null; - this.scanner.close(); + if (finalKeyVals.size() == 0) { + return null; + } else { + KeyValue[] kvArray = new KeyValue[finalKeyVals.size()]; + finalKeyVals.toArray(kvArray); + return new Result(kvArray); } + } + + /* + * @see org.apache.hadoop.hbase.mapred.TableRecordReader#close() + */ + @Override + public void close() { + this.resultItr = null; + this.scanner.close(); + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ImportSequenceFile.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ImportSequenceFile.java index b9f02af..b9761b9 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ImportSequenceFile.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ImportSequenceFile.java @@ -61,192 +61,192 @@ * and data needs to be bulk loaded onto HBase. 
*/ class ImportSequenceFile { - private final static Logger LOG = LoggerFactory.getLogger(ImportSequenceFile.class); - private final static String NAME = "HCatImportSequenceFile"; - private final static String IMPORTER_WORK_DIR = "_IMPORTER_MR_WORK_DIR"; + private final static Logger LOG = LoggerFactory.getLogger(ImportSequenceFile.class); + private final static String NAME = "HCatImportSequenceFile"; + private final static String IMPORTER_WORK_DIR = "_IMPORTER_MR_WORK_DIR"; + + + private static class SequenceFileImporter extends Mapper { + + @Override + public void map(ImmutableBytesWritable rowKey, Put value, + Context context) + throws IOException { + try { + context.write(new ImmutableBytesWritable(value.getRow()), value); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + private static class ImporterOutputFormat extends HFileOutputFormat { + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException { + final OutputCommitter baseOutputCommitter = super.getOutputCommitter(context); - private static class SequenceFileImporter extends Mapper { + return new OutputCommitter() { + @Override + public void setupJob(JobContext jobContext) throws IOException { + baseOutputCommitter.setupJob(jobContext); + } @Override - public void map(ImmutableBytesWritable rowKey, Put value, - Context context) - throws IOException { - try { - context.write(new ImmutableBytesWritable(value.getRow()), value); - } catch (InterruptedException e) { - e.printStackTrace(); - } + public void setupTask(TaskAttemptContext taskContext) throws IOException { + baseOutputCommitter.setupTask(taskContext); } - } - private static class ImporterOutputFormat extends HFileOutputFormat { @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException { - final OutputCommitter baseOutputCommitter = super.getOutputCommitter(context); - - return new OutputCommitter() { - @Override - public void setupJob(JobContext jobContext) throws IOException { - baseOutputCommitter.setupJob(jobContext); - } - - @Override - public void setupTask(TaskAttemptContext taskContext) throws IOException { - baseOutputCommitter.setupTask(taskContext); - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { - return baseOutputCommitter.needsTaskCommit(taskContext); - } - - @Override - public void commitTask(TaskAttemptContext taskContext) throws IOException { - baseOutputCommitter.commitTask(taskContext); - } - - @Override - public void abortTask(TaskAttemptContext taskContext) throws IOException { - baseOutputCommitter.abortTask(taskContext); - } - - @Override - public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { - try { - baseOutputCommitter.abortJob(jobContext, state); - } finally { - cleanupScratch(jobContext); - } - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - try { - baseOutputCommitter.commitJob(jobContext); - Configuration conf = jobContext.getConfiguration(); - try { - //import hfiles - new LoadIncrementalHFiles(conf) - .doBulkLoad(HFileOutputFormat.getOutputPath(jobContext), - new HTable(conf, - conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY))); - } catch (Exception e) { - throw new IOException("BulkLoad failed.", e); - } - } finally { - cleanupScratch(jobContext); - } - } - - @Override - public void cleanupJob(JobContext context) throws IOException { - try { - baseOutputCommitter.cleanupJob(context); - } finally { - 
cleanupScratch(context); - } - } - - private void cleanupScratch(JobContext context) throws IOException { - FileSystem fs = FileSystem.get(context.getConfiguration()); - fs.delete(HFileOutputFormat.getOutputPath(context), true); - } - }; + public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { + return baseOutputCommitter.needsTaskCommit(taskContext); } - } - private static Job createSubmittableJob(Configuration conf, String tableName, Path inputDir, Path scratchDir, boolean localMode) - throws IOException { - Job job = new Job(conf, NAME + "_" + tableName); - job.setJarByClass(SequenceFileImporter.class); - FileInputFormat.setInputPaths(job, inputDir); - job.setInputFormatClass(SequenceFileInputFormat.class); - job.setMapperClass(SequenceFileImporter.class); - - HTable table = new HTable(conf, tableName); - job.setReducerClass(PutSortReducer.class); - FileOutputFormat.setOutputPath(job, scratchDir); - job.setMapOutputKeyClass(ImmutableBytesWritable.class); - job.setMapOutputValueClass(Put.class); - HFileOutputFormat.configureIncrementalLoad(job, table); - //override OutputFormatClass with our own so we can include cleanup in the committer - job.setOutputFormatClass(ImporterOutputFormat.class); - - //local mode doesn't support symbolic links so we have to manually set the actual path - if (localMode) { - String partitionFile = null; - for (URI uri : DistributedCache.getCacheFiles(job.getConfiguration())) { - if (DEFAULT_PATH.equals(uri.getFragment())) { - partitionFile = uri.toString(); - break; - } - } - partitionFile = partitionFile.substring(0, partitionFile.lastIndexOf("#")); - job.getConfiguration().set(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString()); + @Override + public void commitTask(TaskAttemptContext taskContext) throws IOException { + baseOutputCommitter.commitTask(taskContext); } - return job; - } + @Override + public void abortTask(TaskAttemptContext taskContext) throws IOException { + baseOutputCommitter.abortTask(taskContext); + } - /** - * Method to run the Importer MapReduce Job. Normally will be called by another MR job - * during OutputCommitter.commitJob(). 
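// A minimal usage sketch (not part of the patch): runJob(), defined below, is meant to be
// called from the output committer of the enclosing HCatalog job. "parentContext" is that
// job's JobContext; the table name and paths are hypothetical.
Path seqFileDir = new Path("/user/hcat/staging/sequencefiles");
Path scratchDir = new Path("/user/hcat/staging/hfiles");
boolean ok = ImportSequenceFile.runJob(parentContext, "my_hbase_table", seqFileDir, scratchDir);
// On success the HFiles written under scratchDir have been bulk-loaded into
// "my_hbase_table" via LoadIncrementalHFiles and the scratch output has been cleaned up.
if (!ok) {
  throw new IOException("Bulk import of staged sequence files failed");
}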
- * @param parentContext JobContext of the parent job - * @param tableName name of table to bulk load data into - * @param InputDir path of SequenceFile formatted data to read - * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported - * @return - */ - static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) { - Configuration parentConf = parentContext.getConfiguration(); - Configuration conf = new Configuration(); - for (Map.Entry el : parentConf) { - if (el.getKey().startsWith("hbase.")) - conf.set(el.getKey(), el.getValue()); - if (el.getKey().startsWith("mapred.cache.archives")) - conf.set(el.getKey(), el.getValue()); + @Override + public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { + try { + baseOutputCommitter.abortJob(jobContext, state); + } finally { + cleanupScratch(jobContext); + } } - //Inherit jar dependencies added to distributed cache loaded by parent job - conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", "")); - conf.set("mapreduce.job.cache.archives.visibilities", parentConf.get("mapreduce.job.cache.archives.visibilities", "")); + @Override + public void commitJob(JobContext jobContext) throws IOException { + try { + baseOutputCommitter.commitJob(jobContext); + Configuration conf = jobContext.getConfiguration(); + try { + //import hfiles + new LoadIncrementalHFiles(conf) + .doBulkLoad(HFileOutputFormat.getOutputPath(jobContext), + new HTable(conf, + conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY))); + } catch (Exception e) { + throw new IOException("BulkLoad failed.", e); + } + } finally { + cleanupScratch(jobContext); + } + } - //Temporary fix until hbase security is ready - //We need the written HFile to be world readable so - //hbase regionserver user has the privileges to perform a hdfs move - if (parentConf.getBoolean("hadoop.security.authorization", false)) { - FsPermission.setUMask(conf, FsPermission.valueOf("----------")); + @Override + public void cleanupJob(JobContext context) throws IOException { + try { + baseOutputCommitter.cleanupJob(context); + } finally { + cleanupScratch(context); + } } - conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); - conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false); - - boolean localMode = "local".equals(conf.get("mapred.job.tracker")); - - boolean success = false; - try { - FileSystem fs = FileSystem.get(parentConf); - Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR); - if (!fs.mkdirs(workDir)) - throw new IOException("Importer work directory already exists: " + workDir); - Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode); - job.setWorkingDirectory(workDir); - job.getCredentials().addAll(parentContext.getCredentials()); - success = job.waitForCompletion(true); - fs.delete(workDir, true); - //We only cleanup on success because failure might've been caused by existence of target directory - if (localMode && success) { - new ImporterOutputFormat().getOutputCommitter(HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID())).commitJob(job); - } - } catch (InterruptedException e) { - LOG.error("ImportSequenceFile Failed", e); - } catch (ClassNotFoundException e) { - LOG.error("ImportSequenceFile Failed", e); - } catch (IOException e) { - LOG.error("ImportSequenceFile Failed", e); + private void cleanupScratch(JobContext context) throws IOException { + FileSystem 
fs = FileSystem.get(context.getConfiguration()); + fs.delete(HFileOutputFormat.getOutputPath(context), true); + } + }; + } + } + + private static Job createSubmittableJob(Configuration conf, String tableName, Path inputDir, Path scratchDir, boolean localMode) + throws IOException { + Job job = new Job(conf, NAME + "_" + tableName); + job.setJarByClass(SequenceFileImporter.class); + FileInputFormat.setInputPaths(job, inputDir); + job.setInputFormatClass(SequenceFileInputFormat.class); + job.setMapperClass(SequenceFileImporter.class); + + HTable table = new HTable(conf, tableName); + job.setReducerClass(PutSortReducer.class); + FileOutputFormat.setOutputPath(job, scratchDir); + job.setMapOutputKeyClass(ImmutableBytesWritable.class); + job.setMapOutputValueClass(Put.class); + HFileOutputFormat.configureIncrementalLoad(job, table); + //override OutputFormatClass with our own so we can include cleanup in the committer + job.setOutputFormatClass(ImporterOutputFormat.class); + + //local mode doesn't support symbolic links so we have to manually set the actual path + if (localMode) { + String partitionFile = null; + for (URI uri : DistributedCache.getCacheFiles(job.getConfiguration())) { + if (DEFAULT_PATH.equals(uri.getFragment())) { + partitionFile = uri.toString(); + break; } - return success; + } + partitionFile = partitionFile.substring(0, partitionFile.lastIndexOf("#")); + job.getConfiguration().set(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString()); + } + + return job; + } + + /** + * Method to run the Importer MapReduce Job. Normally will be called by another MR job + * during OutputCommitter.commitJob(). + * @param parentContext JobContext of the parent job + * @param tableName name of table to bulk load data into + * @param InputDir path of SequenceFile formatted data to read + * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported + * @return + */ + static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) { + Configuration parentConf = parentContext.getConfiguration(); + Configuration conf = new Configuration(); + for (Map.Entry el : parentConf) { + if (el.getKey().startsWith("hbase.")) + conf.set(el.getKey(), el.getValue()); + if (el.getKey().startsWith("mapred.cache.archives")) + conf.set(el.getKey(), el.getValue()); + } + + //Inherit jar dependencies added to distributed cache loaded by parent job + conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", "")); + conf.set("mapreduce.job.cache.archives.visibilities", parentConf.get("mapreduce.job.cache.archives.visibilities", "")); + + //Temporary fix until hbase security is ready + //We need the written HFile to be world readable so + //hbase regionserver user has the privileges to perform a hdfs move + if (parentConf.getBoolean("hadoop.security.authorization", false)) { + FsPermission.setUMask(conf, FsPermission.valueOf("----------")); + } + + conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); + conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false); + + boolean localMode = "local".equals(conf.get("mapred.job.tracker")); + + boolean success = false; + try { + FileSystem fs = FileSystem.get(parentConf); + Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), IMPORTER_WORK_DIR); + if (!fs.mkdirs(workDir)) + throw new IOException("Importer work directory already exists: " + workDir); + Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, 
localMode); + job.setWorkingDirectory(workDir); + job.getCredentials().addAll(parentContext.getCredentials()); + success = job.waitForCompletion(true); + fs.delete(workDir, true); + //We only cleanup on success because failure might've been caused by existence of target directory + if (localMode && success) { + new ImporterOutputFormat().getOutputCommitter(HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID())).commitJob(job); + } + } catch (InterruptedException e) { + LOG.error("ImportSequenceFile Failed", e); + } catch (ClassNotFoundException e) { + LOG.error("ImportSequenceFile Failed", e); + } catch (IOException e) { + LOG.error("ImportSequenceFile Failed", e); } + return success; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ResultConverter.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ResultConverter.java index f15bac7..15ca56b 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ResultConverter.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ResultConverter.java @@ -32,27 +32,27 @@ */ interface ResultConverter { - /** - * convert HCatRecord instance to an HBase Put, used when writing out data. - * @param record instance to convert - * @return converted Put instance - * @throws IOException - */ - Put convert(HCatRecord record) throws IOException; - - /** - * convert HBase Result to HCatRecord instance, used when reading data. - * @param result instance to convert - * @return converted Result instance - * @throws IOException - */ - HCatRecord convert(Result result) throws IOException; - - /** - * Returns the hbase columns that are required for the scan. - * @return String containing hbase columns delimited by space. - * @throws IOException - */ - String getHBaseScanColumns() throws IOException; + /** + * convert HCatRecord instance to an HBase Put, used when writing out data. + * @param record instance to convert + * @return converted Put instance + * @throws IOException + */ + Put convert(HCatRecord record) throws IOException; + + /** + * convert HBase Result to HCatRecord instance, used when reading data. + * @param result instance to convert + * @return converted Result instance + * @throws IOException + */ + HCatRecord convert(Result result) throws IOException; + + /** + * Returns the hbase columns that are required for the scan. + * @return String containing hbase columns delimited by space. + * @throws IOException + */ + String getHBaseScanColumns() throws IOException; } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java index b5103fc..2c783b5 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java @@ -27,45 +27,45 @@ * committed, the transaction object is removed from the list. 
*/ public class FamilyRevision implements - Comparable { + Comparable { - private long revision; + private long revision; - private long timestamp; + private long timestamp; - /** - * Create a FamilyRevision object - * @param rev revision number - * @param ts expiration timestamp - */ - FamilyRevision(long rev, long ts) { - this.revision = rev; - this.timestamp = ts; - } + /** + * Create a FamilyRevision object + * @param rev revision number + * @param ts expiration timestamp + */ + FamilyRevision(long rev, long ts) { + this.revision = rev; + this.timestamp = ts; + } - public long getRevision() { - return revision; - } + public long getRevision() { + return revision; + } - public long getExpireTimestamp() { - return timestamp; - } + public long getExpireTimestamp() { + return timestamp; + } - void setExpireTimestamp(long ts) { - timestamp = ts; - } + void setExpireTimestamp(long ts) { + timestamp = ts; + } - @Override - public String toString() { - String description = "revision: " + revision + " ts: " + timestamp; - return description; - } + @Override + public String toString() { + String description = "revision: " + revision + " ts: " + timestamp; + return description; + } - @Override - public int compareTo(FamilyRevision o) { - long d = revision - o.getRevision(); - return (d < 0) ? -1 : (d > 0) ? 1 : 0; - } + @Override + public int compareTo(FamilyRevision o) { + long d = revision - o.getRevision(); + return (d < 0) ? -1 : (d > 0) ? 1 : 0; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/IDGenerator.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/IDGenerator.java index a427544..7b0edcf 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/IDGenerator.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/IDGenerator.java @@ -36,110 +36,110 @@ */ class IDGenerator implements LockListener { - private ZooKeeper zookeeper; - private String zNodeDataLoc; - private String zNodeLockBasePath; - private long id; - private static final Logger LOG = LoggerFactory.getLogger(IDGenerator.class); - - IDGenerator(ZooKeeper zookeeper, String tableName, String idGenNode) - throws IOException { - this.zookeeper = zookeeper; - this.zNodeDataLoc = idGenNode; - this.zNodeLockBasePath = PathUtil.getLockManagementNode(idGenNode); + private ZooKeeper zookeeper; + private String zNodeDataLoc; + private String zNodeLockBasePath; + private long id; + private static final Logger LOG = LoggerFactory.getLogger(IDGenerator.class); + + IDGenerator(ZooKeeper zookeeper, String tableName, String idGenNode) + throws IOException { + this.zookeeper = zookeeper; + this.zNodeDataLoc = idGenNode; + this.zNodeLockBasePath = PathUtil.getLockManagementNode(idGenNode); + } + + /** + * This method obtains a revision id for a transaction. + * + * @return revision ID + * @throws IOException + */ + public long obtainID() throws IOException { + WriteLock wLock = new WriteLock(zookeeper, zNodeLockBasePath, Ids.OPEN_ACL_UNSAFE); + wLock.setLockListener(this); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. 
+ throw new IOException("Unable to obtain lock to obtain id."); + } else { + id = incrementAndReadCounter(); + } + } catch (KeeperException e) { + LOG.warn("Exception while obtaining lock for ID.", e); + throw new IOException("Exception while obtaining lock for ID.", e); + } catch (InterruptedException e) { + LOG.warn("Exception while obtaining lock for ID.", e); + throw new IOException("Exception while obtaining lock for ID.", e); + } finally { + wLock.unlock(); } - - /** - * This method obtains a revision id for a transaction. - * - * @return revision ID - * @throws IOException - */ - public long obtainID() throws IOException { - WriteLock wLock = new WriteLock(zookeeper, zNodeLockBasePath, Ids.OPEN_ACL_UNSAFE); - wLock.setLockListener(this); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException("Unable to obtain lock to obtain id."); - } else { - id = incrementAndReadCounter(); - } - } catch (KeeperException e) { - LOG.warn("Exception while obtaining lock for ID.", e); - throw new IOException("Exception while obtaining lock for ID.", e); - } catch (InterruptedException e) { - LOG.warn("Exception while obtaining lock for ID.", e); - throw new IOException("Exception while obtaining lock for ID.", e); - } finally { - wLock.unlock(); - } - return id; - } - - /** - * This method reads the latest revision ID that has been used. The ID - * returned by this method cannot be used for transaction. - * @return revision ID - * @throws IOException - */ - public long readID() throws IOException { - long curId; - try { - Stat stat = new Stat(); - byte[] data = zookeeper.getData(this.zNodeDataLoc, false, stat); - curId = Long.parseLong(new String(data, Charset.forName("UTF-8"))); - } catch (KeeperException e) { - LOG.warn("Exception while reading current revision id.", e); - throw new IOException("Exception while reading current revision id.", e); - } catch (InterruptedException e) { - LOG.warn("Exception while reading current revision id.", e); - throw new IOException("Exception while reading current revision id.", e); - } - - return curId; + return id; + } + + /** + * This method reads the latest revision ID that has been used. The ID + * returned by this method cannot be used for transaction. 
+ * @return revision ID + * @throws IOException + */ + public long readID() throws IOException { + long curId; + try { + Stat stat = new Stat(); + byte[] data = zookeeper.getData(this.zNodeDataLoc, false, stat); + curId = Long.parseLong(new String(data, Charset.forName("UTF-8"))); + } catch (KeeperException e) { + LOG.warn("Exception while reading current revision id.", e); + throw new IOException("Exception while reading current revision id.", e); + } catch (InterruptedException e) { + LOG.warn("Exception while reading current revision id.", e); + throw new IOException("Exception while reading current revision id.", e); } + return curId; + } - private long incrementAndReadCounter() throws IOException { - long curId, usedId; - try { - Stat stat = new Stat(); - byte[] data = zookeeper.getData(this.zNodeDataLoc, false, stat); - usedId = Long.parseLong((new String(data, Charset.forName("UTF-8")))); - curId = usedId + 1; - String lastUsedID = String.valueOf(curId); - zookeeper.setData(this.zNodeDataLoc, lastUsedID.getBytes(Charset.forName("UTF-8")), -1); + private long incrementAndReadCounter() throws IOException { - } catch (KeeperException e) { - LOG.warn("Exception while incrementing revision id.", e); - throw new IOException("Exception while incrementing revision id. ", e); - } catch (InterruptedException e) { - LOG.warn("Exception while incrementing revision id.", e); - throw new IOException("Exception while incrementing revision id. ", e); - } + long curId, usedId; + try { + Stat stat = new Stat(); + byte[] data = zookeeper.getData(this.zNodeDataLoc, false, stat); + usedId = Long.parseLong((new String(data, Charset.forName("UTF-8")))); + curId = usedId + 1; + String lastUsedID = String.valueOf(curId); + zookeeper.setData(this.zNodeDataLoc, lastUsedID.getBytes(Charset.forName("UTF-8")), -1); - return curId; + } catch (KeeperException e) { + LOG.warn("Exception while incrementing revision id.", e); + throw new IOException("Exception while incrementing revision id. ", e); + } catch (InterruptedException e) { + LOG.warn("Exception while incrementing revision id.", e); + throw new IOException("Exception while incrementing revision id. ", e); } - /* - * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockAcquired() - */ - @Override - public void lockAcquired() { + return curId; + } + /* + * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockAcquired() + */ + @Override + public void lockAcquired() { - } - /* - * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockReleased() - */ - @Override - public void lockReleased() { + } - } + /* + * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockReleased() + */ + @Override + public void lockReleased() { + + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/PathUtil.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/PathUtil.java index 993308b..fa6a1b1 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/PathUtil.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/PathUtil.java @@ -36,97 +36,97 @@ */ public class PathUtil { - static final String DATA_DIR = "/data"; - static final String CLOCK_NODE = "/clock"; + static final String DATA_DIR = "/data"; + static final String CLOCK_NODE = "/clock"; - /** - * This method returns the data path associated with the currently - * running transactions of a given table and column/column family. 
- * @param baseDir - * @param tableName - * @param columnFamily - * @return The path of the running transactions data. - */ - static String getRunningTxnInfoPath(String baseDir, String tableName, - String columnFamily) { - String txnBasePath = getTransactionBasePath(baseDir); - String path = txnBasePath + "/" + tableName + "/" + columnFamily - + "/runningTxns"; - return path; - } + /** + * This method returns the data path associated with the currently + * running transactions of a given table and column/column family. + * @param baseDir + * @param tableName + * @param columnFamily + * @return The path of the running transactions data. + */ + static String getRunningTxnInfoPath(String baseDir, String tableName, + String columnFamily) { + String txnBasePath = getTransactionBasePath(baseDir); + String path = txnBasePath + "/" + tableName + "/" + columnFamily + + "/runningTxns"; + return path; + } - /** - * This method returns the data path associated with the aborted - * transactions of a given table and column/column family. - * @param baseDir The base directory for revision management. - * @param tableName The name of the table. - * @param columnFamily - * @return The path of the aborted transactions data. - */ - static String getAbortInformationPath(String baseDir, String tableName, - String columnFamily) { - String txnBasePath = getTransactionBasePath(baseDir); - String path = txnBasePath + "/" + tableName + "/" + columnFamily - + "/abortData"; - return path; - } + /** + * This method returns the data path associated with the aborted + * transactions of a given table and column/column family. + * @param baseDir The base directory for revision management. + * @param tableName The name of the table. + * @param columnFamily + * @return The path of the aborted transactions data. + */ + static String getAbortInformationPath(String baseDir, String tableName, + String columnFamily) { + String txnBasePath = getTransactionBasePath(baseDir); + String path = txnBasePath + "/" + tableName + "/" + columnFamily + + "/abortData"; + return path; + } - /** - * Gets the revision id node for a given table. - * - * @param baseDir the base dir for revision management. - * @param tableName the table name - * @return the revision id node path. - */ - static String getRevisionIDNode(String baseDir, String tableName) { - String rmBasePath = getTransactionBasePath(baseDir); - String revisionIDNode = rmBasePath + "/" + tableName + "/idgen"; - return revisionIDNode; - } + /** + * Gets the revision id node for a given table. + * + * @param baseDir the base dir for revision management. + * @param tableName the table name + * @return the revision id node path. + */ + static String getRevisionIDNode(String baseDir, String tableName) { + String rmBasePath = getTransactionBasePath(baseDir); + String revisionIDNode = rmBasePath + "/" + tableName + "/idgen"; + return revisionIDNode; + } - /** - * Gets the lock management node for any znode that needs to be locked. - * - * @param path the path of the znode. - * @return the lock management node path. - */ - static String getLockManagementNode(String path) { - String lockNode = path + "_locknode_"; - return lockNode; - } + /** + * Gets the lock management node for any znode that needs to be locked. + * + * @param path the path of the znode. + * @return the lock management node path. + */ + static String getLockManagementNode(String path) { + String lockNode = path + "_locknode_"; + return lockNode; + } - /** - * This method returns the base path for the transaction data. 
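PathUtil is pure string concatenation, so the znode layout it defines can be written out directly. The following sketch prints the paths the helpers in this file produce (including getTransactionBasePath and getClockPath, which follow below) for an illustrative base dir, table, and column family.

    public class PathLayoutSketch {
        public static void main(String[] args) {
            String baseDir = "/rm_base";   // illustrative revision-manager base dir
            String table = "mytable";      // illustrative table name
            String cf = "cf1";             // illustrative column family

            // Everything except the clock hangs off <baseDir>/data (DATA_DIR).
            String txnBase = baseDir + "/data";
            System.out.println(txnBase + "/" + table + "/" + cf + "/runningTxns"); // getRunningTxnInfoPath
            System.out.println(txnBase + "/" + table + "/" + cf + "/abortData");   // getAbortInformationPath
            System.out.println(txnBase + "/" + table + "/idgen");                  // getRevisionIDNode
            System.out.println(txnBase + "/" + table + "/idgen" + "_locknode_");   // getLockManagementNode(revisionIDNode)
            System.out.println(baseDir + "/clock");                                // getClockPath (CLOCK_NODE)
        }
    }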
- * - * @param baseDir The base dir for revision management. - * @return The base path for the transaction data. - */ - static String getTransactionBasePath(String baseDir) { - String txnBaseNode = baseDir + DATA_DIR; - return txnBaseNode; - } + /** + * This method returns the base path for the transaction data. + * + * @param baseDir The base dir for revision management. + * @return The base path for the transaction data. + */ + static String getTransactionBasePath(String baseDir) { + String txnBaseNode = baseDir + DATA_DIR; + return txnBaseNode; + } - /** - * Gets the txn data path for a given table. - * - * @param baseDir the base dir for revision management. - * @param tableName the table name - * @return the txn data path for the table. - */ - static String getTxnDataPath(String baseDir, String tableName) { - String txnBasePath = getTransactionBasePath(baseDir); - String path = txnBasePath + "/" + tableName; - return path; - } + /** + * Gets the txn data path for a given table. + * + * @param baseDir the base dir for revision management. + * @param tableName the table name + * @return the txn data path for the table. + */ + static String getTxnDataPath(String baseDir, String tableName) { + String txnBasePath = getTransactionBasePath(baseDir); + String path = txnBasePath + "/" + tableName; + return path; + } - /** - * This method returns the data path for clock node. - * - * @param baseDir - * @return The data path for clock. - */ - static String getClockPath(String baseDir) { - String clockNode = baseDir + CLOCK_NODE; - return clockNode; - } + /** + * This method returns the data path for clock node. + * + * @param baseDir + * @return The data path for clock. + */ + static String getClockPath(String baseDir) { + String clockNode = baseDir + CLOCK_NODE; + return clockNode; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RMConstants.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RMConstants.java index 4d6fa80..ee10005 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RMConstants.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RMConstants.java @@ -20,11 +20,11 @@ package org.apache.hcatalog.hbase.snapshot; public class RMConstants { - public static final String REVISION_MGR_ENDPOINT_IMPL_CLASS = "revision.manager.endpoint.impl.class"; + public static final String REVISION_MGR_ENDPOINT_IMPL_CLASS = "revision.manager.endpoint.impl.class"; - public static final String WRITE_TRANSACTION_TIMEOUT = "revision.manager.writeTxn.timeout"; + public static final String WRITE_TRANSACTION_TIMEOUT = "revision.manager.writeTxn.timeout"; - public static final String ZOOKEEPER_HOSTLIST = "revision.manager.zk.hostList"; + public static final String ZOOKEEPER_HOSTLIST = "revision.manager.zk.hostList"; - public static final String ZOOKEEPER_DATADIR = "revision.manager.zk.dataDir"; + public static final String ZOOKEEPER_DATADIR = "revision.manager.zk.dataDir"; } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java index 4a6f842..ca0f90e 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java @@ -27,122 +27,122 @@ * This interface provides APIs 
for implementing revision management. */ public interface RevisionManager { - /** - * Version property required by HBase to use this interface - * for CoprocessorProtocol / RPC. - */ - public static final long VERSION = 1L; // do not change - - /** - * Initialize the revision manager. - */ - public void initialize(Configuration conf); - - /** - * Opens the revision manager. - * - * @throws IOException - */ - public void open() throws IOException; - - /** - * Closes the revision manager. - * - * @throws IOException - */ - public void close() throws IOException; - - /** - * Setup revision management for a newly created hbase table. - * @param table the hbase table name - * @param columnFamilies the column families in the table - */ - public void createTable(String table, List columnFamilies) throws IOException; - - /** - * Remove table data from revision manager for a dropped table. - * @param table the hbase table name - */ - public void dropTable(String table) throws IOException; - - /** - * Start the write transaction. - * - * @param table - * @param families - * @return a new Transaction - * @throws IOException - */ - public Transaction beginWriteTransaction(String table, List families) - throws IOException; - - /** - * Start the write transaction. - * - * @param table - * @param families - * @param keepAlive - * @return a new Transaction - * @throws IOException - */ - public Transaction beginWriteTransaction(String table, - List families, long keepAlive) throws IOException; - - /** - * Commit the write transaction. - * - * @param transaction - * @throws IOException - */ - public void commitWriteTransaction(Transaction transaction) - throws IOException; - - /** - * Abort the write transaction. - * - * @param transaction - * @throws IOException - */ - public void abortWriteTransaction(Transaction transaction) - throws IOException; - - /** - * Get the list of aborted Transactions for a column family - * - * @param table the table name - * @param columnFamily the column family name - * @return a list of aborted WriteTransactions - * @throws java.io.IOException - */ - public List getAbortedWriteTransactions(String table, - String columnFamily) throws IOException; - - /** - * Create the latest snapshot of the table. - * - * @param tableName - * @return a new snapshot - * @throws IOException - */ - public TableSnapshot createSnapshot(String tableName) throws IOException; - - /** - * Create the snapshot of the table using the revision number. - * - * @param tableName - * @param revision - * @return a new snapshot - * @throws IOException - */ - public TableSnapshot createSnapshot(String tableName, long revision) - throws IOException; - - /** - * Extends the expiration of a transaction by the time indicated by keep alive. - * - * @param transaction - * @throws IOException - */ - public void keepAlive(Transaction transaction) throws IOException; + /** + * Version property required by HBase to use this interface + * for CoprocessorProtocol / RPC. + */ + public static final long VERSION = 1L; // do not change + + /** + * Initialize the revision manager. + */ + public void initialize(Configuration conf); + + /** + * Opens the revision manager. + * + * @throws IOException + */ + public void open() throws IOException; + + /** + * Closes the revision manager. + * + * @throws IOException + */ + public void close() throws IOException; + + /** + * Setup revision management for a newly created hbase table. 
+ * @param table the hbase table name + * @param columnFamilies the column families in the table + */ + public void createTable(String table, List columnFamilies) throws IOException; + + /** + * Remove table data from revision manager for a dropped table. + * @param table the hbase table name + */ + public void dropTable(String table) throws IOException; + + /** + * Start the write transaction. + * + * @param table + * @param families + * @return a new Transaction + * @throws IOException + */ + public Transaction beginWriteTransaction(String table, List families) + throws IOException; + + /** + * Start the write transaction. + * + * @param table + * @param families + * @param keepAlive + * @return a new Transaction + * @throws IOException + */ + public Transaction beginWriteTransaction(String table, + List families, long keepAlive) throws IOException; + + /** + * Commit the write transaction. + * + * @param transaction + * @throws IOException + */ + public void commitWriteTransaction(Transaction transaction) + throws IOException; + + /** + * Abort the write transaction. + * + * @param transaction + * @throws IOException + */ + public void abortWriteTransaction(Transaction transaction) + throws IOException; + + /** + * Get the list of aborted Transactions for a column family + * + * @param table the table name + * @param columnFamily the column family name + * @return a list of aborted WriteTransactions + * @throws java.io.IOException + */ + public List getAbortedWriteTransactions(String table, + String columnFamily) throws IOException; + + /** + * Create the latest snapshot of the table. + * + * @param tableName + * @return a new snapshot + * @throws IOException + */ + public TableSnapshot createSnapshot(String tableName) throws IOException; + + /** + * Create the snapshot of the table using the revision number. + * + * @param tableName + * @param revision + * @return a new snapshot + * @throws IOException + */ + public TableSnapshot createSnapshot(String tableName, long revision) + throws IOException; + + /** + * Extends the expiration of a transaction by the time indicated by keep alive. 
+ * + * @param transaction + * @throws IOException + */ + public void keepAlive(Transaction transaction) throws IOException; } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java index d5c4329..7ee5412 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java @@ -25,35 +25,35 @@ public class RevisionManagerConfiguration { - public static Configuration addResources(Configuration conf) { - conf.addDefaultResource("revision-manager-default.xml"); - conf.addResource("revision-manager-site.xml"); - return conf; - } + public static Configuration addResources(Configuration conf) { + conf.addDefaultResource("revision-manager-default.xml"); + conf.addResource("revision-manager-site.xml"); + return conf; + } - /** - * Creates a Configuration with Revision Manager resources - * @return a Configuration with Revision Manager resources - */ - public static Configuration create() { - Configuration conf = new Configuration(); - return addResources(conf); - } + /** + * Creates a Configuration with Revision Manager resources + * @return a Configuration with Revision Manager resources + */ + public static Configuration create() { + Configuration conf = new Configuration(); + return addResources(conf); + } - /** - * Creates a clone of passed configuration. - * @param that Configuration to clone. - * @return a Configuration created with the revision-manager-*.xml files plus - * the given configuration. - */ - public static Configuration create(final Configuration that) { - Configuration conf = create(); - //we need to merge things instead of doing new Configuration(that) - //because of a bug in Configuration wherein the config - //set on the MR fronted will get loaded on the backend as resouce called job.xml - //hence adding resources on the backed could potentially overwrite properties - //set on the frontend which we shouldn't be doing here - HBaseConfiguration.merge(conf, that); - return conf; - } + /** + * Creates a clone of passed configuration. + * @param that Configuration to clone. + * @return a Configuration created with the revision-manager-*.xml files plus + * the given configuration. 
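Read as a whole, the RevisionManager interface above implies a simple write-side lifecycle: open the manager, begin a write transaction for the column families being written, then commit or abort it, and close. A hedged sketch of that flow follows; the table and family names are invented, the generic parameters (List<String>) are assumed because the raw diff does not show them, and the try/catch structure is mine rather than anything prescribed by the patch.

    import java.io.IOException;
    import java.util.Arrays;
    import java.util.List;

    import org.apache.hcatalog.hbase.snapshot.RevisionManager;
    import org.apache.hcatalog.hbase.snapshot.Transaction;

    public class WriteFlowSketch {
        static void writeWithRevision(RevisionManager rm) throws IOException {
            List<String> families = Arrays.asList("cf1");       // illustrative column family
            rm.open();
            try {
                Transaction txn = rm.beginWriteTransaction("mytable", families);
                try {
                    // ... issue the HBase puts, tagging cells with txn.getRevisionNumber() ...
                    rm.commitWriteTransaction(txn);
                } catch (IOException e) {
                    rm.abortWriteTransaction(txn);               // records the revision as aborted
                    throw e;
                }
            } finally {
                rm.close();
            }
        }
    }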
+ */ + public static Configuration create(final Configuration that) { + Configuration conf = create(); + //we need to merge things instead of doing new Configuration(that) + //because of a bug in Configuration wherein the config + //set on the MR fronted will get loaded on the backend as resouce called job.xml + //hence adding resources on the backed could potentially overwrite properties + //set on the frontend which we shouldn't be doing here + HBaseConfiguration.merge(conf, that); + return conf; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java index 49d9ad1..3b571ea 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java @@ -36,106 +36,106 @@ */ public class RevisionManagerEndpoint extends BaseEndpointCoprocessor implements RevisionManagerProtocol { - private static final Logger LOGGER = - LoggerFactory.getLogger(RevisionManagerEndpoint.class.getName()); - - private RevisionManager rmImpl = null; - - @Override - public void start(CoprocessorEnvironment env) { - super.start(env); - try { - Configuration conf = RevisionManagerConfiguration.create(env.getConfiguration()); - String className = conf.get(RMConstants.REVISION_MGR_ENDPOINT_IMPL_CLASS, - ZKBasedRevisionManager.class.getName()); - LOGGER.debug("Using Revision Manager implementation: {}", className); - rmImpl = RevisionManagerFactory.getOpenedRevisionManager(className, conf); - } catch (IOException e) { - LOGGER.error("Failed to initialize revision manager", e); - } + private static final Logger LOGGER = + LoggerFactory.getLogger(RevisionManagerEndpoint.class.getName()); + + private RevisionManager rmImpl = null; + + @Override + public void start(CoprocessorEnvironment env) { + super.start(env); + try { + Configuration conf = RevisionManagerConfiguration.create(env.getConfiguration()); + String className = conf.get(RMConstants.REVISION_MGR_ENDPOINT_IMPL_CLASS, + ZKBasedRevisionManager.class.getName()); + LOGGER.debug("Using Revision Manager implementation: {}", className); + rmImpl = RevisionManagerFactory.getOpenedRevisionManager(className, conf); + } catch (IOException e) { + LOGGER.error("Failed to initialize revision manager", e); } - - @Override - public void stop(CoprocessorEnvironment env) { - if (rmImpl != null) { - try { - rmImpl.close(); - } catch (IOException e) { - LOGGER.warn("Error closing revision manager.", e); - } - } - super.stop(env); - } - - @Override - public void initialize(Configuration conf) { - // do nothing, HBase controls life cycle - } - - @Override - public void open() throws IOException { - // do nothing, HBase controls life cycle - } - - @Override - public void close() throws IOException { - // do nothing, HBase controls life cycle - } - - @Override - public void createTable(String table, List columnFamilies) throws IOException { - rmImpl.createTable(table, columnFamilies); - } - - @Override - public void dropTable(String table) throws IOException { - rmImpl.dropTable(table); - } - - @Override - public Transaction beginWriteTransaction(String table, List families) - throws IOException { - return rmImpl.beginWriteTransaction(table, families); - } - - @Override - public Transaction beginWriteTransaction(String table, - List families, long keepAlive) 
throws IOException { - return rmImpl.beginWriteTransaction(table, families, keepAlive); - } - - @Override - public void commitWriteTransaction(Transaction transaction) - throws IOException { - rmImpl.commitWriteTransaction(transaction); - } - - @Override - public void abortWriteTransaction(Transaction transaction) - throws IOException { - rmImpl.abortWriteTransaction(transaction); - } - - @Override - public TableSnapshot createSnapshot(String tableName) throws IOException { - return rmImpl.createSnapshot(tableName); - } - - @Override - public TableSnapshot createSnapshot(String tableName, long revision) - throws IOException { - return rmImpl.createSnapshot(tableName, revision); - } - - @Override - public void keepAlive(Transaction transaction) throws IOException { - rmImpl.keepAlive(transaction); - } - - @Override - public List getAbortedWriteTransactions(String table, - String columnFamily) throws IOException { - return rmImpl.getAbortedWriteTransactions(table, columnFamily); + } + + @Override + public void stop(CoprocessorEnvironment env) { + if (rmImpl != null) { + try { + rmImpl.close(); + } catch (IOException e) { + LOGGER.warn("Error closing revision manager.", e); + } } + super.stop(env); + } + + @Override + public void initialize(Configuration conf) { + // do nothing, HBase controls life cycle + } + + @Override + public void open() throws IOException { + // do nothing, HBase controls life cycle + } + + @Override + public void close() throws IOException { + // do nothing, HBase controls life cycle + } + + @Override + public void createTable(String table, List columnFamilies) throws IOException { + rmImpl.createTable(table, columnFamilies); + } + + @Override + public void dropTable(String table) throws IOException { + rmImpl.dropTable(table); + } + + @Override + public Transaction beginWriteTransaction(String table, List families) + throws IOException { + return rmImpl.beginWriteTransaction(table, families); + } + + @Override + public Transaction beginWriteTransaction(String table, + List families, long keepAlive) throws IOException { + return rmImpl.beginWriteTransaction(table, families, keepAlive); + } + + @Override + public void commitWriteTransaction(Transaction transaction) + throws IOException { + rmImpl.commitWriteTransaction(transaction); + } + + @Override + public void abortWriteTransaction(Transaction transaction) + throws IOException { + rmImpl.abortWriteTransaction(transaction); + } + + @Override + public TableSnapshot createSnapshot(String tableName) throws IOException { + return rmImpl.createSnapshot(tableName); + } + + @Override + public TableSnapshot createSnapshot(String tableName, long revision) + throws IOException { + return rmImpl.createSnapshot(tableName, revision); + } + + @Override + public void keepAlive(Transaction transaction) throws IOException { + rmImpl.keepAlive(transaction); + } + + @Override + public List getAbortedWriteTransactions(String table, + String columnFamily) throws IOException { + return rmImpl.getAbortedWriteTransactions(table, columnFamily); + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java index c6ee50e..a25da22 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java 
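RevisionManagerEndpoint only does useful work once HBase loads it as a region coprocessor. Here is a hedged sketch of registering it through the standard hbase.coprocessor.region.classes key; in practice this setting normally lives in hbase-site.xml on the region servers, and whether HCatalog's installation scripts already add it is not shown in this patch.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hcatalog.hbase.snapshot.RevisionManagerEndpoint;

    public class EndpointRegistrationSketch {
        static Configuration withEndpoint() {
            Configuration conf = HBaseConfiguration.create();
            // Standard HBase key listing region coprocessors to load on every region.
            conf.set("hbase.coprocessor.region.classes",
                    RevisionManagerEndpoint.class.getName());
            return conf;
        }
    }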
@@ -34,92 +34,92 @@ */ public class RevisionManagerEndpointClient implements RevisionManager, Configurable { - private Configuration conf = null; - private RevisionManager rmProxy; - - @Override - public Configuration getConf() { - return this.conf; - } - - @Override - public void setConf(Configuration arg0) { - this.conf = arg0; - } - - @Override - public void initialize(Configuration conf) { - // do nothing - } - - @Override - public void open() throws IOException { - // clone to adjust RPC settings unique to proxy - Configuration clonedConf = new Configuration(conf); - // conf.set("hbase.ipc.client.connect.max.retries", "0"); - // conf.setInt(HConstants.HBASE_CLIENT_RPC_MAXATTEMPTS, 1); - clonedConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); // do not retry RPC - HTable table = new HTable(clonedConf, HConstants.ROOT_TABLE_NAME); - rmProxy = table.coprocessorProxy(RevisionManagerProtocol.class, - Bytes.toBytes("anyRow")); - rmProxy.open(); - } - - @Override - public void close() throws IOException { - rmProxy.close(); - } - - @Override - public void createTable(String table, List columnFamilies) throws IOException { - rmProxy.createTable(table, columnFamilies); - } - - @Override - public void dropTable(String table) throws IOException { - rmProxy.dropTable(table); - } - - @Override - public Transaction beginWriteTransaction(String table, List families) throws IOException { - return rmProxy.beginWriteTransaction(table, families); - } - - @Override - public Transaction beginWriteTransaction(String table, List families, long keepAlive) - throws IOException { - return rmProxy.beginWriteTransaction(table, families, keepAlive); - } - - @Override - public void commitWriteTransaction(Transaction transaction) throws IOException { - rmProxy.commitWriteTransaction(transaction); - } - - @Override - public void abortWriteTransaction(Transaction transaction) throws IOException { - rmProxy.abortWriteTransaction(transaction); - } - - @Override - public List getAbortedWriteTransactions(String table, String columnFamily) - throws IOException { - return rmProxy.getAbortedWriteTransactions(table, columnFamily); - } - - @Override - public TableSnapshot createSnapshot(String tableName) throws IOException { - return rmProxy.createSnapshot(tableName); - } - - @Override - public TableSnapshot createSnapshot(String tableName, long revision) throws IOException { - return rmProxy.createSnapshot(tableName, revision); - } - - @Override - public void keepAlive(Transaction transaction) throws IOException { - rmProxy.keepAlive(transaction); - } + private Configuration conf = null; + private RevisionManager rmProxy; + + @Override + public Configuration getConf() { + return this.conf; + } + + @Override + public void setConf(Configuration arg0) { + this.conf = arg0; + } + + @Override + public void initialize(Configuration conf) { + // do nothing + } + + @Override + public void open() throws IOException { + // clone to adjust RPC settings unique to proxy + Configuration clonedConf = new Configuration(conf); + // conf.set("hbase.ipc.client.connect.max.retries", "0"); + // conf.setInt(HConstants.HBASE_CLIENT_RPC_MAXATTEMPTS, 1); + clonedConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); // do not retry RPC + HTable table = new HTable(clonedConf, HConstants.ROOT_TABLE_NAME); + rmProxy = table.coprocessorProxy(RevisionManagerProtocol.class, + Bytes.toBytes("anyRow")); + rmProxy.open(); + } + + @Override + public void close() throws IOException { + rmProxy.close(); + } + + @Override + public void createTable(String 
table, List columnFamilies) throws IOException { + rmProxy.createTable(table, columnFamilies); + } + + @Override + public void dropTable(String table) throws IOException { + rmProxy.dropTable(table); + } + + @Override + public Transaction beginWriteTransaction(String table, List families) throws IOException { + return rmProxy.beginWriteTransaction(table, families); + } + + @Override + public Transaction beginWriteTransaction(String table, List families, long keepAlive) + throws IOException { + return rmProxy.beginWriteTransaction(table, families, keepAlive); + } + + @Override + public void commitWriteTransaction(Transaction transaction) throws IOException { + rmProxy.commitWriteTransaction(transaction); + } + + @Override + public void abortWriteTransaction(Transaction transaction) throws IOException { + rmProxy.abortWriteTransaction(transaction); + } + + @Override + public List getAbortedWriteTransactions(String table, String columnFamily) + throws IOException { + return rmProxy.getAbortedWriteTransactions(table, columnFamily); + } + + @Override + public TableSnapshot createSnapshot(String tableName) throws IOException { + return rmProxy.createSnapshot(tableName); + } + + @Override + public TableSnapshot createSnapshot(String tableName, long revision) throws IOException { + return rmProxy.createSnapshot(tableName, revision); + } + + @Override + public void keepAlive(Transaction transaction) throws IOException { + rmProxy.keepAlive(transaction); + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerFactory.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerFactory.java index 2d3d3e4..4c6f1a1 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerFactory.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerFactory.java @@ -29,77 +29,77 @@ */ public class RevisionManagerFactory { - public static final String REVISION_MGR_IMPL_CLASS = "revision.manager.impl.class"; + public static final String REVISION_MGR_IMPL_CLASS = "revision.manager.impl.class"; - /** - * Gets an instance of revision manager. - * - * @param conf The configuration required to created the revision manager. - * @return the revision manager An instance of revision manager. - * @throws IOException Signals that an I/O exception has occurred. - */ - private static RevisionManager getRevisionManager(String className, Configuration conf) throws IOException { + /** + * Gets an instance of revision manager. + * + * @param conf The configuration required to created the revision manager. + * @return the revision manager An instance of revision manager. + * @throws IOException Signals that an I/O exception has occurred. 
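The client above talks to that endpoint over an HBase coprocessor proxy, so client-side code never opens its own ZooKeeper connection. It is selected like any other implementation, via the factory property; a hedged sketch follows, assuming the factory behaviour shown later in this patch (it calls setConf() on Configurable implementations and then open()).

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hcatalog.hbase.snapshot.RevisionManager;
    import org.apache.hcatalog.hbase.snapshot.RevisionManagerEndpointClient;
    import org.apache.hcatalog.hbase.snapshot.RevisionManagerFactory;

    public class ClientSelectionSketch {
        static RevisionManager openProxyBackedManager(Configuration conf) throws IOException {
            conf.set(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS,
                    RevisionManagerEndpointClient.class.getName());
            // The factory injects conf (the client is Configurable) and opens the proxy.
            return RevisionManagerFactory.getOpenedRevisionManager(conf);
        }
    }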
+ */ + private static RevisionManager getRevisionManager(String className, Configuration conf) throws IOException { - RevisionManager revisionMgr; - ClassLoader classLoader = Thread.currentThread() - .getContextClassLoader(); - if (classLoader == null) { - classLoader = RevisionManagerFactory.class.getClassLoader(); - } - try { - Class revisionMgrClass = Class - .forName(className, true, classLoader).asSubclass(RevisionManager.class); - revisionMgr = (RevisionManager) revisionMgrClass.newInstance(); - revisionMgr.initialize(conf); - } catch (ClassNotFoundException e) { - throw new IOException( - "The implementation class of revision manager not found.", - e); - } catch (InstantiationException e) { - throw new IOException( - "Exception encountered during instantiating revision manager implementation.", - e); - } catch (IllegalAccessException e) { - throw new IOException( - "IllegalAccessException encountered during instantiating revision manager implementation.", - e); - } catch (IllegalArgumentException e) { - throw new IOException( - "IllegalArgumentException encountered during instantiating revision manager implementation.", - e); - } - return revisionMgr; + RevisionManager revisionMgr; + ClassLoader classLoader = Thread.currentThread() + .getContextClassLoader(); + if (classLoader == null) { + classLoader = RevisionManagerFactory.class.getClassLoader(); } + try { + Class revisionMgrClass = Class + .forName(className, true, classLoader).asSubclass(RevisionManager.class); + revisionMgr = (RevisionManager) revisionMgrClass.newInstance(); + revisionMgr.initialize(conf); + } catch (ClassNotFoundException e) { + throw new IOException( + "The implementation class of revision manager not found.", + e); + } catch (InstantiationException e) { + throw new IOException( + "Exception encountered during instantiating revision manager implementation.", + e); + } catch (IllegalAccessException e) { + throw new IOException( + "IllegalAccessException encountered during instantiating revision manager implementation.", + e); + } catch (IllegalArgumentException e) { + throw new IOException( + "IllegalArgumentException encountered during instantiating revision manager implementation.", + e); + } + return revisionMgr; + } - /** - * Internally used by endpoint implementation to instantiate from different configuration setting. - * @param className - * @param conf - * @return the opened revision manager - * @throws IOException - */ - static RevisionManager getOpenedRevisionManager(String className, Configuration conf) throws IOException { + /** + * Internally used by endpoint implementation to instantiate from different configuration setting. + * @param className + * @param conf + * @return the opened revision manager + * @throws IOException + */ + static RevisionManager getOpenedRevisionManager(String className, Configuration conf) throws IOException { - RevisionManager revisionMgr = RevisionManagerFactory.getRevisionManager(className, conf); - if (revisionMgr instanceof Configurable) { - ((Configurable) revisionMgr).setConf(conf); - } - revisionMgr.open(); - return revisionMgr; + RevisionManager revisionMgr = RevisionManagerFactory.getRevisionManager(className, conf); + if (revisionMgr instanceof Configurable) { + ((Configurable) revisionMgr).setConf(conf); } + revisionMgr.open(); + return revisionMgr; + } - /** - * Gets an instance of revision manager which is opened. - * The revision manager implementation can be specified as {@link #REVISION_MGR_IMPL_CLASS}, - * default is {@link ZKBasedRevisionManager}. 
- * @param conf revision manager configuration - * @return RevisionManager An instance of revision manager. - * @throws IOException - */ - public static RevisionManager getOpenedRevisionManager(Configuration conf) throws IOException { - String className = conf.get(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS, - ZKBasedRevisionManager.class.getName()); - return getOpenedRevisionManager(className, conf); - } + /** + * Gets an instance of revision manager which is opened. + * The revision manager implementation can be specified as {@link #REVISION_MGR_IMPL_CLASS}, + * default is {@link ZKBasedRevisionManager}. + * @param conf revision manager configuration + * @return RevisionManager An instance of revision manager. + * @throws IOException + */ + public static RevisionManager getOpenedRevisionManager(Configuration conf) throws IOException { + String className = conf.get(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS, + ZKBasedRevisionManager.class.getName()); + return getOpenedRevisionManager(className, conf); + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerProtocol.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerProtocol.java index 4cbde74..a46ce39 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerProtocol.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerProtocol.java @@ -25,6 +25,6 @@ * (needs to extend CoprocessorProtocol) */ public interface RevisionManagerProtocol extends RevisionManager, - CoprocessorProtocol { + CoprocessorProtocol { } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/TableSnapshot.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/TableSnapshot.java index fa94157..beb6513 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/TableSnapshot.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/TableSnapshot.java @@ -28,63 +28,63 @@ */ public class TableSnapshot implements Serializable { - private String name; + private String name; - private Map cfRevisionMap; + private Map cfRevisionMap; - private long latestRevision; + private long latestRevision; - public TableSnapshot(String name, Map cfRevMap, long latestRevision) { - this.name = name; - if (cfRevMap == null) { - throw new IllegalArgumentException("revision map cannot be null"); - } - this.cfRevisionMap = cfRevMap; - this.latestRevision = latestRevision; + public TableSnapshot(String name, Map cfRevMap, long latestRevision) { + this.name = name; + if (cfRevMap == null) { + throw new IllegalArgumentException("revision map cannot be null"); } + this.cfRevisionMap = cfRevMap; + this.latestRevision = latestRevision; + } - /** - * Gets the table name. - * - * @return String The name of the table. - */ - public String getTableName() { - return name; - } + /** + * Gets the table name. + * + * @return String The name of the table. + */ + public String getTableName() { + return name; + } - /** - * Gets the column families. - * - * @return List A list of column families associated with the snapshot. - */ - public List getColumnFamilies(){ - return new ArrayList(this.cfRevisionMap.keySet()); - } + /** + * Gets the column families. + * + * @return List A list of column families associated with the snapshot. 
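For the default ZooKeeper-backed setup the factory is the single entry point: it resolves the implementation class from revision.manager.impl.class, instantiates it reflectively, and hands it back already opened. A hedged usage sketch follows; the host list, data dir, and timeout values are placeholders (the actual defaults come from the revision-manager-*.xml resources added by RevisionManagerConfiguration, which are not shown in this patch).

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hcatalog.hbase.snapshot.RMConstants;
    import org.apache.hcatalog.hbase.snapshot.RevisionManager;
    import org.apache.hcatalog.hbase.snapshot.RevisionManagerConfiguration;
    import org.apache.hcatalog.hbase.snapshot.RevisionManagerFactory;
    import org.apache.hcatalog.hbase.snapshot.ZKBasedRevisionManager;

    public class FactorySketch {
        static RevisionManager openZkRevisionManager() throws IOException {
            Configuration conf = RevisionManagerConfiguration.create();
            conf.set(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS,
                    ZKBasedRevisionManager.class.getName());                        // also the default
            conf.set(RMConstants.ZOOKEEPER_HOSTLIST, "zkhost1:2181,zkhost2:2181");  // placeholder ensemble
            conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base");                    // placeholder base znode
            conf.set(RMConstants.WRITE_TRANSACTION_TIMEOUT, "60000");               // placeholder timeout
            return RevisionManagerFactory.getOpenedRevisionManager(conf);
        }
    }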
+ */ + public List getColumnFamilies(){ + return new ArrayList(this.cfRevisionMap.keySet()); + } - /** - * Gets the revision. - * - * @param familyName The name of the column family. - * @return the revision - */ - public long getRevision(String familyName){ - if(cfRevisionMap.containsKey(familyName)) - return cfRevisionMap.get(familyName); - return latestRevision; - } + /** + * Gets the revision. + * + * @param familyName The name of the column family. + * @return the revision + */ + public long getRevision(String familyName){ + if(cfRevisionMap.containsKey(familyName)) + return cfRevisionMap.get(familyName); + return latestRevision; + } - /** - * @return the latest committed revision when this snapshot was taken - */ - public long getLatestRevision() { - return latestRevision; - } + /** + * @return the latest committed revision when this snapshot was taken + */ + public long getLatestRevision() { + return latestRevision; + } - @Override - public String toString() { - String snapshot = "Table Name : " + name +" Latest Revision: " + latestRevision - + " Column Familiy revision : " + cfRevisionMap.toString(); - return snapshot; - } + @Override + public String toString() { + String snapshot = "Table Name : " + name +" Latest Revision: " + latestRevision + + " Column Familiy revision : " + cfRevisionMap.toString(); + return snapshot; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/Transaction.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/Transaction.java index 1d17ca5..09a5f89 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/Transaction.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/Transaction.java @@ -28,89 +28,89 @@ */ public class Transaction implements Serializable { - private String tableName; - private List columnFamilies = new ArrayList(); - private long timeStamp; - private long keepAlive; - private long revision; + private String tableName; + private List columnFamilies = new ArrayList(); + private long timeStamp; + private long keepAlive; + private long revision; - Transaction(String tableName, List columnFamilies, long revision, long timestamp) { - this.tableName = tableName; - this.columnFamilies = columnFamilies; - this.timeStamp = timestamp; - this.revision = revision; - } + Transaction(String tableName, List columnFamilies, long revision, long timestamp) { + this.tableName = tableName; + this.columnFamilies = columnFamilies; + this.timeStamp = timestamp; + this.revision = revision; + } - /** - * @return The revision number associated with a transaction. - */ - public long getRevisionNumber() { - return this.revision; - } + /** + * @return The revision number associated with a transaction. + */ + public long getRevisionNumber() { + return this.revision; + } - /** - * @return The table name associated with a transaction. - */ - public String getTableName() { - return tableName; - } + /** + * @return The table name associated with a transaction. + */ + public String getTableName() { + return tableName; + } - /** - * @return The column families associated with a transaction. - */ - public List getColumnFamilies() { - return columnFamilies; - } + /** + * @return The column families associated with a transaction. + */ + public List getColumnFamilies() { + return columnFamilies; + } - /** - * @return The expire timestamp associated with a transaction. 
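One detail of TableSnapshot that is easy to miss in the diff: getRevision falls back to the latest committed revision whenever a column family has no explicit entry in the map. A small illustration follows; the map contents are invented and the generic types (Map<String, Long>) are assumed, since the raw diff strips them.

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hcatalog.hbase.snapshot.TableSnapshot;

    public class SnapshotSketch {
        public static void main(String[] args) {
            Map<String, Long> cfRevisions = new HashMap<String, Long>();
            cfRevisions.put("cf1", 7L);   // cf1 is pinned below the latest revision
            TableSnapshot snap = new TableSnapshot("mytable", cfRevisions, 10L);

            System.out.println(snap.getRevision("cf1"));  // 7  -- taken from the map
            System.out.println(snap.getRevision("cf2"));  // 10 -- falls back to latestRevision
            System.out.println(snap.getLatestRevision()); // 10
        }
    }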
- */ - long getTransactionExpireTimeStamp() { - return this.timeStamp + this.keepAlive; - } + /** + * @return The expire timestamp associated with a transaction. + */ + long getTransactionExpireTimeStamp() { + return this.timeStamp + this.keepAlive; + } - void setKeepAlive(long seconds) { - this.keepAlive = seconds; - } + void setKeepAlive(long seconds) { + this.keepAlive = seconds; + } - /** - * Gets the keep alive value. - * - * @return long The keep alive value for the transaction. - */ - public long getKeepAliveValue() { - return this.keepAlive; - } + /** + * Gets the keep alive value. + * + * @return long The keep alive value for the transaction. + */ + public long getKeepAliveValue() { + return this.keepAlive; + } - /** - * Gets the family revision info. - * - * @return FamilyRevision An instance of FamilyRevision associated with the transaction. - */ - FamilyRevision getFamilyRevisionInfo() { - return new FamilyRevision(revision, getTransactionExpireTimeStamp()); - } + /** + * Gets the family revision info. + * + * @return FamilyRevision An instance of FamilyRevision associated with the transaction. + */ + FamilyRevision getFamilyRevisionInfo() { + return new FamilyRevision(revision, getTransactionExpireTimeStamp()); + } - /** - * Keep alive transaction. This methods extends the expire timestamp of a - * transaction by the "keep alive" amount. - */ - void keepAliveTransaction() { - this.timeStamp = this.timeStamp + this.keepAlive; - } + /** + * Keep alive transaction. This methods extends the expire timestamp of a + * transaction by the "keep alive" amount. + */ + void keepAliveTransaction() { + this.timeStamp = this.timeStamp + this.keepAlive; + } - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("Revision : "); - sb.append(this.getRevisionNumber()); - sb.append(" Timestamp : "); - sb.append(this.getTransactionExpireTimeStamp()); - sb.append("\n").append("Table : "); - sb.append(this.tableName).append("\n"); - sb.append("Column Families : "); - sb.append(this.columnFamilies.toString()); - return sb.toString(); - } + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Revision : "); + sb.append(this.getRevisionNumber()); + sb.append(" Timestamp : "); + sb.append(this.getTransactionExpireTimeStamp()); + sb.append("\n").append("Table : "); + sb.append(this.tableName).append("\n"); + sb.append("Column Families : "); + sb.append(this.columnFamilies.toString()); + return sb.toString(); + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java index f4556d1..89f2868 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java @@ -38,424 +38,424 @@ */ public class ZKBasedRevisionManager implements RevisionManager { - private static final Logger LOG = LoggerFactory.getLogger(ZKBasedRevisionManager.class); - private String zkHostList; - private String baseDir; - private ZKUtil zkUtil; - private long writeTxnTimeout; - - - /* - * @see org.apache.hcatalog.hbase.snapshot.RevisionManager#initialize() - */ - @Override - public void initialize(Configuration conf) { - conf = new Configuration(conf); - if (conf.get(RMConstants.ZOOKEEPER_HOSTLIST) == null) { - String zkHostList = 
conf.get(HConstants.ZOOKEEPER_QUORUM); - int port = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, - HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT); - String[] splits = zkHostList.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - sb.append(','); - } - sb.deleteCharAt(sb.length() - 1); - conf.set(RMConstants.ZOOKEEPER_HOSTLIST, sb.toString()); - } - this.zkHostList = conf.get(RMConstants.ZOOKEEPER_HOSTLIST); - this.baseDir = conf.get(RMConstants.ZOOKEEPER_DATADIR); - this.writeTxnTimeout = Long.parseLong(conf.get(RMConstants.WRITE_TRANSACTION_TIMEOUT)); + private static final Logger LOG = LoggerFactory.getLogger(ZKBasedRevisionManager.class); + private String zkHostList; + private String baseDir; + private ZKUtil zkUtil; + private long writeTxnTimeout; + + + /* + * @see org.apache.hcatalog.hbase.snapshot.RevisionManager#initialize() + */ + @Override + public void initialize(Configuration conf) { + conf = new Configuration(conf); + if (conf.get(RMConstants.ZOOKEEPER_HOSTLIST) == null) { + String zkHostList = conf.get(HConstants.ZOOKEEPER_QUORUM); + int port = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, + HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT); + String[] splits = zkHostList.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + sb.append(','); + } + sb.deleteCharAt(sb.length() - 1); + conf.set(RMConstants.ZOOKEEPER_HOSTLIST, sb.toString()); } - - /** - * Open a ZooKeeper connection - * @throws java.io.IOException - */ - - public void open() throws IOException { - zkUtil = new ZKUtil(zkHostList, this.baseDir); - zkUtil.createRootZNodes(); - LOG.info("Created root znodes for revision manager."); + this.zkHostList = conf.get(RMConstants.ZOOKEEPER_HOSTLIST); + this.baseDir = conf.get(RMConstants.ZOOKEEPER_DATADIR); + this.writeTxnTimeout = Long.parseLong(conf.get(RMConstants.WRITE_TRANSACTION_TIMEOUT)); + } + + /** + * Open a ZooKeeper connection + * @throws java.io.IOException + */ + + public void open() throws IOException { + zkUtil = new ZKUtil(zkHostList, this.baseDir); + zkUtil.createRootZNodes(); + LOG.info("Created root znodes for revision manager."); + } + + /** + * Close Zookeeper connection + */ + public void close() { + zkUtil.closeZKConnection(); + } + + private void checkInputParams(String table, List families) { + if (table == null) { + throw new IllegalArgumentException( + "The table name must be specified for reading."); } - - /** - * Close Zookeeper connection - */ - public void close() { - zkUtil.closeZKConnection(); + if (families == null || families.isEmpty()) { + throw new IllegalArgumentException( + "At least one column family should be specified for reading."); } - - private void checkInputParams(String table, List families) { - if (table == null) { - throw new IllegalArgumentException( - "The table name must be specified for reading."); - } - if (families == null || families.isEmpty()) { - throw new IllegalArgumentException( - "At least one column family should be specified for reading."); - } + } + + @Override + public void createTable(String table, List columnFamilies) throws IOException { + zkUtil.createRootZNodes(); + zkUtil.setUpZnodesForTable(table, columnFamilies); + } + + @Override + public void dropTable(String table) throws IOException { + zkUtil.deleteZNodes(table); + } + + /* @param table + /* @param families + /* @param keepAlive + /* @return + /* @throws IOException + * @see 
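When revision.manager.zk.hostList is not configured, initialize() above derives it from the HBase ZooKeeper quorum by appending the client port to every host. The same derivation in a compact form; String.join is used for brevity (the original targets older Java and builds the string with a StringBuffer), and the host names are placeholders.

    import java.util.ArrayList;
    import java.util.List;

    public class HostListSketch {
        // Turns "zk1,zk2,zk3" plus a client port into "zk1:2181,zk2:2181,zk3:2181".
        static String toHostPortList(String quorum, int clientPort) {
            List<String> parts = new ArrayList<String>();
            for (String host : quorum.split(",")) {
                parts.add(host + ":" + clientPort);
            }
            return String.join(",", parts);
        }

        public static void main(String[] args) {
            System.out.println(toHostPortList("zk1,zk2,zk3", 2181));
        }
    }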
org.apache.hcatalog.hbase.snapshot.RevisionManager#beginWriteTransaction(java.lang.String, java.util.List, long) + */ + public Transaction beginWriteTransaction(String table, + List families, long keepAlive) throws IOException { + + checkInputParams(table, families); + zkUtil.setUpZnodesForTable(table, families); + long nextId = zkUtil.nextId(table); + long expireTimestamp = zkUtil.getTimeStamp(); + Transaction transaction = new Transaction(table, families, nextId, + expireTimestamp); + if (keepAlive != -1) { + transaction.setKeepAlive(keepAlive); + } else { + transaction.setKeepAlive(writeTxnTimeout); } - @Override - public void createTable(String table, List columnFamilies) throws IOException { - zkUtil.createRootZNodes(); - zkUtil.setUpZnodesForTable(table, columnFamilies); - } - - @Override - public void dropTable(String table) throws IOException { - zkUtil.deleteZNodes(table); - } - - /* @param table - /* @param families - /* @param keepAlive - /* @return - /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevisionManager#beginWriteTransaction(java.lang.String, java.util.List, long) - */ - public Transaction beginWriteTransaction(String table, - List families, long keepAlive) throws IOException { - - checkInputParams(table, families); - zkUtil.setUpZnodesForTable(table, families); - long nextId = zkUtil.nextId(table); - long expireTimestamp = zkUtil.getTimeStamp(); - Transaction transaction = new Transaction(table, families, nextId, - expireTimestamp); - if (keepAlive != -1) { - transaction.setKeepAlive(keepAlive); - } else { - transaction.setKeepAlive(writeTxnTimeout); + refreshTransactionList(transaction.getTableName()); + String lockPath = prepareLockNode(table); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. + throw new IOException( + "Unable to obtain lock while beginning transaction. " + + transaction.toString()); + } else { + List colFamilies = transaction.getColumnFamilies(); + FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); + for (String cfamily : colFamilies) { + String path = PathUtil.getRunningTxnInfoPath( + baseDir, table, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.APPEND); } + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); + } - refreshTransactionList(transaction.getTableName()); - String lockPath = prepareLockNode(table); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException( - "Unable to obtain lock while beginning transaction. 
" - + transaction.toString()); - } else { - List colFamilies = transaction.getColumnFamilies(); - FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); - for (String cfamily : colFamilies) { - String path = PathUtil.getRunningTxnInfoPath( - baseDir, table, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.APPEND); - } - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); + return transaction; + } + + /* @param table The table name. + /* @param families The column families involved in the transaction. + /* @return transaction The transaction which was started. + /* @throws IOException + * @see org.apache.hcatalog.hbase.snapshot.RevisionManager#beginWriteTransaction(java.lang.String, java.util.List) + */ + public Transaction beginWriteTransaction(String table, List families) + throws IOException { + return beginWriteTransaction(table, families, -1); + } + + /** + * This method commits a write transaction. + * @param transaction The revision information associated with transaction. + * @throws java.io.IOException + */ + public void commitWriteTransaction(Transaction transaction) throws IOException { + refreshTransactionList(transaction.getTableName()); + + String lockPath = prepareLockNode(transaction.getTableName()); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. + throw new IOException( + "Unable to obtain lock while commiting transaction. " + + transaction.toString()); + } else { + String tableName = transaction.getTableName(); + List colFamilies = transaction.getColumnFamilies(); + FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); + for (String cfamily : colFamilies) { + String path = PathUtil.getRunningTxnInfoPath( + baseDir, tableName, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.REMOVE); } - return transaction; - } - - /* @param table The table name. - /* @param families The column families involved in the transaction. - /* @return transaction The transaction which was started. - /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevisionManager#beginWriteTransaction(java.lang.String, java.util.List) - */ - public Transaction beginWriteTransaction(String table, List families) - throws IOException { - return beginWriteTransaction(table, families, -1); + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); } - - /** - * This method commits a write transaction. - * @param transaction The revision information associated with transaction. 
- * @throws java.io.IOException - */ - public void commitWriteTransaction(Transaction transaction) throws IOException { - refreshTransactionList(transaction.getTableName()); - - String lockPath = prepareLockNode(transaction.getTableName()); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException( - "Unable to obtain lock while commiting transaction. " - + transaction.toString()); - } else { - String tableName = transaction.getTableName(); - List colFamilies = transaction.getColumnFamilies(); - FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); - for (String cfamily : colFamilies) { - String path = PathUtil.getRunningTxnInfoPath( - baseDir, tableName, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.REMOVE); - } - - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); + LOG.info("Write Transaction committed: " + transaction.toString()); + } + + /** + * This method aborts a write transaction. + * @param transaction + * @throws java.io.IOException + */ + public void abortWriteTransaction(Transaction transaction) throws IOException { + + refreshTransactionList(transaction.getTableName()); + String lockPath = prepareLockNode(transaction.getTableName()); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. + throw new IOException( + "Unable to obtain lock while aborting transaction. " + + transaction.toString()); + } else { + String tableName = transaction.getTableName(); + List colFamilies = transaction.getColumnFamilies(); + FamilyRevision revisionData = transaction + .getFamilyRevisionInfo(); + for (String cfamily : colFamilies) { + String path = PathUtil.getRunningTxnInfoPath( + baseDir, tableName, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.REMOVE); + path = PathUtil.getAbortInformationPath(baseDir, + tableName, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.APPEND); } - LOG.info("Write Transaction committed: " + transaction.toString()); - } - /** - * This method aborts a write transaction. - * @param transaction - * @throws java.io.IOException - */ - public void abortWriteTransaction(Transaction transaction) throws IOException { - - refreshTransactionList(transaction.getTableName()); - String lockPath = prepareLockNode(transaction.getTableName()); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException( - "Unable to obtain lock while aborting transaction. 
" - + transaction.toString()); - } else { - String tableName = transaction.getTableName(); - List colFamilies = transaction.getColumnFamilies(); - FamilyRevision revisionData = transaction - .getFamilyRevisionInfo(); - for (String cfamily : colFamilies) { - String path = PathUtil.getRunningTxnInfoPath( - baseDir, tableName, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.REMOVE); - path = PathUtil.getAbortInformationPath(baseDir, - tableName, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.APPEND); - } - - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); - } - LOG.info("Write Transaction aborted: " + transaction.toString()); + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); } + LOG.info("Write Transaction aborted: " + transaction.toString()); + } - /* @param transaction + /* @param transaction /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevsionManager#keepAlive(org.apache.hcatalog.hbase.snapshot.Transaction) - */ - public void keepAlive(Transaction transaction) - throws IOException { - - refreshTransactionList(transaction.getTableName()); - transaction.keepAliveTransaction(); - String lockPath = prepareLockNode(transaction.getTableName()); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException( - "Unable to obtain lock for keep alive of transaction. " - + transaction.toString()); - } else { - String tableName = transaction.getTableName(); - List colFamilies = transaction.getColumnFamilies(); - FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); - for (String cfamily : colFamilies) { - String path = PathUtil.getRunningTxnInfoPath( - baseDir, tableName, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.KEEP_ALIVE); - } - - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); + * @see org.apache.hcatalog.hbase.snapshot.RevsionManager#keepAlive(org.apache.hcatalog.hbase.snapshot.Transaction) + */ + public void keepAlive(Transaction transaction) + throws IOException { + + refreshTransactionList(transaction.getTableName()); + transaction.keepAliveTransaction(); + String lockPath = prepareLockNode(transaction.getTableName()); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. + throw new IOException( + "Unable to obtain lock for keep alive of transaction. 
" + + transaction.toString()); + } else { + String tableName = transaction.getTableName(); + List colFamilies = transaction.getColumnFamilies(); + FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); + for (String cfamily : colFamilies) { + String path = PathUtil.getRunningTxnInfoPath( + baseDir, tableName, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.KEEP_ALIVE); } + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); } - /* This method allows the user to create latest snapshot of a - /* table. - /* @param tableName The table whose snapshot is being created. - /* @return TableSnapshot An instance of TableSnaphot - /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevsionManager#createSnapshot(java.lang.String) - */ - public TableSnapshot createSnapshot(String tableName) throws IOException { - refreshTransactionList(tableName); - long latestID = zkUtil.currentID(tableName); - HashMap cfMap = new HashMap(); - List columnFamilyNames = zkUtil.getColumnFamiliesOfTable(tableName); - - for (String cfName : columnFamilyNames) { - String cfPath = PathUtil.getRunningTxnInfoPath(baseDir, tableName, cfName); - List tranxList = zkUtil.getTransactionList(cfPath); - long version; - if (!tranxList.isEmpty()) { - Collections.sort(tranxList); - // get the smallest running Transaction ID - long runningVersion = tranxList.get(0).getRevision(); - version = runningVersion - 1; - } else { - version = latestID; - } - cfMap.put(cfName, version); - } - - TableSnapshot snapshot = new TableSnapshot(tableName, cfMap, latestID); - LOG.debug("Created snapshot For table: " + tableName + " snapshot: " + snapshot); - return snapshot; + } + + /* This method allows the user to create latest snapshot of a + /* table. + /* @param tableName The table whose snapshot is being created. + /* @return TableSnapshot An instance of TableSnaphot + /* @throws IOException + * @see org.apache.hcatalog.hbase.snapshot.RevsionManager#createSnapshot(java.lang.String) + */ + public TableSnapshot createSnapshot(String tableName) throws IOException { + refreshTransactionList(tableName); + long latestID = zkUtil.currentID(tableName); + HashMap cfMap = new HashMap(); + List columnFamilyNames = zkUtil.getColumnFamiliesOfTable(tableName); + + for (String cfName : columnFamilyNames) { + String cfPath = PathUtil.getRunningTxnInfoPath(baseDir, tableName, cfName); + List tranxList = zkUtil.getTransactionList(cfPath); + long version; + if (!tranxList.isEmpty()) { + Collections.sort(tranxList); + // get the smallest running Transaction ID + long runningVersion = tranxList.get(0).getRevision(); + version = runningVersion - 1; + } else { + version = latestID; + } + cfMap.put(cfName, version); } - /* This method allows the user to create snapshot of a - /* table with a given revision number. 
- /* @param tableName - /* @param revision - /* @return TableSnapshot - /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevsionManager#createSnapshot(java.lang.String, long) - */ - public TableSnapshot createSnapshot(String tableName, long revision) throws IOException { + TableSnapshot snapshot = new TableSnapshot(tableName, cfMap, latestID); + LOG.debug("Created snapshot For table: " + tableName + " snapshot: " + snapshot); + return snapshot; + } + + /* This method allows the user to create snapshot of a + /* table with a given revision number. + /* @param tableName + /* @param revision + /* @return TableSnapshot + /* @throws IOException + * @see org.apache.hcatalog.hbase.snapshot.RevsionManager#createSnapshot(java.lang.String, long) + */ + public TableSnapshot createSnapshot(String tableName, long revision) throws IOException { + + long currentID = zkUtil.currentID(tableName); + if (revision > currentID) { + throw new IOException( + "The revision specified in the snapshot is higher than the current revision of the table."); + } + refreshTransactionList(tableName); + HashMap cfMap = new HashMap(); + List columnFamilies = zkUtil.getColumnFamiliesOfTable(tableName); - long currentID = zkUtil.currentID(tableName); - if (revision > currentID) { - throw new IOException( - "The revision specified in the snapshot is higher than the current revision of the table."); - } - refreshTransactionList(tableName); - HashMap cfMap = new HashMap(); - List columnFamilies = zkUtil.getColumnFamiliesOfTable(tableName); + for (String cf : columnFamilies) { + cfMap.put(cf, revision); + } - for (String cf : columnFamilies) { - cfMap.put(cf, revision); + return new TableSnapshot(tableName, cfMap, revision); + } + + /** + * Get the list of in-progress Transactions for a column family + * @param table the table name + * @param columnFamily the column family name + * @return a list of in-progress WriteTransactions + * @throws java.io.IOException + */ + List getRunningTransactions(String table, + String columnFamily) throws IOException { + String path = PathUtil.getRunningTxnInfoPath(baseDir, table, + columnFamily); + return zkUtil.getTransactionList(path); + } + + @Override + public List getAbortedWriteTransactions(String table, + String columnFamily) throws IOException { + String path = PathUtil.getAbortInformationPath(baseDir, table, columnFamily); + return zkUtil.getTransactionList(path); + } + + private void refreshTransactionList(String tableName) throws IOException { + String lockPath = prepareLockNode(tableName); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. 
+ throw new IOException( + "Unable to obtain lock while refreshing transactions of table " + + tableName + "."); + } else { + List cfPaths = zkUtil + .getColumnFamiliesOfTable(tableName); + for (String cf : cfPaths) { + String runningDataPath = PathUtil.getRunningTxnInfoPath( + baseDir, tableName, cf); + zkUtil.refreshTransactions(runningDataPath); } - return new TableSnapshot(tableName, cfMap, revision); + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); } - /** - * Get the list of in-progress Transactions for a column family - * @param table the table name - * @param columnFamily the column family name - * @return a list of in-progress WriteTransactions - * @throws java.io.IOException - */ - List getRunningTransactions(String table, - String columnFamily) throws IOException { - String path = PathUtil.getRunningTxnInfoPath(baseDir, table, - columnFamily); - return zkUtil.getTransactionList(path); - } + } - @Override - public List getAbortedWriteTransactions(String table, - String columnFamily) throws IOException { - String path = PathUtil.getAbortInformationPath(baseDir, table, columnFamily); - return zkUtil.getTransactionList(path); - } + private String prepareLockNode(String tableName) throws IOException { + String txnDataPath = PathUtil.getTxnDataPath(this.baseDir, tableName); + String lockPath = PathUtil.getLockManagementNode(txnDataPath); + zkUtil.ensurePathExists(lockPath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + return lockPath; + } - private void refreshTransactionList(String tableName) throws IOException { - String lockPath = prepareLockNode(tableName); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException( - "Unable to obtain lock while refreshing transactions of table " - + tableName + "."); - } else { - List cfPaths = zkUtil - .getColumnFamiliesOfTable(tableName); - for (String cf : cfPaths) { - String runningDataPath = PathUtil.getRunningTxnInfoPath( - baseDir, tableName, cf); - zkUtil.refreshTransactions(runningDataPath); - } - - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); - } + /* + * This class is a listener class for the locks used in revision management. + * TBD: Use the following class to signal that that the lock is actually + * been granted. + */ + class RMLockListener implements LockListener { - } + /* + * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockAcquired() + */ + @Override + public void lockAcquired() { - private String prepareLockNode(String tableName) throws IOException { - String txnDataPath = PathUtil.getTxnDataPath(this.baseDir, tableName); - String lockPath = PathUtil.getLockManagementNode(txnDataPath); - zkUtil.ensurePathExists(lockPath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - return lockPath; } /* - * This class is a listener class for the locks used in revision management. 
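The commit, abort, keep-alive and refresh paths above all follow the same shape: obtain a per-table ZooKeeper write lock, perform the znode update, and release the lock in a finally block; a failed lock attempt is currently reported as an IOException rather than queued (see the TO DO comments). A minimal sketch of that shape, assuming an already-connected ZooKeeper handle and an existing lock node path; both are placeholders here and are supplied by ZKUtil.getSession() and prepareLockNode(table) in the real class.

    import java.io.IOException;

    import org.apache.hcatalog.hbase.snapshot.lock.LockListener;
    import org.apache.hcatalog.hbase.snapshot.lock.WriteLock;
    import org.apache.zookeeper.KeeperException;
    import org.apache.zookeeper.ZooDefs.Ids;
    import org.apache.zookeeper.ZooKeeper;

    // Sketch of the lock-protected update shared by commitWriteTransaction,
    // abortWriteTransaction, keepAlive and refreshTransactionList. The zk handle
    // and lockPath are assumed inputs, not values defined in this patch.
    public class GuardedUpdateSketch {

        static void updateUnderLock(ZooKeeper zk, String lockPath) throws IOException {
            WriteLock lock = new WriteLock(zk, lockPath, Ids.OPEN_ACL_UNSAFE);
            lock.setLockListener(new LockListener() {
                @Override
                public void lockAcquired() { /* no-op, like RMLockListener above */ }
                @Override
                public void lockReleased() { }
            });
            try {
                if (!lock.lock()) {
                    // Same behaviour as the patched code: fail fast rather than queue up.
                    throw new IOException("Unable to obtain lock at " + lockPath);
                }
                // ... mutate the transaction znodes here, e.g. via ZKUtil.updateData(...)
            } catch (KeeperException e) {
                throw new IOException("Exception while obtaining lock.", e);
            } catch (InterruptedException e) {
                throw new IOException("Exception while obtaining lock.", e);
            } finally {
                lock.unlock();
            }
        }
    }
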
- * TBD: Use the following class to signal that that the lock is actually - * been granted. + * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockReleased() */ - class RMLockListener implements LockListener { - - /* - * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockAcquired() - */ - @Override - public void lockAcquired() { - - } - - /* - * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockReleased() - */ - @Override - public void lockReleased() { - - } + @Override + public void lockReleased() { } + } + } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKUtil.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKUtil.java index 58fd435..0ce2a15 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKUtil.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKUtil.java @@ -44,482 +44,482 @@ class ZKUtil { - private int DEFAULT_SESSION_TIMEOUT = 1000000; - private ZooKeeper zkSession; - private String baseDir; - private String connectString; - private static final Logger LOG = LoggerFactory.getLogger(ZKUtil.class); - - static enum UpdateMode { - APPEND, REMOVE, KEEP_ALIVE + private int DEFAULT_SESSION_TIMEOUT = 1000000; + private ZooKeeper zkSession; + private String baseDir; + private String connectString; + private static final Logger LOG = LoggerFactory.getLogger(ZKUtil.class); + + static enum UpdateMode { + APPEND, REMOVE, KEEP_ALIVE + } + + ; + + ZKUtil(String connection, String baseDir) { + this.connectString = connection; + this.baseDir = baseDir; + } + + /** + * This method creates znodes related to table. + * + * @param table The name of the table. + * @param families The list of column families of the table. + * @throws IOException + */ + void setUpZnodesForTable(String table, List families) + throws IOException { + + String transactionDataTablePath = PathUtil.getTxnDataPath(baseDir, table); + ensurePathExists(transactionDataTablePath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + for (String cf : families) { + String runningDataPath = PathUtil.getRunningTxnInfoPath( + this.baseDir, table, cf); + ensurePathExists(runningDataPath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + String abortDataPath = PathUtil.getAbortInformationPath( + this.baseDir, table, cf); + ensurePathExists(abortDataPath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); } - ; - - ZKUtil(String connection, String baseDir) { - this.connectString = connection; - this.baseDir = baseDir; - } - - /** - * This method creates znodes related to table. - * - * @param table The name of the table. - * @param families The list of column families of the table. - * @throws IOException - */ - void setUpZnodesForTable(String table, List families) - throws IOException { - - String transactionDataTablePath = PathUtil.getTxnDataPath(baseDir, table); - ensurePathExists(transactionDataTablePath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - for (String cf : families) { - String runningDataPath = PathUtil.getRunningTxnInfoPath( - this.baseDir, table, cf); - ensurePathExists(runningDataPath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - String abortDataPath = PathUtil.getAbortInformationPath( - this.baseDir, table, cf); - ensurePathExists(abortDataPath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - } - - } - - /** - * This method ensures that a given path exists in zookeeper. 
If the path - * does not exists, it creates one. - * - * @param path The path of znode that is required to exist. - * @param data The data to be associated with the znode. - * @param acl The ACLs required. - * @param flags The CreateMode for the znode. - * @throws IOException - */ - void ensurePathExists(String path, byte[] data, List acl, - CreateMode flags) throws IOException { - String[] dirs = path.split("/"); - String parentPath = ""; - for (String subDir : dirs) { - if (subDir.equals("") == false) { - parentPath = parentPath + "/" + subDir; - try { - Stat stat = getSession().exists(parentPath, false); - if (stat == null) { - getSession().create(parentPath, data, acl, flags); - } - } catch (Exception e) { - throw new IOException("Exception while creating path " - + parentPath, e); - } - } - } - - } - - /** - * This method returns a list of columns of a table which were used in any - * of the transactions. - * - * @param tableName The name of table. - * @return List The list of column families in table. - * @throws IOException - */ - List getColumnFamiliesOfTable(String tableName) throws IOException { - String path = PathUtil.getTxnDataPath(baseDir, tableName); - List children = null; - List columnFamlies = new ArrayList(); + } + + /** + * This method ensures that a given path exists in zookeeper. If the path + * does not exists, it creates one. + * + * @param path The path of znode that is required to exist. + * @param data The data to be associated with the znode. + * @param acl The ACLs required. + * @param flags The CreateMode for the znode. + * @throws IOException + */ + void ensurePathExists(String path, byte[] data, List acl, + CreateMode flags) throws IOException { + String[] dirs = path.split("/"); + String parentPath = ""; + for (String subDir : dirs) { + if (subDir.equals("") == false) { + parentPath = parentPath + "/" + subDir; try { - children = getSession().getChildren(path, false); - } catch (KeeperException e) { - LOG.warn("Caught: ", e); - throw new IOException("Exception while obtaining columns of table.", e); - } catch (InterruptedException e) { - LOG.warn("Caught: ", e); - throw new IOException("Exception while obtaining columns of table.", e); - } - - for (String child : children) { - if ((child.contains("idgen") == false) - && (child.contains("_locknode_") == false)) { - columnFamlies.add(child); - } + Stat stat = getSession().exists(parentPath, false); + if (stat == null) { + getSession().create(parentPath, data, acl, flags); + } + } catch (Exception e) { + throw new IOException("Exception while creating path " + + parentPath, e); } - return columnFamlies; + } } - /** - * This method returns a time stamp for use by the transactions. - * - * @return long The current timestamp in zookeeper. - * @throws IOException - */ - long getTimeStamp() throws IOException { - long timeStamp; - Stat stat; - String clockPath = PathUtil.getClockPath(this.baseDir); - ensurePathExists(clockPath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - try { - getSession().exists(clockPath, false); - stat = getSession().setData(clockPath, null, -1); - - } catch (KeeperException e) { - LOG.warn("Caught: ", e); - throw new IOException("Exception while obtaining timestamp ", e); - } catch (InterruptedException e) { - LOG.warn("Caught: ", e); - throw new IOException("Exception while obtaining timestamp ", e); - } - timeStamp = stat.getMtime(); - return timeStamp; + } + + /** + * This method returns a list of columns of a table which were used in any + * of the transactions. 
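ensurePathExists above walks the requested znode path one component at a time and creates any ancestor that is missing, since ZooKeeper's create() is not recursive. The sketch below shows only the order in which the intermediate paths are derived; it does not contact ZooKeeper, and the example path is invented.

    // Illustration of the parent-path walk performed by ZKUtil.ensurePathExists.
    // Prints each intermediate path in the order it would be checked/created.
    public class PathWalkSketch {
        public static void main(String[] args) {
            String path = "/revision-management/data/my_table/cf1"; // invented example path
            String parentPath = "";
            for (String subDir : path.split("/")) {
                if (!subDir.isEmpty()) {
                    parentPath = parentPath + "/" + subDir;
                    // In ZKUtil this is where exists()/create() is issued for parentPath.
                    System.out.println(parentPath);
                }
            }
        }
    }
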
+ * + * @param tableName The name of table. + * @return List The list of column families in table. + * @throws IOException + */ + List getColumnFamiliesOfTable(String tableName) throws IOException { + String path = PathUtil.getTxnDataPath(baseDir, tableName); + List children = null; + List columnFamlies = new ArrayList(); + try { + children = getSession().getChildren(path, false); + } catch (KeeperException e) { + LOG.warn("Caught: ", e); + throw new IOException("Exception while obtaining columns of table.", e); + } catch (InterruptedException e) { + LOG.warn("Caught: ", e); + throw new IOException("Exception while obtaining columns of table.", e); } - /** - * This method returns the next revision number to be used for any - * transaction purposes. - * - * @param tableName The name of the table. - * @return revision number The revision number last used by any transaction. - * @throws IOException - */ - long nextId(String tableName) throws IOException { - String idNode = PathUtil.getRevisionIDNode(this.baseDir, tableName); - ensurePathExists(idNode, Bytes.toBytes("0"), Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - String lockNode = PathUtil.getLockManagementNode(idNode); - ensurePathExists(lockNode, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - IDGenerator idf = new IDGenerator(getSession(), tableName, idNode); - long id = idf.obtainID(); - return id; + for (String child : children) { + if ((child.contains("idgen") == false) + && (child.contains("_locknode_") == false)) { + columnFamlies.add(child); + } } - - /** - * The latest used revision id of the table. - * - * @param tableName The name of the table. - * @return the long The revision number to use by any transaction. - * @throws IOException Signals that an I/O exception has occurred. - */ - long currentID(String tableName) throws IOException { - String idNode = PathUtil.getRevisionIDNode(this.baseDir, tableName); - ensurePathExists(idNode, Bytes.toBytes("0"), Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - String lockNode = PathUtil.getLockManagementNode(idNode); - ensurePathExists(lockNode, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - IDGenerator idf = new IDGenerator(getSession(), tableName, idNode); - long id = idf.readID(); - return id; + return columnFamlies; + } + + /** + * This method returns a time stamp for use by the transactions. + * + * @return long The current timestamp in zookeeper. + * @throws IOException + */ + long getTimeStamp() throws IOException { + long timeStamp; + Stat stat; + String clockPath = PathUtil.getClockPath(this.baseDir); + ensurePathExists(clockPath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + try { + getSession().exists(clockPath, false); + stat = getSession().setData(clockPath, null, -1); + + } catch (KeeperException e) { + LOG.warn("Caught: ", e); + throw new IOException("Exception while obtaining timestamp ", e); + } catch (InterruptedException e) { + LOG.warn("Caught: ", e); + throw new IOException("Exception while obtaining timestamp ", e); } - - /** - * This methods retrieves the list of transaction information associated - * with each column/column family of a table. - * - * @param path The znode path - * @return List of FamilyRevision The list of transactions in the given path. 
- * @throws IOException - */ - List getTransactionList(String path) - throws IOException { - - byte[] data = getRawData(path, new Stat()); - ArrayList wtxnList = new ArrayList(); - if (data == null) { - return wtxnList; - } - StoreFamilyRevisionList txnList = new StoreFamilyRevisionList(); - deserialize(txnList, data); - Iterator itr = txnList.getRevisionListIterator(); - - while (itr.hasNext()) { - StoreFamilyRevision wtxn = itr.next(); - wtxnList.add(new FamilyRevision(wtxn.getRevision(), wtxn - .getTimestamp())); - } - - return wtxnList; + timeStamp = stat.getMtime(); + return timeStamp; + } + + /** + * This method returns the next revision number to be used for any + * transaction purposes. + * + * @param tableName The name of the table. + * @return revision number The revision number last used by any transaction. + * @throws IOException + */ + long nextId(String tableName) throws IOException { + String idNode = PathUtil.getRevisionIDNode(this.baseDir, tableName); + ensurePathExists(idNode, Bytes.toBytes("0"), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + String lockNode = PathUtil.getLockManagementNode(idNode); + ensurePathExists(lockNode, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + IDGenerator idf = new IDGenerator(getSession(), tableName, idNode); + long id = idf.obtainID(); + return id; + } + + /** + * The latest used revision id of the table. + * + * @param tableName The name of the table. + * @return the long The revision number to use by any transaction. + * @throws IOException Signals that an I/O exception has occurred. + */ + long currentID(String tableName) throws IOException { + String idNode = PathUtil.getRevisionIDNode(this.baseDir, tableName); + ensurePathExists(idNode, Bytes.toBytes("0"), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + String lockNode = PathUtil.getLockManagementNode(idNode); + ensurePathExists(lockNode, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + IDGenerator idf = new IDGenerator(getSession(), tableName, idNode); + long id = idf.readID(); + return id; + } + + /** + * This methods retrieves the list of transaction information associated + * with each column/column family of a table. + * + * @param path The znode path + * @return List of FamilyRevision The list of transactions in the given path. + * @throws IOException + */ + List getTransactionList(String path) + throws IOException { + + byte[] data = getRawData(path, new Stat()); + ArrayList wtxnList = new ArrayList(); + if (data == null) { + return wtxnList; } - - /** - * This method returns the data associated with the path in zookeeper. - * - * @param path The znode path - * @param stat Zookeeper stat - * @return byte array The data stored in the znode. - * @throws IOException - */ - byte[] getRawData(String path, Stat stat) throws IOException { - byte[] data = null; - try { - data = getSession().getData(path, false, stat); - } catch (Exception e) { - throw new IOException( - "Exception while obtaining raw data from zookeeper path " - + path, e); - } - return data; + StoreFamilyRevisionList txnList = new StoreFamilyRevisionList(); + deserialize(txnList, data); + Iterator itr = txnList.getRevisionListIterator(); + + while (itr.hasNext()) { + StoreFamilyRevision wtxn = itr.next(); + wtxnList.add(new FamilyRevision(wtxn.getRevision(), wtxn + .getTimestamp())); } - /** - * This method created the basic znodes in zookeeper for revision - * management. 
- * - * @throws IOException - */ - void createRootZNodes() throws IOException { - String txnBaseNode = PathUtil.getTransactionBasePath(this.baseDir); - String clockNode = PathUtil.getClockPath(this.baseDir); - ensurePathExists(txnBaseNode, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - ensurePathExists(clockNode, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); + return wtxnList; + } + + /** + * This method returns the data associated with the path in zookeeper. + * + * @param path The znode path + * @param stat Zookeeper stat + * @return byte array The data stored in the znode. + * @throws IOException + */ + byte[] getRawData(String path, Stat stat) throws IOException { + byte[] data = null; + try { + data = getSession().getData(path, false, stat); + } catch (Exception e) { + throw new IOException( + "Exception while obtaining raw data from zookeeper path " + + path, e); } - - /** - * This method closes the zookeeper session. - */ - void closeZKConnection() { - if (zkSession != null) { + return data; + } + + /** + * This method created the basic znodes in zookeeper for revision + * management. + * + * @throws IOException + */ + void createRootZNodes() throws IOException { + String txnBaseNode = PathUtil.getTransactionBasePath(this.baseDir); + String clockNode = PathUtil.getClockPath(this.baseDir); + ensurePathExists(txnBaseNode, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + ensurePathExists(clockNode, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + } + + /** + * This method closes the zookeeper session. + */ + void closeZKConnection() { + if (zkSession != null) { + try { + zkSession.close(); + } catch (InterruptedException e) { + LOG.warn("Close failed: ", e); + } + zkSession = null; + LOG.info("Disconnected to ZooKeeper"); + } + } + + /** + * This method returns a zookeeper session. If the current session is closed, + * then a new session is created. + * + * @return ZooKeeper An instance of zookeeper client. + * @throws IOException + */ + ZooKeeper getSession() throws IOException { + if (zkSession == null || zkSession.getState() == States.CLOSED) { + synchronized (this) { + if (zkSession == null || zkSession.getState() == States.CLOSED) { + zkSession = new ZooKeeper(this.connectString, + this.DEFAULT_SESSION_TIMEOUT, new ZKWatcher()); + while (zkSession.getState() == States.CONNECTING) { try { - zkSession.close(); + Thread.sleep(1000); } catch (InterruptedException e) { - LOG.warn("Close failed: ", e); } - zkSession = null; - LOG.info("Disconnected to ZooKeeper"); + } } + } } - - /** - * This method returns a zookeeper session. If the current session is closed, - * then a new session is created. - * - * @return ZooKeeper An instance of zookeeper client. - * @throws IOException - */ - ZooKeeper getSession() throws IOException { - if (zkSession == null || zkSession.getState() == States.CLOSED) { - synchronized (this) { - if (zkSession == null || zkSession.getState() == States.CLOSED) { - zkSession = new ZooKeeper(this.connectString, - this.DEFAULT_SESSION_TIMEOUT, new ZKWatcher()); - while (zkSession.getState() == States.CONNECTING) { - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - } - } - } - } - } - return zkSession; + return zkSession; + } + + /** + * This method updates the transaction data related to a znode. + * + * @param path The path to the transaction data. + * @param updateTx The FamilyRevision to be updated. + * @param mode The mode to update like append, update, remove. 
+ * @throws IOException + */ + void updateData(String path, FamilyRevision updateTx, UpdateMode mode) + throws IOException { + + if (updateTx == null) { + throw new IOException( + "The transaction to be updated found to be null."); } - - /** - * This method updates the transaction data related to a znode. - * - * @param path The path to the transaction data. - * @param updateTx The FamilyRevision to be updated. - * @param mode The mode to update like append, update, remove. - * @throws IOException - */ - void updateData(String path, FamilyRevision updateTx, UpdateMode mode) - throws IOException { - - if (updateTx == null) { - throw new IOException( - "The transaction to be updated found to be null."); - } - List currentData = getTransactionList(path); - List newData = new ArrayList(); - boolean dataFound = false; - long updateVersion = updateTx.getRevision(); - for (FamilyRevision tranx : currentData) { - if (tranx.getRevision() != updateVersion) { - newData.add(tranx); - } else { - dataFound = true; - } - } - switch (mode) { - case REMOVE: - if (dataFound == false) { - throw new IOException( - "The transaction to be removed not found in the data."); - } - LOG.info("Removed trasaction : " + updateTx.toString()); - break; - case KEEP_ALIVE: - if (dataFound == false) { - throw new IOException( - "The transaction to be kept alove not found in the data. It might have been expired."); - } - newData.add(updateTx); - LOG.info("keep alive of transaction : " + updateTx.toString()); - break; - case APPEND: - if (dataFound == true) { - throw new IOException( - "The data to be appended already exists."); - } - newData.add(updateTx); - LOG.info("Added transaction : " + updateTx.toString()); - break; - } - - // For serialization purposes. - List newTxnList = new ArrayList(); - for (FamilyRevision wtxn : newData) { - StoreFamilyRevision newTxn = new StoreFamilyRevision(wtxn.getRevision(), - wtxn.getExpireTimestamp()); - newTxnList.add(newTxn); - } - StoreFamilyRevisionList wtxnList = new StoreFamilyRevisionList(newTxnList); - byte[] newByteData = serialize(wtxnList); - - Stat stat = null; - try { - stat = zkSession.setData(path, newByteData, -1); - } catch (KeeperException e) { - throw new IOException( - "Exception while updating trasactional data. ", e); - } catch (InterruptedException e) { - throw new IOException( - "Exception while updating trasactional data. ", e); - } - - if (stat != null) { - LOG.info("Transaction list stored at " + path + "."); - } - + List currentData = getTransactionList(path); + List newData = new ArrayList(); + boolean dataFound = false; + long updateVersion = updateTx.getRevision(); + for (FamilyRevision tranx : currentData) { + if (tranx.getRevision() != updateVersion) { + newData.add(tranx); + } else { + dataFound = true; + } } - - /** - * Refresh transactions on a given transaction data path. - * - * @param path The path to the transaction data. - * @throws IOException Signals that an I/O exception has occurred. 
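updateData above rewrites a column family's running-transaction list in one of three modes: REMOVE drops the matching revision and fails if it is absent, KEEP_ALIVE re-adds the refreshed revision and fails if the old entry is no longer present (likely expired), and APPEND adds a revision and fails if it already exists. A simplified sketch of just that branching, using plain revision numbers instead of FamilyRevision objects and leaving out the Thrift re-serialization and the setData call.

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    // Simplified model of ZKUtil.updateData's three modes; not the patched code
    // itself, which operates on FamilyRevision objects and writes back to the znode.
    public class UpdateModeSketch {

        enum UpdateMode { APPEND, REMOVE, KEEP_ALIVE }

        static List<Long> apply(List<Long> current, long revision, UpdateMode mode)
            throws IOException {
            List<Long> updated = new ArrayList<Long>();
            boolean found = false;
            for (long r : current) {
                if (r != revision) {
                    updated.add(r);
                } else {
                    found = true;
                }
            }
            switch (mode) {
            case REMOVE:
                if (!found) {
                    throw new IOException("The transaction to be removed was not found.");
                }
                break;                 // the matching entry simply stays removed
            case KEEP_ALIVE:
                if (!found) {
                    throw new IOException("The transaction to keep alive was not found; it may have expired.");
                }
                updated.add(revision); // re-add the refreshed entry
                break;
            case APPEND:
                if (found) {
                    throw new IOException("The revision to be appended already exists.");
                }
                updated.add(revision);
                break;
            }
            return updated;
        }
    }
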
- */ - void refreshTransactions(String path) throws IOException { - List currentData = getTransactionList(path); - List newData = new ArrayList(); - - for (FamilyRevision tranx : currentData) { - if (tranx.getExpireTimestamp() > getTimeStamp()) { - newData.add(tranx); - } - } - - if (newData.equals(currentData) == false) { - List newTxnList = new ArrayList(); - for (FamilyRevision wtxn : newData) { - StoreFamilyRevision newTxn = new StoreFamilyRevision(wtxn.getRevision(), - wtxn.getExpireTimestamp()); - newTxnList.add(newTxn); - } - StoreFamilyRevisionList wtxnList = new StoreFamilyRevisionList(newTxnList); - byte[] newByteData = serialize(wtxnList); - - try { - zkSession.setData(path, newByteData, -1); - } catch (KeeperException e) { - throw new IOException( - "Exception while updating trasactional data. ", e); - } catch (InterruptedException e) { - throw new IOException( - "Exception while updating trasactional data. ", e); - } - - } - + switch (mode) { + case REMOVE: + if (dataFound == false) { + throw new IOException( + "The transaction to be removed not found in the data."); + } + LOG.info("Removed trasaction : " + updateTx.toString()); + break; + case KEEP_ALIVE: + if (dataFound == false) { + throw new IOException( + "The transaction to be kept alove not found in the data. It might have been expired."); + } + newData.add(updateTx); + LOG.info("keep alive of transaction : " + updateTx.toString()); + break; + case APPEND: + if (dataFound == true) { + throw new IOException( + "The data to be appended already exists."); + } + newData.add(updateTx); + LOG.info("Added transaction : " + updateTx.toString()); + break; } - /** - * Delete table znodes. - * - * @param tableName the hbase table name - * @throws IOException Signals that an I/O exception has occurred. - */ - void deleteZNodes(String tableName) throws IOException { - String transactionDataTablePath = PathUtil.getTxnDataPath(baseDir, - tableName); - deleteRecursively(transactionDataTablePath); + // For serialization purposes. + List newTxnList = new ArrayList(); + for (FamilyRevision wtxn : newData) { + StoreFamilyRevision newTxn = new StoreFamilyRevision(wtxn.getRevision(), + wtxn.getExpireTimestamp()); + newTxnList.add(newTxn); + } + StoreFamilyRevisionList wtxnList = new StoreFamilyRevisionList(newTxnList); + byte[] newByteData = serialize(wtxnList); + + Stat stat = null; + try { + stat = zkSession.setData(path, newByteData, -1); + } catch (KeeperException e) { + throw new IOException( + "Exception while updating trasactional data. ", e); + } catch (InterruptedException e) { + throw new IOException( + "Exception while updating trasactional data. ", e); } - void deleteRecursively(String path) throws IOException { - try { - List children = getSession().getChildren(path, false); - if (children.size() != 0) { - for (String child : children) { - deleteRecursively(path + "/" + child); - } - } - getSession().delete(path, -1); - } catch (KeeperException e) { - throw new IOException( - "Exception while deleting path " + path + ".", e); - } catch (InterruptedException e) { - throw new IOException( - "Exception while deleting path " + path + ".", e); - } + if (stat != null) { + LOG.info("Transaction list stored at " + path + "."); } - /** - * This method serializes a given instance of TBase object. - * - * @param obj An instance of TBase - * @return byte array The serialized data. 
- * @throws IOException - */ - static byte[] serialize(TBase obj) throws IOException { - if (obj == null) - return new byte[0]; - try { - TSerializer serializer = new TSerializer( - new TBinaryProtocol.Factory()); - byte[] bytes = serializer.serialize(obj); - return bytes; - } catch (Exception e) { - throw new IOException("Serialization error: ", e); - } + } + + /** + * Refresh transactions on a given transaction data path. + * + * @param path The path to the transaction data. + * @throws IOException Signals that an I/O exception has occurred. + */ + void refreshTransactions(String path) throws IOException { + List currentData = getTransactionList(path); + List newData = new ArrayList(); + + for (FamilyRevision tranx : currentData) { + if (tranx.getExpireTimestamp() > getTimeStamp()) { + newData.add(tranx); + } } + if (newData.equals(currentData) == false) { + List newTxnList = new ArrayList(); + for (FamilyRevision wtxn : newData) { + StoreFamilyRevision newTxn = new StoreFamilyRevision(wtxn.getRevision(), + wtxn.getExpireTimestamp()); + newTxnList.add(newTxn); + } + StoreFamilyRevisionList wtxnList = new StoreFamilyRevisionList(newTxnList); + byte[] newByteData = serialize(wtxnList); + + try { + zkSession.setData(path, newByteData, -1); + } catch (KeeperException e) { + throw new IOException( + "Exception while updating trasactional data. ", e); + } catch (InterruptedException e) { + throw new IOException( + "Exception while updating trasactional data. ", e); + } - /** - * This method deserializes the given byte array into the TBase object. - * - * @param obj An instance of TBase - * @param data Output of deserialization. - * @throws IOException - */ - static void deserialize(TBase obj, byte[] data) throws IOException { - if (data == null || data.length == 0) - return; - try { - TDeserializer deserializer = new TDeserializer( - new TBinaryProtocol.Factory()); - deserializer.deserialize(obj, data); - } catch (Exception e) { - throw new IOException("Deserialization error: " + e.getMessage(), e); - } } - private class ZKWatcher implements Watcher { - public void process(WatchedEvent event) { - switch (event.getState()) { - case Expired: - LOG.info("The client session has expired. Try opening a new " - + "session and connecting again."); - zkSession = null; - break; - default: - - } + } + + /** + * Delete table znodes. + * + * @param tableName the hbase table name + * @throws IOException Signals that an I/O exception has occurred. + */ + void deleteZNodes(String tableName) throws IOException { + String transactionDataTablePath = PathUtil.getTxnDataPath(baseDir, + tableName); + deleteRecursively(transactionDataTablePath); + } + + void deleteRecursively(String path) throws IOException { + try { + List children = getSession().getChildren(path, false); + if (children.size() != 0) { + for (String child : children) { + deleteRecursively(path + "/" + child); } + } + getSession().delete(path, -1); + } catch (KeeperException e) { + throw new IOException( + "Exception while deleting path " + path + ".", e); + } catch (InterruptedException e) { + throw new IOException( + "Exception while deleting path " + path + ".", e); + } + } + + /** + * This method serializes a given instance of TBase object. + * + * @param obj An instance of TBase + * @return byte array The serialized data. 
+ * @throws IOException + */ + static byte[] serialize(TBase obj) throws IOException { + if (obj == null) + return new byte[0]; + try { + TSerializer serializer = new TSerializer( + new TBinaryProtocol.Factory()); + byte[] bytes = serializer.serialize(obj); + return bytes; + } catch (Exception e) { + throw new IOException("Serialization error: ", e); + } + } + + + /** + * This method deserializes the given byte array into the TBase object. + * + * @param obj An instance of TBase + * @param data Output of deserialization. + * @throws IOException + */ + static void deserialize(TBase obj, byte[] data) throws IOException { + if (data == null || data.length == 0) + return; + try { + TDeserializer deserializer = new TDeserializer( + new TBinaryProtocol.Factory()); + deserializer.deserialize(obj, data); + } catch (Exception e) { + throw new IOException("Deserialization error: " + e.getMessage(), e); + } + } + + private class ZKWatcher implements Watcher { + public void process(WatchedEvent event) { + switch (event.getState()) { + case Expired: + LOG.info("The client session has expired. Try opening a new " + + "session and connecting again."); + zkSession = null; + break; + default: + + } } + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/LockListener.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/LockListener.java index 3c5f95b..f50b4e2 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/LockListener.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/LockListener.java @@ -27,15 +27,15 @@ * in the package name. */ public interface LockListener { - /** - * call back called when the lock - * is acquired - */ - public void lockAcquired(); + /** + * call back called when the lock + * is acquired + */ + public void lockAcquired(); - /** - * call back called when the lock is - * released. - */ - public void lockReleased(); + /** + * call back called when the lock is + * released. + */ + public void lockReleased(); } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ProtocolSupport.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ProtocolSupport.java index 0f97589..e2dc69c 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ProtocolSupport.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ProtocolSupport.java @@ -40,156 +40,156 @@ * changes in the retry delay, retry count values and package name. */ class ProtocolSupport { - private static final Logger LOG = LoggerFactory.getLogger(ProtocolSupport.class); - - protected final ZooKeeper zookeeper; - private AtomicBoolean closed = new AtomicBoolean(false); - private long retryDelay = 500L; - private int retryCount = 3; - private List acl = ZooDefs.Ids.OPEN_ACL_UNSAFE; - - public ProtocolSupport(ZooKeeper zookeeper) { - this.zookeeper = zookeeper; - } - - /** - * Closes this strategy and releases any ZooKeeper resources; but keeps the - * ZooKeeper instance open - */ - public void close() { - if (closed.compareAndSet(false, true)) { - doClose(); - } - } - - /** - * return zookeeper client instance - * @return zookeeper client instance - */ - public ZooKeeper getZookeeper() { - return zookeeper; - } - - /** - * return the acl its using - * @return the acl. 
- */ - public List getAcl() { - return acl; - } - - /** - * set the acl - * @param acl the acl to set to - */ - public void setAcl(List acl) { - this.acl = acl; - } - - /** - * get the retry delay in milliseconds - * @return the retry delay - */ - public long getRetryDelay() { - return retryDelay; + private static final Logger LOG = LoggerFactory.getLogger(ProtocolSupport.class); + + protected final ZooKeeper zookeeper; + private AtomicBoolean closed = new AtomicBoolean(false); + private long retryDelay = 500L; + private int retryCount = 3; + private List acl = ZooDefs.Ids.OPEN_ACL_UNSAFE; + + public ProtocolSupport(ZooKeeper zookeeper) { + this.zookeeper = zookeeper; + } + + /** + * Closes this strategy and releases any ZooKeeper resources; but keeps the + * ZooKeeper instance open + */ + public void close() { + if (closed.compareAndSet(false, true)) { + doClose(); } - - /** - * Sets the time waited between retry delays - * @param retryDelay the retry delay - */ - public void setRetryDelay(long retryDelay) { - this.retryDelay = retryDelay; - } - - /** - * Allow derived classes to perform - * some custom closing operations to release resources - */ - protected void doClose() { - } - - - /** - * Perform the given operation, retrying if the connection fails - * @return object. it needs to be cast to the callee's expected - * return type. - */ - protected Object retryOperation(ZooKeeperOperation operation) - throws KeeperException, InterruptedException { - KeeperException exception = null; - for (int i = 0; i < retryCount; i++) { - try { - return operation.execute(); - } catch (KeeperException.SessionExpiredException e) { - LOG.warn("Session expired for: " + zookeeper + " so reconnecting due to: " + e, e); - throw e; - } catch (KeeperException.ConnectionLossException e) { - if (exception == null) { - exception = e; - } - LOG.debug("Attempt " + i + " failed with connection loss so " + - "attempting to reconnect: " + e, e); - retryDelay(i); - } + } + + /** + * return zookeeper client instance + * @return zookeeper client instance + */ + public ZooKeeper getZookeeper() { + return zookeeper; + } + + /** + * return the acl its using + * @return the acl. + */ + public List getAcl() { + return acl; + } + + /** + * set the acl + * @param acl the acl to set to + */ + public void setAcl(List acl) { + this.acl = acl; + } + + /** + * get the retry delay in milliseconds + * @return the retry delay + */ + public long getRetryDelay() { + return retryDelay; + } + + /** + * Sets the time waited between retry delays + * @param retryDelay the retry delay + */ + public void setRetryDelay(long retryDelay) { + this.retryDelay = retryDelay; + } + + /** + * Allow derived classes to perform + * some custom closing operations to release resources + */ + protected void doClose() { + } + + + /** + * Perform the given operation, retrying if the connection fails + * @return object. it needs to be cast to the callee's expected + * return type. 
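ProtocolSupport.retryOperation above retries the supplied ZooKeeperOperation up to retryCount times when the connection is lost, sleeps attemptCount * retryDelay between attempts, and gives up immediately when the session has expired. The sketch below distills that control flow with stand-in types so it compiles on its own; it is not the class's actual API, which is package-private, and it omits the session-expiry shortcut.

    // Distilled model of ProtocolSupport.retryOperation: retry a failed operation a
    // fixed number of times with a linearly growing delay, keep the first failure,
    // and rethrow it if all attempts fail. RetryableException and Operation are
    // stand-ins for KeeperException.ConnectionLossException and ZooKeeperOperation;
    // they are not types from this patch.
    public class RetrySketch {

        static class RetryableException extends Exception {
            RetryableException(String msg) { super(msg); }
        }

        interface Operation {
            Object execute() throws RetryableException;
        }

        private static final int RETRY_COUNT = 3;      // same default as retryCount
        private static final long RETRY_DELAY = 500L;  // same default as retryDelay (ms)

        static Object retryOperation(Operation operation)
            throws RetryableException, InterruptedException {
            RetryableException failure = null;
            for (int i = 0; i < RETRY_COUNT; i++) {
                try {
                    return operation.execute();
                } catch (RetryableException e) {
                    if (failure == null) {
                        failure = e;                 // keep the first failure, as the original does
                    }
                    if (i > 0) {
                        Thread.sleep(i * RETRY_DELAY);   // linear back-off, mirrors retryDelay(i)
                    }
                }
            }
            throw failure;
        }
    }
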
+ */ + protected Object retryOperation(ZooKeeperOperation operation) + throws KeeperException, InterruptedException { + KeeperException exception = null; + for (int i = 0; i < retryCount; i++) { + try { + return operation.execute(); + } catch (KeeperException.SessionExpiredException e) { + LOG.warn("Session expired for: " + zookeeper + " so reconnecting due to: " + e, e); + throw e; + } catch (KeeperException.ConnectionLossException e) { + if (exception == null) { + exception = e; } - throw exception; + LOG.debug("Attempt " + i + " failed with connection loss so " + + "attempting to reconnect: " + e, e); + retryDelay(i); + } } - - /** - * Ensures that the given path exists with no data, the current - * ACL and no flags - * @param path - */ - protected void ensurePathExists(String path) { - ensureExists(path, null, acl, CreateMode.PERSISTENT); - } - - /** - * Ensures that the given path exists with the given data, ACL and flags - * @param path - * @param acl - * @param flags - */ - protected void ensureExists(final String path, final byte[] data, - final List acl, final CreateMode flags) { - try { - retryOperation(new ZooKeeperOperation() { - public boolean execute() throws KeeperException, InterruptedException { - Stat stat = zookeeper.exists(path, false); - if (stat != null) { - return true; - } - zookeeper.create(path, data, acl, flags); - return true; - } - }); - } catch (KeeperException e) { - LOG.warn("Caught: " + e, e); - } catch (InterruptedException e) { - LOG.warn("Caught: " + e, e); + throw exception; + } + + /** + * Ensures that the given path exists with no data, the current + * ACL and no flags + * @param path + */ + protected void ensurePathExists(String path) { + ensureExists(path, null, acl, CreateMode.PERSISTENT); + } + + /** + * Ensures that the given path exists with the given data, ACL and flags + * @param path + * @param acl + * @param flags + */ + protected void ensureExists(final String path, final byte[] data, + final List acl, final CreateMode flags) { + try { + retryOperation(new ZooKeeperOperation() { + public boolean execute() throws KeeperException, InterruptedException { + Stat stat = zookeeper.exists(path, false); + if (stat != null) { + return true; + } + zookeeper.create(path, data, acl, flags); + return true; } + }); + } catch (KeeperException e) { + LOG.warn("Caught: " + e, e); + } catch (InterruptedException e) { + LOG.warn("Caught: " + e, e); } - - /** - * Returns true if this protocol has been closed - * @return true if this protocol is closed - */ - protected boolean isClosed() { - return closed.get(); - } - - /** - * Performs a retry delay if this is not the first attempt - * @param attemptCount the number of the attempts performed so far - */ - protected void retryDelay(int attemptCount) { - if (attemptCount > 0) { - try { - Thread.sleep(attemptCount * retryDelay); - } catch (InterruptedException e) { - LOG.debug("Failed to sleep: " + e, e); - } - } + } + + /** + * Returns true if this protocol has been closed + * @return true if this protocol is closed + */ + protected boolean isClosed() { + return closed.get(); + } + + /** + * Performs a retry delay if this is not the first attempt + * @param attemptCount the number of the attempts performed so far + */ + protected void retryDelay(int attemptCount) { + if (attemptCount > 0) { + try { + Thread.sleep(attemptCount * retryDelay); + } catch (InterruptedException e) { + LOG.debug("Failed to sleep: " + e, e); + } } + } } diff --git 
a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/WriteLock.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/WriteLock.java index 6838fe9..d991f43 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/WriteLock.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/WriteLock.java @@ -46,258 +46,258 @@ * made is a TODO for sorting using suffixes and the package name. */ public class WriteLock extends ProtocolSupport { - private static final Logger LOG = LoggerFactory.getLogger(WriteLock.class); + private static final Logger LOG = LoggerFactory.getLogger(WriteLock.class); - private final String dir; - private String id; - private ZNodeName idName; - private String ownerId; - private String lastChildId; - private byte[] data = {0x12, 0x34}; - private LockListener callback; - private LockZooKeeperOperation zop; + private final String dir; + private String id; + private ZNodeName idName; + private String ownerId; + private String lastChildId; + private byte[] data = {0x12, 0x34}; + private LockListener callback; + private LockZooKeeperOperation zop; - /** - * zookeeper contructor for writelock - * @param zookeeper zookeeper client instance - * @param dir the parent path you want to use for locking - * @param acl the acls that you want to use for all the paths, - * if null world read/write is used. - */ - public WriteLock(ZooKeeper zookeeper, String dir, List acl) { - super(zookeeper); - this.dir = dir; - if (acl != null) { - setAcl(acl); - } - this.zop = new LockZooKeeperOperation(); + /** + * zookeeper contructor for writelock + * @param zookeeper zookeeper client instance + * @param dir the parent path you want to use for locking + * @param acl the acls that you want to use for all the paths, + * if null world read/write is used. 
+ */ + public WriteLock(ZooKeeper zookeeper, String dir, List acl) { + super(zookeeper); + this.dir = dir; + if (acl != null) { + setAcl(acl); } + this.zop = new LockZooKeeperOperation(); + } - /** - * zookeeper contructor for writelock with callback - * @param zookeeper the zookeeper client instance - * @param dir the parent path you want to use for locking - * @param acl the acls that you want to use for all the paths - * @param callback the call back instance - */ - public WriteLock(ZooKeeper zookeeper, String dir, List acl, - LockListener callback) { - this(zookeeper, dir, acl); - this.callback = callback; - } + /** + * zookeeper contructor for writelock with callback + * @param zookeeper the zookeeper client instance + * @param dir the parent path you want to use for locking + * @param acl the acls that you want to use for all the paths + * @param callback the call back instance + */ + public WriteLock(ZooKeeper zookeeper, String dir, List acl, + LockListener callback) { + this(zookeeper, dir, acl); + this.callback = callback; + } - /** - * return the current locklistener - * @return the locklistener - */ - public LockListener getLockListener() { - return this.callback; - } + /** + * return the current locklistener + * @return the locklistener + */ + public LockListener getLockListener() { + return this.callback; + } - /** - * register a different call back listener - * @param callback the call back instance - */ - public void setLockListener(LockListener callback) { - this.callback = callback; - } + /** + * register a different call back listener + * @param callback the call back instance + */ + public void setLockListener(LockListener callback) { + this.callback = callback; + } - /** - * Removes the lock or associated znode if - * you no longer require the lock. this also - * removes your request in the queue for locking - * in case you do not already hold the lock. - * @throws RuntimeException throws a runtime exception - * if it cannot connect to zookeeper. - */ - public synchronized void unlock() throws RuntimeException { + /** + * Removes the lock or associated znode if + * you no longer require the lock. this also + * removes your request in the queue for locking + * in case you do not already hold the lock. + * @throws RuntimeException throws a runtime exception + * if it cannot connect to zookeeper. + */ + public synchronized void unlock() throws RuntimeException { - if (!isClosed() && id != null) { - // we don't need to retry this operation in the case of failure - // as ZK will remove ephemeral files and we don't wanna hang - // this process when closing if we cannot reconnect to ZK - try { + if (!isClosed() && id != null) { + // we don't need to retry this operation in the case of failure + // as ZK will remove ephemeral files and we don't wanna hang + // this process when closing if we cannot reconnect to ZK + try { - ZooKeeperOperation zopdel = new ZooKeeperOperation() { - public boolean execute() throws KeeperException, - InterruptedException { - zookeeper.delete(id, -1); - return Boolean.TRUE; - } - }; - zopdel.execute(); - } catch (InterruptedException e) { - LOG.warn("Caught: " + e, e); - //set that we have been interrupted. - Thread.currentThread().interrupt(); - } catch (KeeperException.NoNodeException e) { - // do nothing - } catch (KeeperException e) { - LOG.warn("Caught: " + e, e); - throw (RuntimeException) new RuntimeException(e.getMessage()). 
- initCause(e); - } finally { - if (callback != null) { - callback.lockReleased(); - } - id = null; - } + ZooKeeperOperation zopdel = new ZooKeeperOperation() { + public boolean execute() throws KeeperException, + InterruptedException { + zookeeper.delete(id, -1); + return Boolean.TRUE; + } + }; + zopdel.execute(); + } catch (InterruptedException e) { + LOG.warn("Caught: " + e, e); + //set that we have been interrupted. + Thread.currentThread().interrupt(); + } catch (KeeperException.NoNodeException e) { + // do nothing + } catch (KeeperException e) { + LOG.warn("Caught: " + e, e); + throw (RuntimeException) new RuntimeException(e.getMessage()). + initCause(e); + } finally { + if (callback != null) { + callback.lockReleased(); } + id = null; + } } + } - /** - * the watcher called on - * getting watch while watching - * my predecessor + /** + * the watcher called on + * getting watch while watching + * my predecessor + */ + private class LockWatcher implements Watcher { + public void process(WatchedEvent event) { + // lets either become the leader or watch the new/updated node + LOG.debug("Watcher fired on path: " + event.getPath() + " state: " + + event.getState() + " type " + event.getType()); + try { + lock(); + } catch (Exception e) { + LOG.warn("Failed to acquire lock: " + e, e); + } + } + } + + /** + * a zoookeeper operation that is mainly responsible + * for all the magic required for locking. + */ + private class LockZooKeeperOperation implements ZooKeeperOperation { + + /** find if we have been created earler if not create our node + * + * @param prefix the prefix node + * @param zookeeper teh zookeeper client + * @param dir the dir paretn + * @throws KeeperException + * @throws InterruptedException */ - private class LockWatcher implements Watcher { - public void process(WatchedEvent event) { - // lets either become the leader or watch the new/updated node - LOG.debug("Watcher fired on path: " + event.getPath() + " state: " + - event.getState() + " type " + event.getType()); - try { - lock(); - } catch (Exception e) { - LOG.warn("Failed to acquire lock: " + e, e); - } + private void findPrefixInChildren(String prefix, ZooKeeper zookeeper, String dir) + throws KeeperException, InterruptedException { + List names = zookeeper.getChildren(dir, false); + for (String name : names) { + if (name.startsWith(prefix)) { + id = name; + if (LOG.isDebugEnabled()) { + LOG.debug("Found id created last time: " + id); + } + break; } + } + if (id == null) { + id = zookeeper.create(dir + "/" + prefix, data, + getAcl(), EPHEMERAL_SEQUENTIAL); + + if (LOG.isDebugEnabled()) { + LOG.debug("Created id: " + id); + } + } + } /** - * a zoookeeper operation that is mainly responsible - * for all the magic required for locking. 
+ * the command that is run and retried for actually + * obtaining the lock + * @return if the command was successful or not */ - private class LockZooKeeperOperation implements ZooKeeperOperation { - - /** find if we have been created earler if not create our node - * - * @param prefix the prefix node - * @param zookeeper teh zookeeper client - * @param dir the dir paretn - * @throws KeeperException - * @throws InterruptedException - */ - private void findPrefixInChildren(String prefix, ZooKeeper zookeeper, String dir) - throws KeeperException, InterruptedException { - List names = zookeeper.getChildren(dir, false); + public boolean execute() throws KeeperException, InterruptedException { + do { + if (id == null) { + long sessionId = zookeeper.getSessionId(); + String prefix = "x-" + sessionId + "-"; + // lets try look up the current ID if we failed + // in the middle of creating the znode + findPrefixInChildren(prefix, zookeeper, dir); + idName = new ZNodeName(id); + } + if (id != null) { + List names = zookeeper.getChildren(dir, false); + if (names.isEmpty()) { + LOG.warn("No children in: " + dir + " when we've just " + + "created one! Lets recreate it..."); + // lets force the recreation of the id + id = null; + } else { + // lets sort them explicitly (though they do seem to come back in order ususally :) + SortedSet sortedNames = new TreeSet(); for (String name : names) { - if (name.startsWith(prefix)) { - id = name; - if (LOG.isDebugEnabled()) { - LOG.debug("Found id created last time: " + id); - } - break; - } + //TODO: Just use the suffix to sort. + sortedNames.add(new ZNodeName(dir + "/" + name)); } - if (id == null) { - id = zookeeper.create(dir + "/" + prefix, data, - getAcl(), EPHEMERAL_SEQUENTIAL); - - if (LOG.isDebugEnabled()) { - LOG.debug("Created id: " + id); - } - } - - } - - /** - * the command that is run and retried for actually - * obtaining the lock - * @return if the command was successful or not - */ - public boolean execute() throws KeeperException, InterruptedException { - do { - if (id == null) { - long sessionId = zookeeper.getSessionId(); - String prefix = "x-" + sessionId + "-"; - // lets try look up the current ID if we failed - // in the middle of creating the znode - findPrefixInChildren(prefix, zookeeper, dir); - idName = new ZNodeName(id); - } - if (id != null) { - List names = zookeeper.getChildren(dir, false); - if (names.isEmpty()) { - LOG.warn("No children in: " + dir + " when we've just " + - "created one! Lets recreate it..."); - // lets force the recreation of the id - id = null; - } else { - // lets sort them explicitly (though they do seem to come back in order ususally :) - SortedSet sortedNames = new TreeSet(); - for (String name : names) { - //TODO: Just use the suffix to sort. 
- sortedNames.add(new ZNodeName(dir + "/" + name)); - } - ownerId = sortedNames.first().getName(); - SortedSet lessThanMe = sortedNames.headSet(idName); - if (!lessThanMe.isEmpty()) { - ZNodeName lastChildName = lessThanMe.last(); - lastChildId = lastChildName.getName(); - if (LOG.isDebugEnabled()) { - LOG.debug("watching less than me node: " + lastChildId); - } - Stat stat = zookeeper.exists(lastChildId, new LockWatcher()); - if (stat != null) { - return Boolean.FALSE; - } else { - LOG.warn("Could not find the" + - " stats for less than me: " + lastChildName.getName()); - } - } else { - if (isOwner()) { - if (callback != null) { - callback.lockAcquired(); - } - return Boolean.TRUE; - } - } - } + ownerId = sortedNames.first().getName(); + SortedSet lessThanMe = sortedNames.headSet(idName); + if (!lessThanMe.isEmpty()) { + ZNodeName lastChildName = lessThanMe.last(); + lastChildId = lastChildName.getName(); + if (LOG.isDebugEnabled()) { + LOG.debug("watching less than me node: " + lastChildId); + } + Stat stat = zookeeper.exists(lastChildId, new LockWatcher()); + if (stat != null) { + return Boolean.FALSE; + } else { + LOG.warn("Could not find the" + + " stats for less than me: " + lastChildName.getName()); + } + } else { + if (isOwner()) { + if (callback != null) { + callback.lockAcquired(); } + return Boolean.TRUE; + } } - while (id == null); - return Boolean.FALSE; + } } + } + while (id == null); + return Boolean.FALSE; } + } - ; + ; - /** - * Attempts to acquire the exclusive write lock returning whether or not it was - * acquired. Note that the exclusive lock may be acquired some time later after - * this method has been invoked due to the current lock owner going away. - */ - public synchronized boolean lock() throws KeeperException, InterruptedException { - if (isClosed()) { - return false; - } - ensurePathExists(dir); - - return (Boolean) retryOperation(zop); + /** + * Attempts to acquire the exclusive write lock returning whether or not it was + * acquired. Note that the exclusive lock may be acquired some time later after + * this method has been invoked due to the current lock owner going away. + */ + public synchronized boolean lock() throws KeeperException, InterruptedException { + if (isClosed()) { + return false; } + ensurePathExists(dir); - /** - * return the parent dir for lock - * @return the parent dir used for locks. - */ - public String getDir() { - return dir; - } + return (Boolean) retryOperation(zop); + } - /** - * Returns true if this node is the owner of the - * lock (or the leader) - */ - public boolean isOwner() { - return id != null && ownerId != null && id.equals(ownerId); - } + /** + * return the parent dir for lock + * @return the parent dir used for locks. 
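
For readers following the execute() loop above, the decision it makes is: the client whose znode sorts first owns the lock, and every other client watches only the entry immediately before its own, so a single released lock wakes a single waiter rather than the whole herd. The sketch below walks through that decision with plain strings; the sample paths are invented and chosen so that natural String ordering matches the ZNodeName ordering the patch actually uses.

    import java.util.SortedSet;
    import java.util.TreeSet;

    public class PredecessorSketch {
      public static void main(String[] args) {
        SortedSet<String> sortedNames = new TreeSet<String>();
        sortedNames.add("/lock/x-11-0000000003");
        sortedNames.add("/lock/x-22-0000000004");
        sortedNames.add("/lock/x-33-0000000005");

        String id = "/lock/x-22-0000000004";     // this client's node
        String ownerId = sortedNames.first();    // lowest sequence owns the lock

        SortedSet<String> lessThanMe = sortedNames.headSet(id);
        if (lessThanMe.isEmpty()) {
          System.out.println("we own the lock: " + id.equals(ownerId));
        } else {
          // Watch only the immediate predecessor; when it goes away, lock() is retried.
          System.out.println("watch " + lessThanMe.last());  // prints watch /lock/x-11-0000000003
        }
      }
    }
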
+ */ + public String getDir() { + return dir; + } - /** - * return the id for this lock - * @return the id for this lock - */ - public String getId() { - return this.id; - } + /** + * Returns true if this node is the owner of the + * lock (or the leader) + */ + public boolean isOwner() { + return id != null && ownerId != null && id.equals(ownerId); + } + + /** + * return the id for this lock + * @return the id for this lock + */ + public String getId() { + return this.id; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZNodeName.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZNodeName.java index 51f0f18..b84a72a 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZNodeName.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZNodeName.java @@ -29,85 +29,85 @@ * change in package name. */ public class ZNodeName implements Comparable { - private final String name; - private String prefix; - private int sequence = -1; - private static final Logger LOG = LoggerFactory.getLogger(ZNodeName.class); + private final String name; + private String prefix; + private int sequence = -1; + private static final Logger LOG = LoggerFactory.getLogger(ZNodeName.class); - public ZNodeName(String name) { - if (name == null) { - throw new NullPointerException("id cannot be null"); - } - this.name = name; - this.prefix = name; - int idx = name.lastIndexOf('-'); - if (idx >= 0) { - this.prefix = name.substring(0, idx); - try { - this.sequence = Integer.parseInt(name.substring(idx + 1)); - // If an exception occurred we misdetected a sequence suffix, - // so return -1. - } catch (NumberFormatException e) { - LOG.info("Number format exception for " + idx, e); - } catch (ArrayIndexOutOfBoundsException e) { - LOG.info("Array out of bounds for " + idx, e); - } - } + public ZNodeName(String name) { + if (name == null) { + throw new NullPointerException("id cannot be null"); } - - @Override - public String toString() { - return name.toString(); + this.name = name; + this.prefix = name; + int idx = name.lastIndexOf('-'); + if (idx >= 0) { + this.prefix = name.substring(0, idx); + try { + this.sequence = Integer.parseInt(name.substring(idx + 1)); + // If an exception occurred we misdetected a sequence suffix, + // so return -1. + } catch (NumberFormatException e) { + LOG.info("Number format exception for " + idx, e); + } catch (ArrayIndexOutOfBoundsException e) { + LOG.info("Array out of bounds for " + idx, e); + } } + } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + @Override + public String toString() { + return name.toString(); + } - ZNodeName sequence = (ZNodeName) o; + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; - if (!name.equals(sequence.name)) return false; + ZNodeName sequence = (ZNodeName) o; - return true; - } + if (!name.equals(sequence.name)) return false; - @Override - public int hashCode() { - return name.hashCode() + 37; - } + return true; + } - public int compareTo(ZNodeName that) { - int answer = this.prefix.compareTo(that.prefix); - if (answer == 0) { - int s1 = this.sequence; - int s2 = that.sequence; - if (s1 == -1 && s2 == -1) { - return this.name.compareTo(that.name); - } - answer = s1 == -1 ? 1 : s2 == -1 ? 
-1 : s1 - s2; - } - return answer; - } + @Override + public int hashCode() { + return name.hashCode() + 37; + } - /** - * Returns the name of the znode - */ - public String getName() { - return name; + public int compareTo(ZNodeName that) { + int answer = this.prefix.compareTo(that.prefix); + if (answer == 0) { + int s1 = this.sequence; + int s2 = that.sequence; + if (s1 == -1 && s2 == -1) { + return this.name.compareTo(that.name); + } + answer = s1 == -1 ? 1 : s2 == -1 ? -1 : s1 - s2; } + return answer; + } - /** - * Returns the sequence number - */ - public int getZNodeName() { - return sequence; - } + /** + * Returns the name of the znode + */ + public String getName() { + return name; + } - /** - * Returns the text prefix before the sequence number - */ - public String getPrefix() { - return prefix; - } + /** + * Returns the sequence number + */ + public int getZNodeName() { + return sequence; + } + + /** + * Returns the text prefix before the sequence number + */ + public String getPrefix() { + return prefix; + } } diff --git a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java index 9291125..317cf31 100644 --- a/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java +++ b/hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java @@ -29,13 +29,13 @@ */ public interface ZooKeeperOperation { - /** - * Performs the operation - which may be involved multiple times if the connection - * to ZooKeeper closes during this operation - * - * @return the result of the operation or null - * @throws KeeperException - * @throws InterruptedException - */ - public boolean execute() throws KeeperException, InterruptedException; + /** + * Performs the operation - which may be involved multiple times if the connection + * to ZooKeeper closes during this operation + * + * @return the result of the operation or null + * @throws KeeperException + * @throws InterruptedException + */ + public boolean execute() throws KeeperException, InterruptedException; } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/ManyMiniCluster.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/ManyMiniCluster.java index 8557cda..4f8f357 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/ManyMiniCluster.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/ManyMiniCluster.java @@ -44,327 +44,327 @@ */ public class ManyMiniCluster { - //MR stuff - private boolean miniMRClusterEnabled; - private MiniMRCluster mrCluster; - private int numTaskTrackers; - private JobConf jobConf; - - //HBase stuff - private boolean miniHBaseClusterEnabled; - private MiniHBaseCluster hbaseCluster; - private String hbaseRoot; - private Configuration hbaseConf; - private String hbaseDir; - - //ZK Stuff - private boolean miniZookeeperClusterEnabled; - private MiniZooKeeperCluster zookeeperCluster; - private int zookeeperPort; - private String zookeeperDir; - - //DFS Stuff - private MiniDFSCluster dfsCluster; - - //Hive Stuff - private boolean miniHiveMetastoreEnabled; - private HiveConf hiveConf; - private HiveMetaStoreClient hiveMetaStoreClient; - - private final File workDir; - private boolean started = false; - - - /** - * create a cluster instance using a builder which will expose configurable 
options - * @param workDir working directory ManyMiniCluster will use for all of it's *Minicluster instances - * @return a Builder instance - */ - public static Builder create(File workDir) { - return new Builder(workDir); - } - - private ManyMiniCluster(Builder b) { - workDir = b.workDir; - numTaskTrackers = b.numTaskTrackers; - hiveConf = b.hiveConf; - jobConf = b.jobConf; - hbaseConf = b.hbaseConf; - miniMRClusterEnabled = b.miniMRClusterEnabled; - miniHBaseClusterEnabled = b.miniHBaseClusterEnabled; - miniHiveMetastoreEnabled = b.miniHiveMetastoreEnabled; - miniZookeeperClusterEnabled = b.miniZookeeperClusterEnabled; - } - - protected synchronized void start() { - try { - if (!started) { - FileUtil.fullyDelete(workDir); - if (miniMRClusterEnabled) { - setupMRCluster(); - } - if (miniZookeeperClusterEnabled || miniHBaseClusterEnabled) { - miniZookeeperClusterEnabled = true; - setupZookeeper(); - } - if (miniHBaseClusterEnabled) { - setupHBaseCluster(); - } - if (miniHiveMetastoreEnabled) { - setUpMetastore(); - } - } - } catch (Exception e) { - throw new IllegalStateException("Failed to setup cluster", e); + //MR stuff + private boolean miniMRClusterEnabled; + private MiniMRCluster mrCluster; + private int numTaskTrackers; + private JobConf jobConf; + + //HBase stuff + private boolean miniHBaseClusterEnabled; + private MiniHBaseCluster hbaseCluster; + private String hbaseRoot; + private Configuration hbaseConf; + private String hbaseDir; + + //ZK Stuff + private boolean miniZookeeperClusterEnabled; + private MiniZooKeeperCluster zookeeperCluster; + private int zookeeperPort; + private String zookeeperDir; + + //DFS Stuff + private MiniDFSCluster dfsCluster; + + //Hive Stuff + private boolean miniHiveMetastoreEnabled; + private HiveConf hiveConf; + private HiveMetaStoreClient hiveMetaStoreClient; + + private final File workDir; + private boolean started = false; + + + /** + * create a cluster instance using a builder which will expose configurable options + * @param workDir working directory ManyMiniCluster will use for all of it's *Minicluster instances + * @return a Builder instance + */ + public static Builder create(File workDir) { + return new Builder(workDir); + } + + private ManyMiniCluster(Builder b) { + workDir = b.workDir; + numTaskTrackers = b.numTaskTrackers; + hiveConf = b.hiveConf; + jobConf = b.jobConf; + hbaseConf = b.hbaseConf; + miniMRClusterEnabled = b.miniMRClusterEnabled; + miniHBaseClusterEnabled = b.miniHBaseClusterEnabled; + miniHiveMetastoreEnabled = b.miniHiveMetastoreEnabled; + miniZookeeperClusterEnabled = b.miniZookeeperClusterEnabled; + } + + protected synchronized void start() { + try { + if (!started) { + FileUtil.fullyDelete(workDir); + if (miniMRClusterEnabled) { + setupMRCluster(); } - } - - protected synchronized void stop() { - if (hbaseCluster != null) { - HConnectionManager.deleteAllConnections(true); - try { - hbaseCluster.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - hbaseCluster = null; - } - if (zookeeperCluster != null) { - try { - zookeeperCluster.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - zookeeperCluster = null; + if (miniZookeeperClusterEnabled || miniHBaseClusterEnabled) { + miniZookeeperClusterEnabled = true; + setupZookeeper(); } - if (mrCluster != null) { - try { - mrCluster.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - mrCluster = null; + if (miniHBaseClusterEnabled) { + setupHBaseCluster(); } - if (dfsCluster != null) { - try { - dfsCluster.getFileSystem().close(); - 
dfsCluster.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - dfsCluster = null; + if (miniHiveMetastoreEnabled) { + setUpMetastore(); } - try { - FileSystem.closeAll(); - } catch (IOException e) { - e.printStackTrace(); - } - started = false; + } + } catch (Exception e) { + throw new IllegalStateException("Failed to setup cluster", e); } - - /** - * @return Configuration of mini HBase cluster - */ - public Configuration getHBaseConf() { - return HBaseConfiguration.create(hbaseConf); + } + + protected synchronized void stop() { + if (hbaseCluster != null) { + HConnectionManager.deleteAllConnections(true); + try { + hbaseCluster.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + hbaseCluster = null; } - - /** - * @return Configuration of mini MR cluster - */ - public Configuration getJobConf() { - return new Configuration(jobConf); + if (zookeeperCluster != null) { + try { + zookeeperCluster.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + zookeeperCluster = null; } - - /** - * @return Configuration of Hive Metastore, this is a standalone not a daemon - */ - public HiveConf getHiveConf() { - return new HiveConf(hiveConf); + if (mrCluster != null) { + try { + mrCluster.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + mrCluster = null; } - - /** - * @return Filesystem used by MiniMRCluster and MiniHBaseCluster - */ - public FileSystem getFileSystem() { - try { - return FileSystem.get(jobConf); - } catch (IOException e) { - throw new IllegalStateException("Failed to get FileSystem", e); - } + if (dfsCluster != null) { + try { + dfsCluster.getFileSystem().close(); + dfsCluster.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + dfsCluster = null; } - - /** - * @return Metastore client instance - */ - public HiveMetaStoreClient getHiveMetaStoreClient() { - return hiveMetaStoreClient; + try { + FileSystem.closeAll(); + } catch (IOException e) { + e.printStackTrace(); } - - private void setupMRCluster() { - try { - final int jobTrackerPort = findFreePort(); - final int taskTrackerPort = findFreePort(); - - if (jobConf == null) - jobConf = new JobConf(); - - jobConf.setInt("mapred.submit.replication", 1); - jobConf.set("yarn.scheduler.capacity.root.queues", "default"); - jobConf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - //conf.set("hadoop.job.history.location",new File(workDir).getAbsolutePath()+"/history"); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - - mrCluster = new MiniMRCluster(jobTrackerPort, - taskTrackerPort, - numTaskTrackers, - getFileSystem().getUri().toString(), - numTaskTrackers, - null, - null, - null, - jobConf); - - jobConf = mrCluster.createJobConf(); - } catch (IOException e) { - throw new IllegalStateException("Failed to Setup MR Cluster", e); - } + started = false; + } + + /** + * @return Configuration of mini HBase cluster + */ + public Configuration getHBaseConf() { + return HBaseConfiguration.create(hbaseConf); + } + + /** + * @return Configuration of mini MR cluster + */ + public Configuration getJobConf() { + return new Configuration(jobConf); + } + + /** + * @return Configuration of Hive Metastore, this is a standalone not a daemon + */ + public HiveConf getHiveConf() { + return new HiveConf(hiveConf); + } + + /** + * @return Filesystem used by MiniMRCluster and MiniHBaseCluster + */ + public FileSystem getFileSystem() { + try { + return FileSystem.get(jobConf); + } catch (IOException e) { + throw new 
IllegalStateException("Failed to get FileSystem", e); } - - private void setupZookeeper() { - try { - zookeeperDir = new File(workDir, "zk").getAbsolutePath(); - zookeeperPort = findFreePort(); - zookeeperCluster = new MiniZooKeeperCluster(); - zookeeperCluster.setDefaultClientPort(zookeeperPort); - zookeeperCluster.startup(new File(zookeeperDir)); - } catch (Exception e) { - throw new IllegalStateException("Failed to Setup Zookeeper Cluster", e); - } + } + + /** + * @return Metastore client instance + */ + public HiveMetaStoreClient getHiveMetaStoreClient() { + return hiveMetaStoreClient; + } + + private void setupMRCluster() { + try { + final int jobTrackerPort = findFreePort(); + final int taskTrackerPort = findFreePort(); + + if (jobConf == null) + jobConf = new JobConf(); + + jobConf.setInt("mapred.submit.replication", 1); + jobConf.set("yarn.scheduler.capacity.root.queues", "default"); + jobConf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + //conf.set("hadoop.job.history.location",new File(workDir).getAbsolutePath()+"/history"); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + + mrCluster = new MiniMRCluster(jobTrackerPort, + taskTrackerPort, + numTaskTrackers, + getFileSystem().getUri().toString(), + numTaskTrackers, + null, + null, + null, + jobConf); + + jobConf = mrCluster.createJobConf(); + } catch (IOException e) { + throw new IllegalStateException("Failed to Setup MR Cluster", e); } - - private void setupHBaseCluster() { - final int numRegionServers = 1; - - try { - hbaseDir = new File(workDir, "hbase").toString(); - hbaseDir = hbaseDir.replaceAll("\\\\", "/"); - hbaseRoot = "file://" + hbaseDir; - - if (hbaseConf == null) - hbaseConf = HBaseConfiguration.create(); - - hbaseConf.set("hbase.rootdir", hbaseRoot); - hbaseConf.set("hbase.master", "local"); - hbaseConf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zookeeperPort); - hbaseConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1"); - hbaseConf.setInt("hbase.master.port", findFreePort()); - hbaseConf.setInt("hbase.master.info.port", -1); - hbaseConf.setInt("hbase.regionserver.port", findFreePort()); - hbaseConf.setInt("hbase.regionserver.info.port", -1); - - hbaseCluster = new MiniHBaseCluster(hbaseConf, numRegionServers); - hbaseConf.set("hbase.master", hbaseCluster.getMaster().getServerName().getHostAndPort()); - //opening the META table ensures that cluster is running - new HTable(hbaseConf, HConstants.META_TABLE_NAME); - } catch (Exception e) { - throw new IllegalStateException("Failed to setup HBase Cluster", e); - } + } + + private void setupZookeeper() { + try { + zookeeperDir = new File(workDir, "zk").getAbsolutePath(); + zookeeperPort = findFreePort(); + zookeeperCluster = new MiniZooKeeperCluster(); + zookeeperCluster.setDefaultClientPort(zookeeperPort); + zookeeperCluster.startup(new File(zookeeperDir)); + } catch (Exception e) { + throw new IllegalStateException("Failed to Setup Zookeeper Cluster", e); } - - private void setUpMetastore() throws Exception { - if (hiveConf == null) - hiveConf = new HiveConf(this.getClass()); - - //The default org.apache.hadoop.hive.ql.hooks.PreExecutePrinter hook - //is present only in the ql/test directory - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, - "jdbc:derby:" + new File(workDir + "/metastore_db") + 
";create=true"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.toString(), - new File(workDir, "warehouse").toString()); - //set where derby logs - File derbyLogFile = new File(workDir + "/derby.log"); - derbyLogFile.createNewFile(); - System.setProperty("derby.stream.error.file", derbyLogFile.getPath()); + } + + private void setupHBaseCluster() { + final int numRegionServers = 1; + + try { + hbaseDir = new File(workDir, "hbase").toString(); + hbaseDir = hbaseDir.replaceAll("\\\\", "/"); + hbaseRoot = "file://" + hbaseDir; + + if (hbaseConf == null) + hbaseConf = HBaseConfiguration.create(); + + hbaseConf.set("hbase.rootdir", hbaseRoot); + hbaseConf.set("hbase.master", "local"); + hbaseConf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zookeeperPort); + hbaseConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1"); + hbaseConf.setInt("hbase.master.port", findFreePort()); + hbaseConf.setInt("hbase.master.info.port", -1); + hbaseConf.setInt("hbase.regionserver.port", findFreePort()); + hbaseConf.setInt("hbase.regionserver.info.port", -1); + + hbaseCluster = new MiniHBaseCluster(hbaseConf, numRegionServers); + hbaseConf.set("hbase.master", hbaseCluster.getMaster().getServerName().getHostAndPort()); + //opening the META table ensures that cluster is running + new HTable(hbaseConf, HConstants.META_TABLE_NAME); + } catch (Exception e) { + throw new IllegalStateException("Failed to setup HBase Cluster", e); + } + } + + private void setUpMetastore() throws Exception { + if (hiveConf == null) + hiveConf = new HiveConf(this.getClass()); + + //The default org.apache.hadoop.hive.ql.hooks.PreExecutePrinter hook + //is present only in the ql/test directory + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, + "jdbc:derby:" + new File(workDir + "/metastore_db") + ";create=true"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.toString(), + new File(workDir, "warehouse").toString()); + //set where derby logs + File derbyLogFile = new File(workDir + "/derby.log"); + derbyLogFile.createNewFile(); + System.setProperty("derby.stream.error.file", derbyLogFile.getPath()); // Driver driver = new Driver(hiveConf); // SessionState.start(new CliSessionState(hiveConf)); - hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf); - } - - private static int findFreePort() throws IOException { - ServerSocket server = new ServerSocket(0); - int port = server.getLocalPort(); - server.close(); - return port; - } + hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf); + } - public static class Builder { - private File workDir; - private int numTaskTrackers = 1; - private JobConf jobConf; - private Configuration hbaseConf; - private HiveConf hiveConf; + private static int findFreePort() throws IOException { + ServerSocket server = new ServerSocket(0); + int port = server.getLocalPort(); + server.close(); + return port; + } - private boolean miniMRClusterEnabled = true; - private boolean miniHBaseClusterEnabled = true; - private boolean miniHiveMetastoreEnabled = true; - private boolean miniZookeeperClusterEnabled = true; + public static class Builder { + private File workDir; + private int numTaskTrackers = 1; + private JobConf jobConf; + private Configuration hbaseConf; + private HiveConf hiveConf; + private boolean miniMRClusterEnabled = true; + private boolean miniHBaseClusterEnabled = true; + private boolean 
miniHiveMetastoreEnabled = true; + private boolean miniZookeeperClusterEnabled = true; - private Builder(File workDir) { - this.workDir = workDir; - } - public Builder numTaskTrackers(int num) { - numTaskTrackers = num; - return this; - } + private Builder(File workDir) { + this.workDir = workDir; + } - public Builder jobConf(JobConf jobConf) { - this.jobConf = jobConf; - return this; - } + public Builder numTaskTrackers(int num) { + numTaskTrackers = num; + return this; + } - public Builder hbaseConf(Configuration hbaseConf) { - this.hbaseConf = hbaseConf; - return this; - } + public Builder jobConf(JobConf jobConf) { + this.jobConf = jobConf; + return this; + } - public Builder hiveConf(HiveConf hiveConf) { - this.hiveConf = hiveConf; - return this; - } + public Builder hbaseConf(Configuration hbaseConf) { + this.hbaseConf = hbaseConf; + return this; + } - public Builder miniMRClusterEnabled(boolean enabled) { - this.miniMRClusterEnabled = enabled; - return this; - } + public Builder hiveConf(HiveConf hiveConf) { + this.hiveConf = hiveConf; + return this; + } - public Builder miniHBaseClusterEnabled(boolean enabled) { - this.miniHBaseClusterEnabled = enabled; - return this; - } + public Builder miniMRClusterEnabled(boolean enabled) { + this.miniMRClusterEnabled = enabled; + return this; + } - public Builder miniZookeeperClusterEnabled(boolean enabled) { - this.miniZookeeperClusterEnabled = enabled; - return this; - } + public Builder miniHBaseClusterEnabled(boolean enabled) { + this.miniHBaseClusterEnabled = enabled; + return this; + } - public Builder miniHiveMetastoreEnabled(boolean enabled) { - this.miniHiveMetastoreEnabled = enabled; - return this; - } + public Builder miniZookeeperClusterEnabled(boolean enabled) { + this.miniZookeeperClusterEnabled = enabled; + return this; + } + public Builder miniHiveMetastoreEnabled(boolean enabled) { + this.miniHiveMetastoreEnabled = enabled; + return this; + } - public ManyMiniCluster build() { - return new ManyMiniCluster(this); - } + public ManyMiniCluster build() { + return new ManyMiniCluster(this); } + + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/SkeletonHBaseTest.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/SkeletonHBaseTest.java index aefe450..f52b020 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/SkeletonHBaseTest.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/SkeletonHBaseTest.java @@ -43,195 +43,195 @@ */ public abstract class SkeletonHBaseTest { - protected static String TEST_DIR = "/tmp/build/test/data/"; - - protected final static String DEFAULT_CONTEXT_HANDLE = "default"; - - protected static Map contextMap = new HashMap(); - protected static Set tableNames = new HashSet(); - - /** - * Allow tests to alter the default MiniCluster configuration. 
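
To show how the Builder above is meant to be consumed, here is a hypothetical usage sketch. It assumes it lives in the org.apache.hcatalog.hbase test tree (start() and stop() are protected, not public), assumes the storage-handler test classpath with the Hadoop, HBase and ZooKeeper mini-cluster jars, and the work directory path is illustrative only.

    package org.apache.hcatalog.hbase;

    import java.io.File;

    public class ManyMiniClusterUsageSketch {
      public static void main(String[] args) {
        File workDir = new File("/tmp/manyminicluster-demo");  // illustrative path

        ManyMiniCluster cluster = ManyMiniCluster.create(workDir)
            .numTaskTrackers(1)
            .miniMRClusterEnabled(true)
            .miniHBaseClusterEnabled(true)     // start() then forces the ZooKeeper mini cluster on too
            .miniHiveMetastoreEnabled(false)   // skip the metastore for a pure-HBase test
            .build();

        cluster.start();
        try {
          // The patched code points the mini HBase cluster at 127.0.0.1.
          System.out.println("HBase quorum: "
              + cluster.getHBaseConf().get("hbase.zookeeper.quorum"));
        } finally {
          cluster.stop();
        }
      }
    }
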
- * (requires static initializer block as all setup here is static) - */ - protected static Configuration testConf = null; - - protected void createTable(String tableName, String[] families) { - try { - HBaseAdmin admin = new HBaseAdmin(getHbaseConf()); - HTableDescriptor tableDesc = new HTableDescriptor(tableName); - for (String family : families) { - HColumnDescriptor columnDescriptor = new HColumnDescriptor(family); - tableDesc.addFamily(columnDescriptor); - } - admin.createTable(tableDesc); - } catch (Exception e) { - e.printStackTrace(); - throw new IllegalStateException(e); - } - + protected static String TEST_DIR = "/tmp/build/test/data/"; + + protected final static String DEFAULT_CONTEXT_HANDLE = "default"; + + protected static Map contextMap = new HashMap(); + protected static Set tableNames = new HashSet(); + + /** + * Allow tests to alter the default MiniCluster configuration. + * (requires static initializer block as all setup here is static) + */ + protected static Configuration testConf = null; + + protected void createTable(String tableName, String[] families) { + try { + HBaseAdmin admin = new HBaseAdmin(getHbaseConf()); + HTableDescriptor tableDesc = new HTableDescriptor(tableName); + for (String family : families) { + HColumnDescriptor columnDescriptor = new HColumnDescriptor(family); + tableDesc.addFamily(columnDescriptor); + } + admin.createTable(tableDesc); + } catch (Exception e) { + e.printStackTrace(); + throw new IllegalStateException(e); } - protected String newTableName(String prefix) { - String name = null; - int tries = 100; - do { - name = prefix + "_" + Math.abs(new Random().nextLong()); - } while (tableNames.contains(name) && --tries > 0); - if (tableNames.contains(name)) - throw new IllegalStateException("Couldn't find a unique table name, tableNames size: " + tableNames.size()); - tableNames.add(name); - return name; + } + + protected String newTableName(String prefix) { + String name = null; + int tries = 100; + do { + name = prefix + "_" + Math.abs(new Random().nextLong()); + } while (tableNames.contains(name) && --tries > 0); + if (tableNames.contains(name)) + throw new IllegalStateException("Couldn't find a unique table name, tableNames size: " + tableNames.size()); + tableNames.add(name); + return name; + } + + + /** + * startup an hbase cluster instance before a test suite runs + */ + @BeforeClass + public static void setup() { + if (!contextMap.containsKey(getContextHandle())) + contextMap.put(getContextHandle(), new Context(getContextHandle())); + + contextMap.get(getContextHandle()).start(); + } + + /** + * shutdown an hbase cluster instance ant the end of the test suite + */ + @AfterClass + public static void tearDown() { + contextMap.get(getContextHandle()).stop(); + } + + /** + * override this with a different context handle if tests suites are run simultaneously + * and ManyMiniCluster instances shouldn't be shared + * @return + */ + public static String getContextHandle() { + return DEFAULT_CONTEXT_HANDLE; + } + + /** + * @return working directory for a given test context, which normally is a test suite + */ + public String getTestDir() { + return contextMap.get(getContextHandle()).getTestDir(); + } + + /** + * @return ManyMiniCluster instance + */ + public ManyMiniCluster getCluster() { + return contextMap.get(getContextHandle()).getCluster(); + } + + /** + * @return configuration of MiniHBaseCluster + */ + public Configuration getHbaseConf() { + return contextMap.get(getContextHandle()).getHbaseConf(); + } + + /** + * @return configuration 
of MiniMRCluster + */ + public Configuration getJobConf() { + return contextMap.get(getContextHandle()).getJobConf(); + } + + /** + * @return configuration of Hive Metastore + */ + public HiveConf getHiveConf() { + return contextMap.get(getContextHandle()).getHiveConf(); + } + + /** + * @return filesystem used by ManyMiniCluster daemons + */ + public FileSystem getFileSystem() { + return contextMap.get(getContextHandle()).getFileSystem(); + } + + /** + * class used to encapsulate a context which is normally used by + * a single TestSuite or across TestSuites when multi-threaded testing is turned on + */ + public static class Context { + protected String testDir; + protected ManyMiniCluster cluster; + + protected Configuration hbaseConf; + protected Configuration jobConf; + protected HiveConf hiveConf; + + protected FileSystem fileSystem; + + protected int usageCount = 0; + + public Context(String handle) { + testDir = new File(TEST_DIR + "/test_" + handle + "_" + Math.abs(new Random().nextLong()) + "/").getPath(); + System.out.println("Cluster work directory: " + testDir); } - - /** - * startup an hbase cluster instance before a test suite runs - */ - @BeforeClass - public static void setup() { - if (!contextMap.containsKey(getContextHandle())) - contextMap.put(getContextHandle(), new Context(getContextHandle())); - - contextMap.get(getContextHandle()).start(); + public void start() { + if (usageCount++ == 0) { + ManyMiniCluster.Builder b = ManyMiniCluster.create(new File(testDir)); + if (testConf != null) { + b.hbaseConf(HBaseConfiguration.create(testConf)); + } + cluster = b.build(); + cluster.start(); + this.hbaseConf = cluster.getHBaseConf(); + jobConf = cluster.getJobConf(); + fileSystem = cluster.getFileSystem(); + hiveConf = cluster.getHiveConf(); + } } - /** - * shutdown an hbase cluster instance ant the end of the test suite - */ - @AfterClass - public static void tearDown() { - contextMap.get(getContextHandle()).stop(); - } + public void stop() { + if (--usageCount == 0) { + try { + cluster.stop(); + cluster = null; + } finally { + System.out.println("Trying to cleanup: " + testDir); + try { + FileSystem fs = FileSystem.get(jobConf); + fs.delete(new Path(testDir), true); + } catch (IOException e) { + throw new IllegalStateException("Failed to cleanup test dir", e); + } - /** - * override this with a different context handle if tests suites are run simultaneously - * and ManyMiniCluster instances shouldn't be shared - * @return - */ - public static String getContextHandle() { - return DEFAULT_CONTEXT_HANDLE; + } + } } - /** - * @return working directory for a given test context, which normally is a test suite - */ public String getTestDir() { - return contextMap.get(getContextHandle()).getTestDir(); + return testDir; } - /** - * @return ManyMiniCluster instance - */ public ManyMiniCluster getCluster() { - return contextMap.get(getContextHandle()).getCluster(); + return cluster; } - /** - * @return configuration of MiniHBaseCluster - */ public Configuration getHbaseConf() { - return contextMap.get(getContextHandle()).getHbaseConf(); + return hbaseConf; } - /** - * @return configuration of MiniMRCluster - */ public Configuration getJobConf() { - return contextMap.get(getContextHandle()).getJobConf(); + return jobConf; } - /** - * @return configuration of Hive Metastore - */ public HiveConf getHiveConf() { - return contextMap.get(getContextHandle()).getHiveConf(); + return hiveConf; } - /** - * @return filesystem used by ManyMiniCluster daemons - */ public FileSystem getFileSystem() 
{ - return contextMap.get(getContextHandle()).getFileSystem(); - } - - /** - * class used to encapsulate a context which is normally used by - * a single TestSuite or across TestSuites when multi-threaded testing is turned on - */ - public static class Context { - protected String testDir; - protected ManyMiniCluster cluster; - - protected Configuration hbaseConf; - protected Configuration jobConf; - protected HiveConf hiveConf; - - protected FileSystem fileSystem; - - protected int usageCount = 0; - - public Context(String handle) { - testDir = new File(TEST_DIR + "/test_" + handle + "_" + Math.abs(new Random().nextLong()) + "/").getPath(); - System.out.println("Cluster work directory: " + testDir); - } - - public void start() { - if (usageCount++ == 0) { - ManyMiniCluster.Builder b = ManyMiniCluster.create(new File(testDir)); - if (testConf != null) { - b.hbaseConf(HBaseConfiguration.create(testConf)); - } - cluster = b.build(); - cluster.start(); - this.hbaseConf = cluster.getHBaseConf(); - jobConf = cluster.getJobConf(); - fileSystem = cluster.getFileSystem(); - hiveConf = cluster.getHiveConf(); - } - } - - public void stop() { - if (--usageCount == 0) { - try { - cluster.stop(); - cluster = null; - } finally { - System.out.println("Trying to cleanup: " + testDir); - try { - FileSystem fs = FileSystem.get(jobConf); - fs.delete(new Path(testDir), true); - } catch (IOException e) { - throw new IllegalStateException("Failed to cleanup test dir", e); - } - - } - } - } - - public String getTestDir() { - return testDir; - } - - public ManyMiniCluster getCluster() { - return cluster; - } - - public Configuration getHbaseConf() { - return hbaseConf; - } - - public Configuration getJobConf() { - return jobConf; - } - - public HiveConf getHiveConf() { - return hiveConf; - } - - public FileSystem getFileSystem() { - return fileSystem; - } + return fileSystem; } + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java index 4c7cc04..3cb8c9f 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java @@ -84,548 +84,548 @@ * Including ImprtSequenceFile and HBaseBulkOutputFormat */ public class TestHBaseBulkOutputFormat extends SkeletonHBaseTest { - private final static Logger LOG = LoggerFactory.getLogger(TestHBaseBulkOutputFormat.class); - - private final HiveConf allConf; - private final HCatDriver hcatDriver; - - public TestHBaseBulkOutputFormat() { - allConf = getHiveConf(); - allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); - allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(), "warehouse").toString()); - - //Add hbase properties - for (Map.Entry el : getHbaseConf()) - allConf.set(el.getKey(), el.getValue()); - for (Map.Entry el : getJobConf()) - allConf.set(el.getKey(), el.getValue()); - - HBaseConfiguration.merge( - allConf, - RevisionManagerConfiguration.create()); - SessionState.start(new CliSessionState(allConf)); - hcatDriver = new HCatDriver(); + private final static Logger LOG = LoggerFactory.getLogger(TestHBaseBulkOutputFormat.class); + + private final HiveConf allConf; + private final HCatDriver hcatDriver; + 
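
The Context class reindented above shares one ManyMiniCluster across test suites by reference counting its start() and stop() calls. The standalone sketch below distills just that counting pattern; the class name and the println placeholders stand in for the real cluster startup and teardown and are not part of the patch.

    public class SharedContextSketch {
      private int usageCount = 0;

      public void start() {
        if (usageCount++ == 0) {
          // In SkeletonHBaseTest.Context this is where ManyMiniCluster is built and started.
          System.out.println("bring the shared mini clusters up");
        }
      }

      public void stop() {
        if (--usageCount == 0) {
          // ...and this is where the cluster is stopped and the test directory deleted.
          System.out.println("tear the shared mini clusters down");
        }
      }

      public static void main(String[] args) {
        SharedContextSketch context = new SharedContextSketch();
        context.start();  // first suite using the "default" context handle
        context.start();  // second suite sharing the same handle: no second startup
        context.stop();   // first suite done: cluster stays up for the other suite
        context.stop();   // last suite done: now the cluster actually comes down
      }
    }
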
+ public TestHBaseBulkOutputFormat() { + allConf = getHiveConf(); + allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); + allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(), "warehouse").toString()); + + //Add hbase properties + for (Map.Entry el : getHbaseConf()) + allConf.set(el.getKey(), el.getValue()); + for (Map.Entry el : getJobConf()) + allConf.set(el.getKey(), el.getValue()); + + HBaseConfiguration.merge( + allConf, + RevisionManagerConfiguration.create()); + SessionState.start(new CliSessionState(allConf)); + hcatDriver = new HCatDriver(); + } + + public static class MapWriteOldMapper implements org.apache.hadoop.mapred.Mapper { + + @Override + public void close() throws IOException { } - public static class MapWriteOldMapper implements org.apache.hadoop.mapred.Mapper { - - @Override - public void close() throws IOException { - } - - @Override - public void configure(JobConf job) { - } - - @Override - public void map(LongWritable key, Text value, - OutputCollector output, - Reporter reporter) throws IOException { - String vals[] = value.toString().split(","); - Put put = new Put(Bytes.toBytes(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - put.add(Bytes.toBytes("my_family"), - Bytes.toBytes(pair[0]), - Bytes.toBytes(pair[1])); - } - output.collect(new ImmutableBytesWritable(Bytes.toBytes(vals[0])), put); - } - + @Override + public void configure(JobConf job) { } - public static class MapWrite extends Mapper { - - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String vals[] = value.toString().split(","); - Put put = new Put(Bytes.toBytes(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - put.add(Bytes.toBytes("my_family"), - Bytes.toBytes(pair[0]), - Bytes.toBytes(pair[1])); - } - context.write(new ImmutableBytesWritable(Bytes.toBytes(vals[0])), put); - } + @Override + public void map(LongWritable key, Text value, + OutputCollector output, + Reporter reporter) throws IOException { + String vals[] = value.toString().split(","); + Put put = new Put(Bytes.toBytes(vals[0])); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + put.add(Bytes.toBytes("my_family"), + Bytes.toBytes(pair[0]), + Bytes.toBytes(pair[1])); + } + output.collect(new ImmutableBytesWritable(Bytes.toBytes(vals[0])), put); } - public static class MapHCatWrite extends Mapper { - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatRecord record = new DefaultHCatRecord(3); - HCatSchema schema = jobInfo.getOutputSchema(); - String vals[] = value.toString().split(","); - record.setInteger("key", schema, Integer.parseInt(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - record.set(pair[0], schema, pair[1]); - } - context.write(null, record); - } + } + + public static class MapWrite extends Mapper { + + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + String vals[] = value.toString().split(","); + Put put = new Put(Bytes.toBytes(vals[0])); + for (int i = 1; i < 
vals.length; i++) { + String pair[] = vals[i].split(":"); + put.add(Bytes.toBytes("my_family"), + Bytes.toBytes(pair[0]), + Bytes.toBytes(pair[1])); + } + context.write(new ImmutableBytesWritable(Bytes.toBytes(vals[0])), put); } - - @Test - public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { - String testName = "hbaseBulkOutputFormatTest"; - Path methodTestDir = new Path(getTestDir(), testName); - LOG.info("starting: " + testName); - - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - - //create table - conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - createTable(tableName, new String[]{familyName}); - - String data[] = {"1,english:one,spanish:uno", - "2,english:two,spanish:dos", - "3,english:three,spanish:tres"}; - - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); - for (String line : data) - os.write(Bytes.toBytes(line + "\n")); - os.close(); - Path interPath = new Path(methodTestDir, "inter"); - //create job - JobConf job = new JobConf(conf); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapWriteOldMapper.class); - - job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); - org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); - - job.setOutputFormat(HBaseBulkOutputFormat.class); - org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath); - job.setOutputCommitter(HBaseBulkOutputCommitter.class); - - //manually create transaction - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); - Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, - HCatUtil.serialize(txn)); - job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(outputJobInfo)); - } finally { - rm.close(); - } - - job.setMapOutputKeyClass(ImmutableBytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - - job.setOutputKeyClass(ImmutableBytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - - RunningJob runJob = JobClient.runJob(job); - runJob.waitForCompletion(); - assertTrue(runJob.isSuccessful()); - - //verify - HTable table = new HTable(conf, tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - } - index++; - } - //test if load count is the same - assertEquals(data.length, index); - //test if scratch directory was erased - assertFalse(FileSystem.get(job).exists(interPath)); + 
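
Both MapWriteOldMapper and MapWrite above consume the same test row layout: a row key followed by qualifier:value pairs, all comma separated, with every cell written to the my_family column family. The sketch below only traces that parsing in plain Java so the expected Puts are easy to see; the class name RowParseSketch is invented for the example.

    public class RowParseSketch {
      public static void main(String[] args) {
        // Same row layout the test mappers consume: rowkey, then qualifier:value pairs.
        String line = "1,english:one,spanish:uno";
        String[] vals = line.split(",");
        String rowKey = vals[0];
        for (int i = 1; i < vals.length; i++) {
          String[] pair = vals[i].split(":");
          // The mappers turn each pair into put.add("my_family", pair[0], pair[1]).
          System.out.println("row=" + rowKey
              + " family=my_family qualifier=" + pair[0] + " value=" + pair[1]);
        }
      }
    }
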
} + + public static class MapHCatWrite extends Mapper { + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatRecord record = new DefaultHCatRecord(3); + HCatSchema schema = jobInfo.getOutputSchema(); + String vals[] = value.toString().split(","); + record.setInteger("key", schema, Integer.parseInt(vals[0])); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + record.set(pair[0], schema, pair[1]); + } + context.write(null, record); + } + } + + @Test + public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { + String testName = "hbaseBulkOutputFormatTest"; + Path methodTestDir = new Path(getTestDir(), testName); + LOG.info("starting: " + testName); + + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + + //create table + conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + createTable(tableName, new String[]{familyName}); + + String data[] = {"1,english:one,spanish:uno", + "2,english:two,spanish:dos", + "3,english:three,spanish:tres"}; + + + // input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); + for (String line : data) + os.write(Bytes.toBytes(line + "\n")); + os.close(); + Path interPath = new Path(methodTestDir, "inter"); + //create job + JobConf job = new JobConf(conf); + job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapWriteOldMapper.class); + + job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); + org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); + + job.setOutputFormat(HBaseBulkOutputFormat.class); + org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath); + job.setOutputCommitter(HBaseBulkOutputCommitter.class); + + //manually create transaction + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); + Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); + outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, + HCatUtil.serialize(txn)); + job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, + HCatUtil.serialize(outputJobInfo)); + } finally { + rm.close(); } - @Test - public void importSequenceFileTest() throws IOException, ClassNotFoundException, InterruptedException { - String testName = "importSequenceFileTest"; - Path methodTestDir = new Path(getTestDir(), testName); - LOG.info("starting: " + testName); - - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - - //create table - createTable(tableName, new String[]{familyName}); - - String data[] = {"1,english:one,spanish:uno", - 
"2,english:two,spanish:dos", - "3,english:three,spanish:tres"}; - - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); - for (String line : data) - os.write(Bytes.toBytes(line + "\n")); - os.close(); - Path interPath = new Path(methodTestDir, "inter"); - Path scratchPath = new Path(methodTestDir, "scratch"); - - - //create job - Job job = new Job(conf, testName); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapWrite.class); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - - job.setOutputFormatClass(SequenceFileOutputFormat.class); - SequenceFileOutputFormat.setOutputPath(job, interPath); - - job.setMapOutputKeyClass(ImmutableBytesWritable.class); - job.setMapOutputValueClass(Put.class); - - job.setOutputKeyClass(ImmutableBytesWritable.class); - job.setOutputValueClass(Put.class); - - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - - job = new Job(new Configuration(allConf), testName + "_importer"); - assertTrue(ImportSequenceFile.runJob(job, tableName, interPath, scratchPath)); - - //verify - HTable table = new HTable(conf, tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - } - index++; - } - //test if load count is the same - assertEquals(data.length, index); - //test if scratch directory was erased - assertFalse(FileSystem.get(job.getConfiguration()).exists(scratchPath)); + job.setMapOutputKeyClass(ImmutableBytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + + job.setOutputKeyClass(ImmutableBytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + + RunningJob runJob = JobClient.runJob(job); + runJob.waitForCompletion(); + assertTrue(runJob.isSuccessful()); + + //verify + HTable table = new HTable(conf, tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + } + index++; + } + //test if load count is the same + assertEquals(data.length, index); + //test if scratch directory was erased + assertFalse(FileSystem.get(job).exists(interPath)); + } + + @Test + public void importSequenceFileTest() throws IOException, ClassNotFoundException, InterruptedException { + String testName = "importSequenceFileTest"; + Path methodTestDir = new Path(getTestDir(), testName); + LOG.info("starting: " + testName); + + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + //include hbase config in conf 
file + Configuration conf = new Configuration(allConf); + + //create table + createTable(tableName, new String[]{familyName}); + + String data[] = {"1,english:one,spanish:uno", + "2,english:two,spanish:dos", + "3,english:three,spanish:tres"}; + + + // input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); + for (String line : data) + os.write(Bytes.toBytes(line + "\n")); + os.close(); + Path interPath = new Path(methodTestDir, "inter"); + Path scratchPath = new Path(methodTestDir, "scratch"); + + + //create job + Job job = new Job(conf, testName); + job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapWrite.class); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + + job.setOutputFormatClass(SequenceFileOutputFormat.class); + SequenceFileOutputFormat.setOutputPath(job, interPath); + + job.setMapOutputKeyClass(ImmutableBytesWritable.class); + job.setMapOutputValueClass(Put.class); + + job.setOutputKeyClass(ImmutableBytesWritable.class); + job.setOutputValueClass(Put.class); + + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + + job = new Job(new Configuration(allConf), testName + "_importer"); + assertTrue(ImportSequenceFile.runJob(job, tableName, interPath, scratchPath)); + + //verify + HTable table = new HTable(conf, tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + } + index++; } + //test if load count is the same + assertEquals(data.length, index); + //test if scratch directory was erased + assertFalse(FileSystem.get(job.getConfiguration()).exists(scratchPath)); + } + + @Test + public void bulkModeHCatOutputFormatTest() throws Exception { + String testName = "bulkModeHCatOutputFormatTest"; + Path methodTestDir = new Path(getTestDir(), testName); + LOG.info("starting: " + testName); + + String databaseName = testName.toLowerCase(); + String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + + "(key int, english string, spanish string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; + + assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); + assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:TWO,spanish:DOS", + "3,english:THREE,spanish:TRES"}; + + // input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + //create multiple files so we can test with multiple mappers + for (int i = 0; i < data.length; i++) { + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt")); + os.write(Bytes.toBytes(data[i] + "\n")); + os.close(); + } + + //create job + Job job = new Job(conf, testName); + job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapHCatWrite.class); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); - @Test - public void bulkModeHCatOutputFormatTest() throws Exception { - String testName = "bulkModeHCatOutputFormatTest"; - Path methodTestDir = new Path(getTestDir(), testName); - LOG.info("starting: " + testName); - - String databaseName = testName.toLowerCase(); - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:TWO,spanish:DOS", - "3,english:THREE,spanish:TRES"}; - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - //create multiple files so we can test with multiple mappers - for (int i = 0; i < data.length; i++) { - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt")); - os.write(Bytes.toBytes(data[i] + "\n")); - os.close(); - } - - //create job - Job job = new Job(conf, testName); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapHCatWrite.class); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - - - job.setOutputFormatClass(HCatOutputFormat.class); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); - HCatOutputFormat.setOutput(job, outputJobInfo); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - - assertTrue(job.waitForCompletion(true)); - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - TableSnapshot snapshot = rm.createSnapshot(databaseName + "." + tableName); - for (String el : snapshot.getColumnFamilies()) { - assertEquals(1, snapshot.getRevision(el)); - } - } finally { - rm.close(); - } - - //verify - HTable table = new HTable(conf, databaseName + "." + tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp()); - } - index++; - } - //test if load count is the same - assertEquals(data.length, index); + + job.setOutputFormatClass(HCatOutputFormat.class); + OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + + assertTrue(job.waitForCompletion(true)); + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + TableSnapshot snapshot = rm.createSnapshot(databaseName + "." 
+ tableName); + for (String el : snapshot.getColumnFamilies()) { + assertEquals(1, snapshot.getRevision(el)); + } + } finally { + rm.close(); } - @Test - public void bulkModeHCatOutputFormatTestWithDefaultDB() throws Exception { - String testName = "bulkModeHCatOutputFormatTestWithDefaultDB"; - Path methodTestDir = new Path(getTestDir(), testName); - - String databaseName = "default"; - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:TWO,spanish:DOS", - "3,english:THREE,spanish:TRES"}; - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); - for (String line : data) - os.write(Bytes.toBytes(line + "\n")); - os.close(); - - //create job - Job job = new Job(conf, testName); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapHCatWrite.class); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - - - job.setOutputFormatClass(HCatOutputFormat.class); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); - HCatOutputFormat.setOutput(job, outputJobInfo); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - - assertTrue(job.waitForCompletion(true)); - - //verify - HTable table = new HTable(conf, tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - } - index++; - } - //test if load count is the same - assertEquals(data.length, index); + //verify + HTable table = new HTable(conf, databaseName + "." 
+ tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp()); + } + index++; + } + //test if load count is the same + assertEquals(data.length, index); + } + + @Test + public void bulkModeHCatOutputFormatTestWithDefaultDB() throws Exception { + String testName = "bulkModeHCatOutputFormatTestWithDefaultDB"; + Path methodTestDir = new Path(getTestDir(), testName); + + String databaseName = "default"; + String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + + "(key int, english string, spanish string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; + + assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); + assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:TWO,spanish:DOS", + "3,english:THREE,spanish:TRES"}; + + // input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); + for (String line : data) + os.write(Bytes.toBytes(line + "\n")); + os.close(); + + //create job + Job job = new Job(conf, testName); + job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapHCatWrite.class); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + + + job.setOutputFormatClass(HCatOutputFormat.class); + OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + + assertTrue(job.waitForCompletion(true)); + + //verify + HTable table = new HTable(conf, tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], 
Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + } + index++; + } + //test if load count is the same + assertEquals(data.length, index); + } + + @Test + public void bulkModeAbortTest() throws Exception { + String testName = "bulkModeAbortTest"; + Path methodTestDir = new Path(getTestDir(), testName); + String databaseName = testName.toLowerCase(); + String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + + // include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + + "(key int, english string, spanish string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + + ":spanish')"; + + assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); + assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:TWO,spanish:DOS", + "3,english:THREE,spanish:TRES"}; + + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + // create multiple files so we can test with multiple mappers + for (int i = 0; i < data.length; i++) { + FSDataOutputStream os = getFileSystem().create( + new Path(inputPath, "inputFile" + i + ".txt")); + os.write(Bytes.toBytes(data[i] + "\n")); + os.close(); } - @Test - public void bulkModeAbortTest() throws Exception { - String testName = "bulkModeAbortTest"; - Path methodTestDir = new Path(getTestDir(), testName); - String databaseName = testName.toLowerCase(); - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - - // include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir - + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName - + ":spanish')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:TWO,spanish:DOS", - "3,english:THREE,spanish:TRES"}; - - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - // create multiple files so we can test with multiple mappers - for (int i = 0; i < data.length; i++) { - FSDataOutputStream os = getFileSystem().create( - new Path(inputPath, "inputFile" + i + ".txt")); - os.write(Bytes.toBytes(data[i] + "\n")); - os.close(); - } - - Path workingDir = new Path(methodTestDir, "mr_abort"); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, - tableName, null); - Job job = configureJob(testName, - conf, workingDir, MapWriteAbortTransaction.class, - outputJobInfo, inputPath); - assertFalse(job.waitForCompletion(true)); - - // verify that revision manager has it as aborted transaction - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - TableSnapshot snapshot = rm.createSnapshot(databaseName + "." + tableName); - for (String family : snapshot.getColumnFamilies()) { - assertEquals(1, snapshot.getRevision(family)); - List abortedWriteTransactions = rm.getAbortedWriteTransactions( - databaseName + "." + tableName, family); - assertEquals(1, abortedWriteTransactions.size()); - assertEquals(1, abortedWriteTransactions.get(0).getRevision()); - } - } finally { - rm.close(); - } - - //verify that hbase does not have any of the records. - //Since records are only written during commitJob, - //hbase should not have any records. - HTable table = new HTable(conf, databaseName + "." + tableName); - Scan scan = new Scan(); - scan.addFamily(Bytes.toBytes(familyName)); - ResultScanner scanner = table.getScanner(scan); - assertFalse(scanner.iterator().hasNext()); - - // verify that the storage handler input format returns empty results. 
- Path outputDir = new Path(getTestDir(), - "mapred/testHBaseTableBulkIgnoreAbortedTransactions"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - job = new Job(conf, "hbase-bulk-aborted-transaction"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadAbortedTransaction.class); - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job, databaseName, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); + Path workingDir = new Path(methodTestDir, "mr_abort"); + OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, + tableName, null); + Job job = configureJob(testName, + conf, workingDir, MapWriteAbortTransaction.class, + outputJobInfo, inputPath); + assertFalse(job.waitForCompletion(true)); + + // verify that revision manager has it as aborted transaction + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + TableSnapshot snapshot = rm.createSnapshot(databaseName + "." + tableName); + for (String family : snapshot.getColumnFamilies()) { + assertEquals(1, snapshot.getRevision(family)); + List abortedWriteTransactions = rm.getAbortedWriteTransactions( + databaseName + "." + tableName, family); + assertEquals(1, abortedWriteTransactions.size()); + assertEquals(1, abortedWriteTransactions.get(0).getRevision()); + } + } finally { + rm.close(); } - private Job configureJob(String jobName, Configuration conf, - Path workingDir, Class mapperClass, - OutputJobInfo outputJobInfo, Path inputPath) throws IOException { - Job job = new Job(conf, jobName); - job.setWorkingDirectory(workingDir); - job.setJarByClass(this.getClass()); - job.setMapperClass(mapperClass); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setOutput(job, outputJobInfo); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - return job; + //verify that hbase does not have any of the records. + //Since records are only written during commitJob, + //hbase should not have any records. + HTable table = new HTable(conf, databaseName + "." + tableName); + Scan scan = new Scan(); + scan.addFamily(Bytes.toBytes(familyName)); + ResultScanner scanner = table.getScanner(scan); + assertFalse(scanner.iterator().hasNext()); + + // verify that the storage handler input format returns empty results. 
+ Path outputDir = new Path(getTestDir(), + "mapred/testHBaseTableBulkIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); } + job = new Job(conf, "hbase-bulk-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadAbortedTransaction.class); + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setInput(job, databaseName, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + } + + private Job configureJob(String jobName, Configuration conf, + Path workingDir, Class mapperClass, + OutputJobInfo outputJobInfo, Path inputPath) throws IOException { + Job job = new Job(conf, jobName); + job.setWorkingDirectory(workingDir); + job.setJarByClass(this.getClass()); + job.setMapperClass(mapperClass); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + return job; + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java index ccb4715..97f4875 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java @@ -80,422 +80,422 @@ */ public class TestHBaseDirectOutputFormat extends SkeletonHBaseTest { - private final HiveConf allConf; - private final HCatDriver hcatDriver; - - public TestHBaseDirectOutputFormat() { - allConf = getHiveConf(); - allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); - allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(), "warehouse").toString()); - - //Add hbase properties - for (Map.Entry el : getHbaseConf()) - allConf.set(el.getKey(), el.getValue()); - for (Map.Entry el : getJobConf()) - allConf.set(el.getKey(), el.getValue()); - HBaseConfiguration.merge( - allConf, - RevisionManagerConfiguration.create()); - SessionState.start(new CliSessionState(allConf)); - hcatDriver = new HCatDriver(); + private final HiveConf allConf; + private final HCatDriver hcatDriver; + + public TestHBaseDirectOutputFormat() { + allConf = getHiveConf(); + allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); + allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(), "warehouse").toString()); + + //Add hbase properties + for (Map.Entry el : getHbaseConf()) + allConf.set(el.getKey(), el.getValue()); + for (Map.Entry el : getJobConf()) + allConf.set(el.getKey(), el.getValue()); + 
HBaseConfiguration.merge( + allConf, + RevisionManagerConfiguration.create()); + SessionState.start(new CliSessionState(allConf)); + hcatDriver = new HCatDriver(); + } + + @Test + public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { + String testName = "directOutputFormatTest"; + Path methodTestDir = new Path(getTestDir(), testName); + + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + //create table + createTable(tableName, new String[]{familyName}); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:ONE,spanish:DOS", + "3,english:ONE,spanish:TRES"}; + + + // input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); + for (String line : data) + os.write(Bytes.toBytes(line + "\n")); + os.close(); + + //create job + JobConf job = new JobConf(conf); + job.setJobName(testName); + job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapWrite.class); + + job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); + org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); + + job.setOutputFormat(HBaseDirectOutputFormat.class); + job.set(TableOutputFormat.OUTPUT_TABLE, tableName); + job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); + + //manually create transaction + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); + Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); + outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, + HCatUtil.serialize(txn)); + job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, + HCatUtil.serialize(outputJobInfo)); + } finally { + rm.close(); } - @Test - public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { - String testName = "directOutputFormatTest"; - Path methodTestDir = new Path(getTestDir(), testName); - - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - //create table - createTable(tableName, new String[]{familyName}); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:ONE,spanish:DOS", - "3,english:ONE,spanish:TRES"}; - - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); - for (String line : data) - os.write(Bytes.toBytes(line + "\n")); - os.close(); - - //create job - JobConf job = new JobConf(conf); - job.setJobName(testName); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapWrite.class); - - 
job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); - org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); - - job.setOutputFormat(HBaseDirectOutputFormat.class); - job.set(TableOutputFormat.OUTPUT_TABLE, tableName); - job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); - - //manually create transaction - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); - Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, - HCatUtil.serialize(txn)); - job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(outputJobInfo)); - } finally { - rm.close(); - } + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + job.setNumReduceTasks(0); + + RunningJob runJob = JobClient.runJob(job); + runJob.waitForCompletion(); + assertTrue(runJob.isSuccessful()); + + //verify + HTable table = new HTable(conf, tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + } + index++; + } + assertEquals(data.length, index); + } + + @Test + public void directHCatOutputFormatTest() throws Exception { + String testName = "directHCatOutputFormatTest"; + Path methodTestDir = new Path(getTestDir(), testName); + + String databaseName = testName; + String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); + String tableName = newTableName(testName); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + //Table name will be lower case unless specified by hbase.table.name property + String hbaseTableName = (databaseName + "." + tableName).toLowerCase(); + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + + "(key int, english string, spanish string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES (" + + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; + + assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); + assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:ONE,spanish:DOS", + "3,english:ONE,spanish:TRES"}; + + // input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + //create multiple files so we can test with multiple mappers + for (int i = 0; i < data.length; i++) { + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt")); + os.write(Bytes.toBytes(data[i] + "\n")); + os.close(); + } - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - job.setNumReduceTasks(0); - - RunningJob runJob = JobClient.runJob(job); - runJob.waitForCompletion(); - assertTrue(runJob.isSuccessful()); - - //verify - HTable table = new HTable(conf, tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - } - index++; - } - assertEquals(data.length, index); + //create job + Path workingDir = new Path(methodTestDir, "mr_work"); + OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, + tableName, null); + Job job = configureJob(testName, conf, workingDir, MapHCatWrite.class, + outputJobInfo, inputPath); + assertTrue(job.waitForCompletion(true)); + + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + TableSnapshot snapshot = rm.createSnapshot(hbaseTableName); + for (String el : snapshot.getColumnFamilies()) { + assertEquals(1, snapshot.getRevision(el)); + } + } finally { + rm.close(); } - @Test - public void directHCatOutputFormatTest() throws Exception { - String testName = "directHCatOutputFormatTest"; - Path methodTestDir = new Path(getTestDir(), testName); - - String databaseName = testName; - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - //Table name will be lower case unless specified by hbase.table.name property - String hbaseTableName = (databaseName + "." + tableName).toLowerCase(); - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES (" + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:ONE,spanish:DOS", - "3,english:ONE,spanish:TRES"}; - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - //create multiple files so we can test with multiple mappers - for (int i = 0; i < data.length; i++) { - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt")); - os.write(Bytes.toBytes(data[i] + "\n")); - os.close(); - } + //verify + HTable table = new HTable(conf, hbaseTableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp()); + } + index++; + } + assertEquals(data.length, index); + } + + @Test + public void directModeAbortTest() throws Exception { + String testName = "directModeAbortTest"; + Path methodTestDir = new Path(getTestDir(), testName); + String databaseName = testName; + String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); + String tableName = newTableName(testName); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + //Table name as specified by hbase.table.name property + String hbaseTableName = tableName; + + // include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + + "(key int, english string, spanish string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES (" + + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + + ":spanish','hbase.table.name'='" + hbaseTableName + "')"; + + assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); + assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:TWO,spanish:DOS", + "3,english:THREE,spanish:TRES"}; + + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + // create multiple files so we can test with multiple mappers + for (int i = 0; i < data.length; i++) { + FSDataOutputStream os = getFileSystem().create( + new Path(inputPath, "inputFile" + i + ".txt")); + os.write(Bytes.toBytes(data[i] + "\n")); + os.close(); + } - //create job - Path workingDir = new Path(methodTestDir, "mr_work"); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, - tableName, null); - Job job = configureJob(testName, conf, workingDir, MapHCatWrite.class, - outputJobInfo, inputPath); - assertTrue(job.waitForCompletion(true)); - - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - TableSnapshot snapshot = rm.createSnapshot(hbaseTableName); - for (String el : snapshot.getColumnFamilies()) { - assertEquals(1, snapshot.getRevision(el)); - } - } finally { - rm.close(); - } + Path workingDir = new Path(methodTestDir, "mr_abort"); + OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, + tableName, null); + Job job = configureJob(testName, conf, workingDir, MapWriteAbortTransaction.class, + outputJobInfo, inputPath); + assertFalse(job.waitForCompletion(true)); + + // verify that revision manager has it as aborted transaction + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + TableSnapshot snapshot = rm.createSnapshot(hbaseTableName); + for (String family : snapshot.getColumnFamilies()) { + assertEquals(1, snapshot.getRevision(family)); + List abortedWriteTransactions = rm.getAbortedWriteTransactions( + hbaseTableName, family); + assertEquals(1, abortedWriteTransactions.size()); + assertEquals(1, abortedWriteTransactions.get(0).getRevision()); + } + } finally { + rm.close(); + } - //verify - HTable table = new HTable(conf, hbaseTableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp()); - } - index++; - } - assertEquals(data.length, index); + // verify that hbase has the records of the successful maps. 
+ HTable table = new HTable(conf, hbaseTableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int count = 0; + for (Result result : scanner) { + String key = Bytes.toString(result.getRow()); + assertNotSame(MapWriteAbortTransaction.failedKey, key); + int index = Integer.parseInt(key) - 1; + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], + Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0) + .getTimestamp()); + } + count++; + } + assertEquals(data.length - 1, count); + + // verify that the inputformat returns empty results. + Path outputDir = new Path(getTestDir(), + "mapred/testHBaseTableIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); } + job = new Job(conf, "hbase-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadAbortedTransaction.class); + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setInput(job, databaseName, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + } + + private Job configureJob(String jobName, Configuration conf, + Path workingDir, Class mapperClass, + OutputJobInfo outputJobInfo, Path inputPath) throws IOException { + Job job = new Job(conf, jobName); + job.setWorkingDirectory(workingDir); + job.setJarByClass(this.getClass()); + job.setMapperClass(mapperClass); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setOutput(job, outputJobInfo); + String txnString = job.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY); + //Test passing in same OutputJobInfo multiple times and verify 1 transaction is created + String jobString = job.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString); + Job job2 = new Job(conf); + HCatOutputFormat.setOutput(job2, outputJobInfo); + assertEquals(txnString, job2.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + return job; + } + + public static class MapHCatWrite extends Mapper { + + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatRecord record = new DefaultHCatRecord(3); + HCatSchema schema = jobInfo.getOutputSchema(); + String vals[] = value.toString().split(","); + record.setInteger("key", schema, Integer.parseInt(vals[0])); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + 
record.set(pair[0], schema, pair[1]); + } + context.write(null, record); + } + } - @Test - public void directModeAbortTest() throws Exception { - String testName = "directModeAbortTest"; - Path methodTestDir = new Path(getTestDir(), testName); - String databaseName = testName; - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - //Table name as specified by hbase.table.name property - String hbaseTableName = tableName; - - // include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir - + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES (" + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + - ":spanish','hbase.table.name'='" + hbaseTableName + "')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:TWO,spanish:DOS", - "3,english:THREE,spanish:TRES"}; - - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - // create multiple files so we can test with multiple mappers - for (int i = 0; i < data.length; i++) { - FSDataOutputStream os = getFileSystem().create( - new Path(inputPath, "inputFile" + i + ".txt")); - os.write(Bytes.toBytes(data[i] + "\n")); - os.close(); - } + public static class MapWrite implements org.apache.hadoop.mapred.Mapper { - Path workingDir = new Path(methodTestDir, "mr_abort"); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, - tableName, null); - Job job = configureJob(testName, conf, workingDir, MapWriteAbortTransaction.class, - outputJobInfo, inputPath); - assertFalse(job.waitForCompletion(true)); - - // verify that revision manager has it as aborted transaction - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - TableSnapshot snapshot = rm.createSnapshot(hbaseTableName); - for (String family : snapshot.getColumnFamilies()) { - assertEquals(1, snapshot.getRevision(family)); - List abortedWriteTransactions = rm.getAbortedWriteTransactions( - hbaseTableName, family); - assertEquals(1, abortedWriteTransactions.size()); - assertEquals(1, abortedWriteTransactions.get(0).getRevision()); - } - } finally { - rm.close(); - } - - // verify that hbase has the records of the successful maps. 
- HTable table = new HTable(conf, hbaseTableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int count = 0; - for (Result result : scanner) { - String key = Bytes.toString(result.getRow()); - assertNotSame(MapWriteAbortTransaction.failedKey, key); - int index = Integer.parseInt(key) - 1; - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], - Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0) - .getTimestamp()); - } - count++; - } - assertEquals(data.length - 1, count); - - // verify that the inputformat returns empty results. - Path outputDir = new Path(getTestDir(), - "mapred/testHBaseTableIgnoreAbortedTransactions"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - job = new Job(conf, "hbase-aborted-transaction"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadAbortedTransaction.class); - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job, databaseName, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); + @Override + public void configure(JobConf job) { } - private Job configureJob(String jobName, Configuration conf, - Path workingDir, Class mapperClass, - OutputJobInfo outputJobInfo, Path inputPath) throws IOException { - Job job = new Job(conf, jobName); - job.setWorkingDirectory(workingDir); - job.setJarByClass(this.getClass()); - job.setMapperClass(mapperClass); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setOutput(job, outputJobInfo); - String txnString = job.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY); - //Test passing in same OutputJobInfo multiple times and verify 1 transaction is created - String jobString = job.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO); - outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString); - Job job2 = new Job(conf); - HCatOutputFormat.setOutput(job2, outputJobInfo); - assertEquals(txnString, job2.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - return job; + @Override + public void close() throws IOException { } - public static class MapHCatWrite extends Mapper { - - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatRecord record = new DefaultHCatRecord(3); - HCatSchema schema = jobInfo.getOutputSchema(); - String vals[] = value.toString().split(","); - record.setInteger("key", schema, 
Integer.parseInt(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - record.set(pair[0], schema, pair[1]); - } - context.write(null, record); - } + @Override + public void map(LongWritable key, Text value, + OutputCollector output, Reporter reporter) + throws IOException { + String vals[] = value.toString().split(","); + Put put = new Put(Bytes.toBytes(vals[0])); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + put.add(Bytes.toBytes("my_family"), + Bytes.toBytes(pair[0]), + Bytes.toBytes(pair[1])); + } + output.collect(null, put); } - - public static class MapWrite implements org.apache.hadoop.mapred.Mapper { - - @Override - public void configure(JobConf job) { + } + + static class MapWriteAbortTransaction extends Mapper { + public static String failedKey; + private static int count = 0; + + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatRecord record = new DefaultHCatRecord(3); + HCatSchema schema = jobInfo.getOutputSchema(); + String vals[] = value.toString().split(","); + record.setInteger("key", schema, Integer.parseInt(vals[0])); + synchronized (MapWriteAbortTransaction.class) { + if (count == 2) { + failedKey = vals[0]; + throw new IOException("Failing map to test abort"); } - - @Override - public void close() throws IOException { + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + record.set(pair[0], schema, pair[1]); } + context.write(null, record); + count++; + } - @Override - public void map(LongWritable key, Text value, - OutputCollector output, Reporter reporter) - throws IOException { - String vals[] = value.toString().split(","); - Put put = new Put(Bytes.toBytes(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - put.add(Bytes.toBytes("my_family"), - Bytes.toBytes(pair[0]), - Bytes.toBytes(pair[1])); - } - output.collect(null, put); - } } - static class MapWriteAbortTransaction extends Mapper { - public static String failedKey; - private static int count = 0; - - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatRecord record = new DefaultHCatRecord(3); - HCatSchema schema = jobInfo.getOutputSchema(); - String vals[] = value.toString().split(","); - record.setInteger("key", schema, Integer.parseInt(vals[0])); - synchronized (MapWriteAbortTransaction.class) { - if (count == 2) { - failedKey = vals[0]; - throw new IOException("Failing map to test abort"); - } - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - record.set(pair[0], schema, pair[1]); - } - context.write(null, record); - count++; - } - + } + + static class MapReadAbortedTransaction + extends + Mapper, Text> { + + @Override + public void run(Context context) throws IOException, + InterruptedException { + setup(context); + if (context.nextKeyValue()) { + map(context.getCurrentKey(), context.getCurrentValue(), context); + while (context.nextKeyValue()) { + map(context.getCurrentKey(), context.getCurrentValue(), + context); } - + throw new IOException("There should have been no records"); + } + cleanup(context); } - static class 
MapReadAbortedTransaction - extends - Mapper, Text> { - - @Override - public void run(Context context) throws IOException, - InterruptedException { - setup(context); - if (context.nextKeyValue()) { - map(context.getCurrentKey(), context.getCurrentValue(), context); - while (context.nextKeyValue()) { - map(context.getCurrentKey(), context.getCurrentValue(), - context); - } - throw new IOException("There should have been no records"); - } - cleanup(context); - } - - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - } + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); } + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseHCatStorageHandler.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseHCatStorageHandler.java index d377061..9ed0630 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseHCatStorageHandler.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseHCatStorageHandler.java @@ -45,197 +45,197 @@ public class TestHBaseHCatStorageHandler extends SkeletonHBaseTest { - private static HiveConf hcatConf; - private static HCatDriver hcatDriver; - private static Warehouse wh; - - public void Initialize() throws Exception { - - hcatConf = getHiveConf(); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - URI fsuri = getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), - getTestDir()); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - - //Add hbase properties - for (Map.Entry el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - HBaseConfiguration.merge( - hcatConf, - RevisionManagerConfiguration.create()); - - SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - + private static HiveConf hcatConf; + private static HCatDriver hcatDriver; + private static Warehouse wh; + + public void Initialize() throws Exception { + + hcatConf = getHiveConf(); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + URI fsuri = getFileSystem().getUri(); + Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), + getTestDir()); + hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); + hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); + + //Add hbase properties + for (Map.Entry el : getHbaseConf()) { + if (el.getKey().startsWith("hbase.")) { + hcatConf.set(el.getKey(), el.getValue()); + } } + HBaseConfiguration.merge( + hcatConf, + RevisionManagerConfiguration.create()); + + SessionState.start(new CliSessionState(hcatConf)); + hcatDriver = new HCatDriver(); - @Test - public void testTableCreateDrop() throws Exception { - Initialize(); + } - hcatDriver.run("drop table test_table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_table(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); + @Test 
+ public void testTableCreateDrop() throws Exception { + Initialize(); - assertEquals(0, response.getResponseCode()); + hcatDriver.run("drop table test_table"); + CommandProcessorResponse response = hcatDriver + .run("create table test_table(key int, value string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("test_table"); + assertEquals(0, response.getResponseCode()); - assertTrue(doesTableExist); + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists("test_table"); - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - //Should be able to successfully query revision manager - rm.getAbortedWriteTransactions("test_table", "cf1"); + assertTrue(doesTableExist); - hcatDriver.run("drop table test_table"); - doesTableExist = hAdmin.tableExists("test_table"); - assertTrue(doesTableExist == false); + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); + rm.open(); + //Should be able to successfully query revision manager + rm.getAbortedWriteTransactions("test_table", "cf1"); - try { - rm.getAbortedWriteTransactions("test_table", "cf1"); - } catch (Exception e) { - assertTrue(e.getCause() instanceof NoNodeException); - } - rm.close(); + hcatDriver.run("drop table test_table"); + doesTableExist = hAdmin.tableExists("test_table"); + assertTrue(doesTableExist == false); + try { + rm.getAbortedWriteTransactions("test_table", "cf1"); + } catch (Exception e) { + assertTrue(e.getCause() instanceof NoNodeException); } + rm.close(); - @Test - public void testTableCreateDropDifferentCase() throws Exception { - Initialize(); + } - hcatDriver.run("drop table test_Table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_Table(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); + @Test + public void testTableCreateDropDifferentCase() throws Exception { + Initialize(); - assertEquals(0, response.getResponseCode()); + hcatDriver.run("drop table test_Table"); + CommandProcessorResponse response = hcatDriver + .run("create table test_Table(key int, value string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); - //HBase table gets created with lower case unless specified as a table property. - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("test_table"); + assertEquals(0, response.getResponseCode()); - assertTrue(doesTableExist); + //HBase table gets created with lower case unless specified as a table property. 
+ HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists("test_table"); - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - //Should be able to successfully query revision manager - rm.getAbortedWriteTransactions("test_table", "cf1"); + assertTrue(doesTableExist); - hcatDriver.run("drop table test_table"); - doesTableExist = hAdmin.tableExists("test_table"); - assertTrue(doesTableExist == false); + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); + rm.open(); + //Should be able to successfully query revision manager + rm.getAbortedWriteTransactions("test_table", "cf1"); - try { - rm.getAbortedWriteTransactions("test_table", "cf1"); - } catch (Exception e) { - assertTrue(e.getCause() instanceof NoNodeException); - } - rm.close(); + hcatDriver.run("drop table test_table"); + doesTableExist = hAdmin.tableExists("test_table"); + assertTrue(doesTableExist == false); + try { + rm.getAbortedWriteTransactions("test_table", "cf1"); + } catch (Exception e) { + assertTrue(e.getCause() instanceof NoNodeException); } + rm.close(); - @Test - public void testTableCreateDropCaseSensitive() throws Exception { - Initialize(); + } - hcatDriver.run("drop table test_Table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_Table(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val'," + - " 'hbase.table.name'='CaseSensitiveTable')"); + @Test + public void testTableCreateDropCaseSensitive() throws Exception { + Initialize(); - assertEquals(0, response.getResponseCode()); + hcatDriver.run("drop table test_Table"); + CommandProcessorResponse response = hcatDriver + .run("create table test_Table(key int, value string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val'," + + " 'hbase.table.name'='CaseSensitiveTable')"); - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("CaseSensitiveTable"); + assertEquals(0, response.getResponseCode()); - assertTrue(doesTableExist); + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists("CaseSensitiveTable"); - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - //Should be able to successfully query revision manager - rm.getAbortedWriteTransactions("CaseSensitiveTable", "cf1"); + assertTrue(doesTableExist); - hcatDriver.run("drop table test_table"); - doesTableExist = hAdmin.tableExists("CaseSensitiveTable"); - assertTrue(doesTableExist == false); + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); + rm.open(); + //Should be able to successfully query revision manager + rm.getAbortedWriteTransactions("CaseSensitiveTable", "cf1"); - try { - rm.getAbortedWriteTransactions("CaseSensitiveTable", "cf1"); - } catch (Exception e) { - assertTrue(e.getCause() instanceof NoNodeException); - } - rm.close(); + hcatDriver.run("drop table test_table"); + doesTableExist = hAdmin.tableExists("CaseSensitiveTable"); + assertTrue(doesTableExist == false); + try { + rm.getAbortedWriteTransactions("CaseSensitiveTable", "cf1"); + } catch (Exception e) { + assertTrue(e.getCause() instanceof NoNodeException); } + rm.close(); - @Test - public void testTableDropNonExistent() throws Exception { - Initialize(); - - 
hcatDriver.run("drop table mytable"); - CommandProcessorResponse response = hcatDriver - .run("create table mytable(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); + } - assertEquals(0, response.getResponseCode()); + @Test + public void testTableDropNonExistent() throws Exception { + Initialize(); - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("mytable"); - assertTrue(doesTableExist); + hcatDriver.run("drop table mytable"); + CommandProcessorResponse response = hcatDriver + .run("create table mytable(key int, value string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); - //Now delete the table from hbase - if (hAdmin.isTableEnabled("mytable")) { - hAdmin.disableTable("mytable"); - } - hAdmin.deleteTable("mytable"); - doesTableExist = hAdmin.tableExists("mytable"); - assertTrue(doesTableExist == false); + assertEquals(0, response.getResponseCode()); - CommandProcessorResponse responseTwo = hcatDriver.run("drop table mytable"); - assertTrue(responseTwo.getResponseCode() == 0); + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists("mytable"); + assertTrue(doesTableExist); + //Now delete the table from hbase + if (hAdmin.isTableEnabled("mytable")) { + hAdmin.disableTable("mytable"); } + hAdmin.deleteTable("mytable"); + doesTableExist = hAdmin.tableExists("mytable"); + assertTrue(doesTableExist == false); - @Test - public void testTableCreateExternal() throws Exception { + CommandProcessorResponse responseTwo = hcatDriver.run("drop table mytable"); + assertTrue(responseTwo.getResponseCode() == 0); - String tableName = "testTable"; - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + } - HTableDescriptor tableDesc = new HTableDescriptor(tableName); - tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("key"))); - tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familyone"))); - tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familytwo"))); + @Test + public void testTableCreateExternal() throws Exception { - hAdmin.createTable(tableDesc); - boolean doesTableExist = hAdmin.tableExists(tableName); - assertTrue(doesTableExist); + String tableName = "testTable"; + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - hcatDriver.run("drop table mytabletwo"); - CommandProcessorResponse response = hcatDriver - .run("create external table mytabletwo(key int, valueone string, valuetwo string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,familyone:val,familytwo:val'," + - "'hbase.table.name'='testTable')"); + HTableDescriptor tableDesc = new HTableDescriptor(tableName); + tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("key"))); + tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familyone"))); + tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familytwo"))); - assertEquals(0, response.getResponseCode()); + hAdmin.createTable(tableDesc); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); - } + hcatDriver.run("drop table mytabletwo"); + CommandProcessorResponse response = hcatDriver + .run("create external table mytabletwo(key int, valueone string, valuetwo string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES 
('hbase.columns.mapping'=':key,familyone:val,familytwo:val'," + + "'hbase.table.name'='testTable')"); + + assertEquals(0, response.getResponseCode()); + + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java index 28d1135..fc78e27 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java @@ -77,533 +77,533 @@ public class TestHBaseInputFormat extends SkeletonHBaseTest { - private static HiveConf hcatConf; - private static HCatDriver hcatDriver; - private final byte[] FAMILY = Bytes.toBytes("testFamily"); - private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1"); - private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2"); - - public TestHBaseInputFormat() throws Exception { - hcatConf = getHiveConf(); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - URI fsuri = getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), - getTestDir()); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - - //Add hbase properties - - for (Map.Entry el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - HBaseConfiguration.merge(hcatConf, - RevisionManagerConfiguration.create()); - - - SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - + private static HiveConf hcatConf; + private static HCatDriver hcatDriver; + private final byte[] FAMILY = Bytes.toBytes("testFamily"); + private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1"); + private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2"); + + public TestHBaseInputFormat() throws Exception { + hcatConf = getHiveConf(); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + URI fsuri = getFileSystem().getUri(); + Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), + getTestDir()); + hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); + hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); + + //Add hbase properties + + for (Map.Entry el : getHbaseConf()) { + if (el.getKey().startsWith("hbase.")) { + hcatConf.set(el.getKey(), el.getValue()); + } + } + HBaseConfiguration.merge(hcatConf, + RevisionManagerConfiguration.create()); + + + SessionState.start(new CliSessionState(hcatConf)); + hcatDriver = new HCatDriver(); + + } + + private List generatePuts(int num, String tableName) throws IOException { + + List columnFamilies = Arrays.asList("testFamily"); + RevisionManager rm = null; + List myPuts; + try { + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); + rm.open(); + myPuts = new ArrayList(); + for (int i = 1; i <= num; i++) { + Put put = new Put(Bytes.toBytes("testRow")); + put.add(FAMILY, QUALIFIER1, i, Bytes.toBytes("textValue-" + i)); + put.add(FAMILY, QUALIFIER2, i, Bytes.toBytes("textValue-" + i)); + myPuts.add(put); + Transaction tsx = rm.beginWriteTransaction(tableName, + columnFamilies); + rm.commitWriteTransaction(tsx); + } + } finally { + if (rm != null) + rm.close(); } - private List generatePuts(int num, String tableName) throws IOException { - - List 
columnFamilies = Arrays.asList("testFamily"); - RevisionManager rm = null; - List myPuts; - try { - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - myPuts = new ArrayList(); - for (int i = 1; i <= num; i++) { - Put put = new Put(Bytes.toBytes("testRow")); - put.add(FAMILY, QUALIFIER1, i, Bytes.toBytes("textValue-" + i)); - put.add(FAMILY, QUALIFIER2, i, Bytes.toBytes("textValue-" + i)); - myPuts.add(put); - Transaction tsx = rm.beginWriteTransaction(tableName, - columnFamilies); - rm.commitWriteTransaction(tsx); - } - } finally { - if (rm != null) - rm.close(); + return myPuts; + } + + private void populateHBaseTable(String tName, int revisions) throws IOException { + List myPuts = generatePuts(revisions, tName); + HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); + table.put(myPuts); + } + + private long populateHBaseTableQualifier1(String tName, int value, Boolean commit) + throws IOException { + List columnFamilies = Arrays.asList("testFamily"); + RevisionManager rm = null; + List myPuts = new ArrayList(); + long revision; + try { + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); + rm.open(); + Transaction tsx = rm.beginWriteTransaction(tName, columnFamilies); + + Put put = new Put(Bytes.toBytes("testRow")); + revision = tsx.getRevisionNumber(); + put.add(FAMILY, QUALIFIER1, revision, + Bytes.toBytes("textValue-" + value)); + myPuts.add(put); + + // If commit is null it is left as a running transaction + if (commit != null) { + if (commit) { + rm.commitWriteTransaction(tsx); + } else { + rm.abortWriteTransaction(tsx); } - - return myPuts; + } + } finally { + if (rm != null) + rm.close(); } - - private void populateHBaseTable(String tName, int revisions) throws IOException { - List myPuts = generatePuts(revisions, tName); - HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); - table.put(myPuts); + HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); + table.put(myPuts); + return revision; + } + + @Test + public void TestHBaseTableReadMR() throws Exception { + String tableName = newTableName("MyTable"); + String databaseName = newTableName("MyDatabase"); + //Table name will be lower case unless specified by hbase.table.name property + String hbaseTableName = (databaseName + "." + tableName).toLowerCase(); + String db_dir = new Path(getTestDir(), "hbasedb").toString(); + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + + db_dir + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')"; + + CommandProcessorResponse responseOne = hcatDriver.run(dbquery); + assertEquals(0, responseOne.getResponseCode()); + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(hbaseTableName); + assertTrue(doesTableExist); + + populateHBaseTable(hbaseTableName, 5); + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + // output settings + Path outputDir = new Path(getTestDir(), "mapred/testHbaseTableMRRead"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); } - - private long populateHBaseTableQualifier1(String tName, int value, Boolean commit) - throws IOException { - List columnFamilies = Arrays.asList("testFamily"); - RevisionManager rm = null; - List myPuts = new ArrayList(); - long revision; - try { - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - Transaction tsx = rm.beginWriteTransaction(tName, columnFamilies); - - Put put = new Put(Bytes.toBytes("testRow")); - revision = tsx.getRevisionNumber(); - put.add(FAMILY, QUALIFIER1, revision, - Bytes.toBytes("textValue-" + value)); - myPuts.add(put); - - // If commit is null it is left as a running transaction - if (commit != null) { - if (commit) { - rm.commitWriteTransaction(tsx); - } else { - rm.abortWriteTransaction(tsx); - } - } - } finally { - if (rm != null) - rm.close(); - } - HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); - table.put(myPuts); - return revision; + // create job + Job job = new Job(conf, "hbase-mr-read-test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadHTable.class); + MapReadHTable.resetCounters(); + + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setInput(job.getConfiguration(), databaseName, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + // Note: These asserts only works in case of LocalJobRunner as they run in same jvm. + // If using MiniMRCluster, the tests will have to be modified. 
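// The assertions that follow on MapReadHTable.error and MapReadHTable.count only hold
// because LocalJobRunner runs the mapper in the test JVM, so the mapper's static fields
// are visible to the test; under MiniMRCluster the mapper would run in a separate process
// and the statics would never change here. A cluster-safe variant could report matches
// through Hadoop counters instead of static fields, roughly as sketched below
// (illustrative sketch only, not part of the patch: the class name and the counter
// group/name are invented, and the Mapper signature is assumed to mirror the
// MapReadHTable class defined further down in this file).

import java.io.IOException;

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hcatalog.data.HCatRecord;

class MapReadHTableWithCounters
    extends Mapper<ImmutableBytesWritable, HCatRecord, WritableComparable<?>, Text> {

    @Override
    public void map(ImmutableBytesWritable key, HCatRecord value, Context context)
        throws IOException, InterruptedException {
        // Same record check as MapReadHTable: one row, newest committed revision.
        boolean matches = (value.size() == 3)
            && "testRow".equalsIgnoreCase(value.get(0).toString())
            && "textValue-5".equalsIgnoreCase(value.get(1).toString())
            && "textValue-5".equalsIgnoreCase(value.get(2).toString());
        // Counters are aggregated by the framework, so they survive remote mappers.
        context.getCounter("HCatHBaseTest", matches ? "matched" : "mismatched").increment(1);
    }
}

// Driver side, after job.waitForCompletion(true), the test could then assert on:
//   long matched = job.getCounters().findCounter("HCatHBaseTest", "matched").getValue();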
+ assertFalse(MapReadHTable.error); + assertEquals(MapReadHTable.count, 1); + + String dropTableQuery = "DROP TABLE " + hbaseTableName; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); + assertFalse(isHbaseTableThere); + + String dropDB = "DROP DATABASE " + databaseName; + CommandProcessorResponse responseFour = hcatDriver.run(dropDB); + assertEquals(0, responseFour.getResponseCode()); + } + + @Test + public void TestHBaseTableProjectionReadMR() throws Exception { + + String tableName = newTableName("MyTable"); + //Table name as specified by hbase.table.name property + String hbaseTableName = "MyDB_" + tableName; + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=" + + "':key,testFamily:testQualifier1,testFamily:testQualifier2'," + + "'hbase.table.name'='" + hbaseTableName + "')"; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(hbaseTableName); + assertTrue(doesTableExist); + + populateHBaseTable(hbaseTableName, 5); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + // output settings + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); } - - @Test - public void TestHBaseTableReadMR() throws Exception { - String tableName = newTableName("MyTable"); - String databaseName = newTableName("MyDatabase"); - //Table name will be lower case unless specified by hbase.table.name property - String hbaseTableName = (databaseName + "." + tableName).toLowerCase(); - String db_dir = new Path(getTestDir(), "hbasedb").toString(); - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" - + db_dir + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')"; - - CommandProcessorResponse responseOne = hcatDriver.run(dbquery); - assertEquals(0, responseOne.getResponseCode()); - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(hbaseTableName); - assertTrue(doesTableExist); - - populateHBaseTable(hbaseTableName, 5); - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - // output settings - Path outputDir = new Path(getTestDir(), "mapred/testHbaseTableMRRead"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - // create job - Job job = new Job(conf, "hbase-mr-read-test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadHTable.class); - MapReadHTable.resetCounters(); - - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job.getConfiguration(), databaseName, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - // Note: These asserts only works in case of LocalJobRunner as they run in same jvm. - // If using MiniMRCluster, the tests will have to be modified. 
- assertFalse(MapReadHTable.error); - assertEquals(MapReadHTable.count, 1); - - String dropTableQuery = "DROP TABLE " + hbaseTableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); - assertFalse(isHbaseTableThere); - - String dropDB = "DROP DATABASE " + databaseName; - CommandProcessorResponse responseFour = hcatDriver.run(dropDB); - assertEquals(0, responseFour.getResponseCode()); + // create job + Job job = new Job(conf, "hbase-column-projection"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadProjHTable.class); + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setOutputSchema(job, getProjectionSchema()); + HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + assertFalse(MapReadProjHTable.error); + assertEquals(MapReadProjHTable.count, 1); + + String dropTableQuery = "DROP TABLE " + tableName; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); + assertFalse(isHbaseTableThere); + } + + @Test + public void TestHBaseInputFormatProjectionReadMR() throws Exception { + + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')"; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 5); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + // output settings + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); } - - @Test - public void TestHBaseTableProjectionReadMR() throws Exception { - - String tableName = newTableName("MyTable"); - //Table name as specified by hbase.table.name property - String hbaseTableName = "MyDB_" + tableName; - String tableQuery = "CREATE TABLE " + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " - + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=" - + "':key,testFamily:testQualifier1,testFamily:testQualifier2'," - + "'hbase.table.name'='" + hbaseTableName + "')"; - - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(hbaseTableName); - assertTrue(doesTableExist); - - 
populateHBaseTable(hbaseTableName, 5); - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - // output settings - Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - // create job - Job job = new Job(conf, "hbase-column-projection"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadProjHTable.class); - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setOutputSchema(job, getProjectionSchema()); - HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - assertFalse(MapReadProjHTable.error); - assertEquals(MapReadProjHTable.count, 1); - - String dropTableQuery = "DROP TABLE " + tableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); - assertFalse(isHbaseTableThere); + // create job + JobConf job = new JobConf(conf); + job.setJobName("hbase-scan-column"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadProjectionHTable.class); + job.setInputFormat(HBaseInputFormat.class); + + //Configure projection schema + job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema())); + Job newJob = new Job(job); + HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); + InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString); + job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString); + for (PartInfo partinfo : info.getPartitions()) { + for (Entry entry : partinfo.getJobProperties().entrySet()) + job.set(entry.getKey(), entry.getValue()); } - - @Test - public void TestHBaseInputFormatProjectionReadMR() throws Exception { - - String tableName = newTableName("mytable"); - String tableQuery = "CREATE TABLE " + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + - "testFamily:testQualifier1,testFamily:testQualifier2')"; - - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(tableName); - assertTrue(doesTableExist); - - populateHBaseTable(tableName, 5); - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - // output settings - Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - // create job - JobConf job = new JobConf(conf); - job.setJobName("hbase-scan-column"); - job.setJarByClass(this.getClass()); - 
job.setMapperClass(MapReadProjectionHTable.class); - job.setInputFormat(HBaseInputFormat.class); - - //Configure projection schema - job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema())); - Job newJob = new Job(job); - HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); - InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString); - job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString); - for (PartInfo partinfo : info.getPartitions()) { - for (Entry entry : partinfo.getJobProperties().entrySet()) - job.set(entry.getKey(), entry.getValue()); - } - assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS)); - - job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class); - org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - - RunningJob runJob = JobClient.runJob(job); - runJob.waitForCompletion(); - assertTrue(runJob.isSuccessful()); - assertFalse(MapReadProjHTable.error); - assertEquals(MapReadProjHTable.count, 1); - - String dropTableQuery = "DROP TABLE " + tableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(tableName); - assertFalse(isHbaseTableThere); + assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS)); + + job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class); + org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + + RunningJob runJob = JobClient.runJob(job); + runJob.waitForCompletion(); + assertTrue(runJob.isSuccessful()); + assertFalse(MapReadProjHTable.error); + assertEquals(MapReadProjHTable.count, 1); + + String dropTableQuery = "DROP TABLE " + tableName; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + @Test + public void TestHBaseTableIgnoreAbortedTransactions() throws Exception { + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')"; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 5); + populateHBaseTableQualifier1(tableName, 6, false); + populateHBaseTableQualifier1(tableName, 7, false); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + 
Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); } - - @Test - public void TestHBaseTableIgnoreAbortedTransactions() throws Exception { - String tableName = newTableName("mytable"); - String tableQuery = "CREATE TABLE " + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + - "testFamily:testQualifier1,testFamily:testQualifier2')"; - - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(tableName); - assertTrue(doesTableExist); - - populateHBaseTable(tableName, 5); - populateHBaseTableQualifier1(tableName, 6, false); - populateHBaseTableQualifier1(tableName, 7, false); - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - Job job = new Job(conf, "hbase-aborted-transaction"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadHTable.class); - MapReadHTable.resetCounters(); - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - // Verify that the records do not contain aborted transaction - // revisions 6 and 7 for testFamily:testQualifier1 and - // fetches revision 5 for both testQualifier1 and testQualifier2 - assertFalse(MapReadHTable.error); - assertEquals(1, MapReadHTable.count); - - String dropTableQuery = "DROP TABLE " + tableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(tableName); - assertFalse(isHbaseTableThere); + Job job = new Job(conf, "hbase-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadHTable.class); + MapReadHTable.resetCounters(); + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + // Verify that the records do not contain aborted transaction + // revisions 6 and 7 for testFamily:testQualifier1 and + // fetches revision 5 for both testQualifier1 and testQualifier2 + assertFalse(MapReadHTable.error); + assertEquals(1, MapReadHTable.count); + + String dropTableQuery = "DROP TABLE " + tableName; + 
CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + @Test + public void TestHBaseTableIgnoreAbortedAndRunningTransactions() throws Exception { + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')"; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 2); + populateHBaseTableQualifier1(tableName, 3, Boolean.TRUE); //Committed transaction + populateHBaseTableQualifier1(tableName, 4, null); //Running transaction + populateHBaseTableQualifier1(tableName, 5, Boolean.FALSE); //Aborted transaction + populateHBaseTableQualifier1(tableName, 6, Boolean.TRUE); //Committed transaction + populateHBaseTableQualifier1(tableName, 7, null); //Running Transaction + populateHBaseTableQualifier1(tableName, 8, Boolean.FALSE); //Aborted Transaction + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); } - - @Test - public void TestHBaseTableIgnoreAbortedAndRunningTransactions() throws Exception { - String tableName = newTableName("mytable"); - String tableQuery = "CREATE TABLE " + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + - "testFamily:testQualifier1,testFamily:testQualifier2')"; - - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(tableName); - assertTrue(doesTableExist); - - populateHBaseTable(tableName, 2); - populateHBaseTableQualifier1(tableName, 3, Boolean.TRUE); //Committed transaction - populateHBaseTableQualifier1(tableName, 4, null); //Running transaction - populateHBaseTableQualifier1(tableName, 5, Boolean.FALSE); //Aborted transaction - populateHBaseTableQualifier1(tableName, 6, Boolean.TRUE); //Committed transaction - populateHBaseTableQualifier1(tableName, 7, null); //Running Transaction - populateHBaseTableQualifier1(tableName, 8, Boolean.FALSE); //Aborted Transaction - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - Job job = new Job(conf, "hbase-running-aborted-transaction"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadHTableRunningAbort.class); - 
job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - // Verify that the records do not contain running and aborted transaction - // and it fetches revision 2 for testQualifier1 and testQualifier2 - assertFalse(MapReadHTableRunningAbort.error); - assertEquals(1, MapReadHTableRunningAbort.count); - - String dropTableQuery = "DROP TABLE " + tableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(tableName); - assertFalse(isHbaseTableThere); + Job job = new Job(conf, "hbase-running-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadHTableRunningAbort.class); + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + // Verify that the records do not contain running and aborted transaction + // and it fetches revision 2 for testQualifier1 and testQualifier2 + assertFalse(MapReadHTableRunningAbort.error); + assertEquals(1, MapReadHTableRunningAbort.count); + + String dropTableQuery = "DROP TABLE " + tableName; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + + static class MapReadHTable + extends + Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + boolean correctValues = (value.size() == 3) + && (value.get(0).toString()).equalsIgnoreCase("testRow") + && (value.get(1).toString()).equalsIgnoreCase("textValue-5") + && (value.get(2).toString()).equalsIgnoreCase("textValue-5"); + + if (correctValues == false) { + error = true; + } + count++; } - - static class MapReadHTable - extends - Mapper, Text> { - - static boolean error = false; - static int count = 0; - - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - boolean correctValues = (value.size() == 3) - && (value.get(0).toString()).equalsIgnoreCase("testRow") - && (value.get(1).toString()).equalsIgnoreCase("textValue-5") - && (value.get(2).toString()).equalsIgnoreCase("textValue-5"); - - if (correctValues == false) { - error = true; - } - count++; - } - - public static void resetCounters() { - error = false; - count = 0; - } + public static void resetCounters() { + error = false; + count = 0; } 
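// Why MapReadHTable expects exactly one record carrying "textValue-5": populateHBaseTable
// writes all of its revisions to the same row key ("testRow"), so the scan yields a single
// row, and per the comments in the tests above the read surfaces only the newest committed
// revision of each qualifier (revision 5 here). Aborted and still-open revisions are
// excluded from what the mapper sees, which is what MapReadHTableRunningAbort (below)
// checks with its "textValue-3" / "textValue-2" expectations.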
- - static class MapReadProjHTable - extends - Mapper, Text> { - - static boolean error = false; - static int count = 0; - - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - boolean correctValues = (value.size() == 2) - && (value.get(0).toString()).equalsIgnoreCase("testRow") - && (value.get(1).toString()).equalsIgnoreCase("textValue-5"); - - if (correctValues == false) { - error = true; - } - count++; - } + } + + static class MapReadProjHTable + extends + Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + boolean correctValues = (value.size() == 2) + && (value.get(0).toString()).equalsIgnoreCase("testRow") + && (value.get(1).toString()).equalsIgnoreCase("textValue-5"); + + if (correctValues == false) { + error = true; + } + count++; } + } - static class MapReadProjectionHTable - implements org.apache.hadoop.mapred.Mapper, Text> { + static class MapReadProjectionHTable + implements org.apache.hadoop.mapred.Mapper, Text> { - static boolean error = false; - static int count = 0; + static boolean error = false; + static int count = 0; - @Override - public void configure(JobConf job) { - } - - @Override - public void close() throws IOException { - } + @Override + public void configure(JobConf job) { + } - @Override - public void map(ImmutableBytesWritable key, Result result, - OutputCollector, Text> output, Reporter reporter) - throws IOException { - System.out.println("Result " + result.toString()); - List list = result.list(); - boolean correctValues = (list.size() == 1) - && (Bytes.toString(list.get(0).getRow())).equalsIgnoreCase("testRow") - && (Bytes.toString(list.get(0).getValue())).equalsIgnoreCase("textValue-5") - && (Bytes.toString(list.get(0).getFamily())).equalsIgnoreCase("testFamily") - && (Bytes.toString(list.get(0).getQualifier())).equalsIgnoreCase("testQualifier1"); - - if (correctValues == false) { - error = true; - } - count++; - } + @Override + public void close() throws IOException { } - static class MapReadHTableRunningAbort - extends - Mapper, Text> { - - static boolean error = false; - static int count = 0; - - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - boolean correctValues = (value.size() == 3) - && (value.get(0).toString()).equalsIgnoreCase("testRow") - && (value.get(1).toString()).equalsIgnoreCase("textValue-3") - && (value.get(2).toString()).equalsIgnoreCase("textValue-2"); - - if (correctValues == false) { - error = true; - } - count++; - } + @Override + public void map(ImmutableBytesWritable key, Result result, + OutputCollector, Text> output, Reporter reporter) + throws IOException { + System.out.println("Result " + result.toString()); + List list = result.list(); + boolean correctValues = (list.size() == 1) + && (Bytes.toString(list.get(0).getRow())).equalsIgnoreCase("testRow") + && (Bytes.toString(list.get(0).getValue())).equalsIgnoreCase("textValue-5") + && (Bytes.toString(list.get(0).getFamily())).equalsIgnoreCase("testFamily") + && (Bytes.toString(list.get(0).getQualifier())).equalsIgnoreCase("testQualifier1"); + + if (correctValues 
== false) { + error = true; + } + count++; } + } + + static class MapReadHTableRunningAbort + extends + Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + boolean correctValues = (value.size() == 3) + && (value.get(0).toString()).equalsIgnoreCase("testRow") + && (value.get(1).toString()).equalsIgnoreCase("textValue-3") + && (value.get(2).toString()).equalsIgnoreCase("textValue-2"); + + if (correctValues == false) { + error = true; + } + count++; + } + } - private HCatSchema getProjectionSchema() throws HCatException { + private HCatSchema getProjectionSchema() throws HCatException { - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("key", HCatFieldSchema.Type.STRING, - "")); - schema.append(new HCatFieldSchema("testqualifier1", - HCatFieldSchema.Type.STRING, "")); - return schema; - } + HCatSchema schema = new HCatSchema(new ArrayList()); + schema.append(new HCatFieldSchema("key", HCatFieldSchema.Type.STRING, + "")); + schema.append(new HCatFieldSchema("testqualifier1", + HCatFieldSchema.Type.STRING, "")); + return schema; + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java index 0fc8c9d..42fb068 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java @@ -44,98 +44,98 @@ import org.junit.Test; public class TestSnapshots extends SkeletonHBaseTest { - private static HiveConf hcatConf; - private static HCatDriver hcatDriver; - - public void Initialize() throws Exception { - hcatConf = getHiveConf(); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - URI fsuri = getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), - getTestDir()); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - - //Add hbase properties - - for (Map.Entry el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - - SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - + private static HiveConf hcatConf; + private static HCatDriver hcatDriver; + + public void Initialize() throws Exception { + hcatConf = getHiveConf(); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + URI fsuri = getFileSystem().getUri(); + Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), + getTestDir()); + hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); + hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); + + //Add hbase properties + + for (Map.Entry el : getHbaseConf()) { + if (el.getKey().startsWith("hbase.")) { + hcatConf.set(el.getKey(), el.getValue()); + } } - @Test - public void TestSnapshotConversion() throws Exception { - Initialize(); - String tableName = newTableName("mytableOne"); - String databaseName = newTableName("mydatabase"); - String fullyQualTableName = databaseName + "." 
+ tableName; - String db_dir = new Path(getTestDir(), "hbasedb").toString(); - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" - + db_dir + "'"; - String tableQuery = "CREATE TABLE " + fullyQualTableName - + "(key string, value1 string, value2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf2:q2')"; - - CommandProcessorResponse cmdResponse = hcatDriver.run(dbquery); - assertEquals(0, cmdResponse.getResponseCode()); - cmdResponse = hcatDriver.run(tableQuery); - assertEquals(0, cmdResponse.getResponseCode()); - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - Job job = new Job(conf); - Properties properties = new Properties(); - properties.setProperty(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, "dummysnapshot"); - HCatInputFormat.setInput(job, databaseName, tableName).setProperties(properties); - String modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); - InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); - - Map revMap = new HashMap(); - revMap.put("cf1", 3L); - revMap.put("cf2", 5L); - TableSnapshot hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1); - HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); - - assertEquals(hcatSnapshot.getRevision("value1"), 3); - assertEquals(hcatSnapshot.getRevision("value2"), 5); - - String dropTable = "DROP TABLE " + fullyQualTableName; - cmdResponse = hcatDriver.run(dropTable); - assertEquals(0, cmdResponse.getResponseCode()); - - tableName = newTableName("mytableTwo"); - fullyQualTableName = databaseName + "." + tableName; - tableQuery = "CREATE TABLE " + fullyQualTableName - + "(key string, value1 string, value2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf1:q2')"; - cmdResponse = hcatDriver.run(tableQuery); - assertEquals(0, cmdResponse.getResponseCode()); - revMap.clear(); - revMap.put("cf1", 3L); - hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1); - HCatInputFormat.setInput(job, databaseName, tableName).setProperties(properties); - modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); - inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); - hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); - assertEquals(hcatSnapshot.getRevision("value1"), 3); - assertEquals(hcatSnapshot.getRevision("value2"), 3); - - dropTable = "DROP TABLE " + fullyQualTableName; - cmdResponse = hcatDriver.run(dropTable); - assertEquals(0, cmdResponse.getResponseCode()); - - String dropDatabase = "DROP DATABASE IF EXISTS " + databaseName + "CASCADE"; - cmdResponse = hcatDriver.run(dropDatabase); - assertEquals(0, cmdResponse.getResponseCode()); - } + SessionState.start(new CliSessionState(hcatConf)); + hcatDriver = new HCatDriver(); + + } + + @Test + public void TestSnapshotConversion() throws Exception { + Initialize(); + String tableName = newTableName("mytableOne"); + String databaseName = newTableName("mydatabase"); + String fullyQualTableName = databaseName + "." 
+ tableName; + String db_dir = new Path(getTestDir(), "hbasedb").toString(); + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + + db_dir + "'"; + String tableQuery = "CREATE TABLE " + fullyQualTableName + + "(key string, value1 string, value2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf2:q2')"; + + CommandProcessorResponse cmdResponse = hcatDriver.run(dbquery); + assertEquals(0, cmdResponse.getResponseCode()); + cmdResponse = hcatDriver.run(tableQuery); + assertEquals(0, cmdResponse.getResponseCode()); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + Job job = new Job(conf); + Properties properties = new Properties(); + properties.setProperty(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, "dummysnapshot"); + HCatInputFormat.setInput(job, databaseName, tableName).setProperties(properties); + String modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); + InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); + + Map revMap = new HashMap(); + revMap.put("cf1", 3L); + revMap.put("cf2", 5L); + TableSnapshot hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1); + HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); + + assertEquals(hcatSnapshot.getRevision("value1"), 3); + assertEquals(hcatSnapshot.getRevision("value2"), 5); + + String dropTable = "DROP TABLE " + fullyQualTableName; + cmdResponse = hcatDriver.run(dropTable); + assertEquals(0, cmdResponse.getResponseCode()); + + tableName = newTableName("mytableTwo"); + fullyQualTableName = databaseName + "." 
+ tableName; + tableQuery = "CREATE TABLE " + fullyQualTableName + + "(key string, value1 string, value2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf1:q2')"; + cmdResponse = hcatDriver.run(tableQuery); + assertEquals(0, cmdResponse.getResponseCode()); + revMap.clear(); + revMap.put("cf1", 3L); + hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1); + HCatInputFormat.setInput(job, databaseName, tableName).setProperties(properties); + modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); + inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); + hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); + assertEquals(hcatSnapshot.getRevision("value1"), 3); + assertEquals(hcatSnapshot.getRevision("value2"), 3); + + dropTable = "DROP TABLE " + fullyQualTableName; + cmdResponse = hcatDriver.run(dropTable); + assertEquals(0, cmdResponse.getResponseCode()); + + String dropDatabase = "DROP DATABASE IF EXISTS " + databaseName + "CASCADE"; + cmdResponse = hcatDriver.run(dropDatabase); + assertEquals(0, cmdResponse.getResponseCode()); + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/IDGenClient.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/IDGenClient.java index 353402b..13a1b89 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/IDGenClient.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/IDGenClient.java @@ -24,49 +24,49 @@ public class IDGenClient extends Thread { - String connectionStr; - String base_dir; - ZKUtil zkutil; - Random sleepTime = new Random(); - int runtime; - HashMap idMap; - String tableName; + String connectionStr; + String base_dir; + ZKUtil zkutil; + Random sleepTime = new Random(); + int runtime; + HashMap idMap; + String tableName; - IDGenClient(String connectionStr, String base_dir, int time, String tableName) { - super(); - this.connectionStr = connectionStr; - this.base_dir = base_dir; - this.zkutil = new ZKUtil(connectionStr, base_dir); - this.runtime = time; - idMap = new HashMap(); - this.tableName = tableName; - } - - /* - * @see java.lang.Runnable#run() - */ - @Override - public void run() { - long startTime = System.currentTimeMillis(); - int timeElapsed = 0; - while( timeElapsed <= runtime){ - try { - long id = zkutil.nextId(tableName); - idMap.put(System.currentTimeMillis(), id); + IDGenClient(String connectionStr, String base_dir, int time, String tableName) { + super(); + this.connectionStr = connectionStr; + this.base_dir = base_dir; + this.zkutil = new ZKUtil(connectionStr, base_dir); + this.runtime = time; + idMap = new HashMap(); + this.tableName = tableName; + } - int sTime = sleepTime.nextInt(2); - Thread.sleep(sTime * 100); - } catch (Exception e) { - e.printStackTrace(); - } + /* + * @see java.lang.Runnable#run() + */ + @Override + public void run() { + long startTime = System.currentTimeMillis(); + int timeElapsed = 0; + while( timeElapsed <= runtime){ + try { + long id = zkutil.nextId(tableName); + idMap.put(System.currentTimeMillis(), id); - timeElapsed = (int) Math.ceil((System.currentTimeMillis() - startTime)/(double)1000); - } + int sTime = sleepTime.nextInt(2); + Thread.sleep(sTime * 100); + } catch (Exception e) { + e.printStackTrace(); + } + timeElapsed = (int) Math.ceil((System.currentTimeMillis() - 
startTime)/(double)1000); } - Map getIdMap(){ - return idMap; - } + } + + Map getIdMap(){ + return idMap; + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestIDGenerator.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestIDGenerator.java index 9b0cd01..ca046d5 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestIDGenerator.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestIDGenerator.java @@ -30,70 +30,70 @@ public class TestIDGenerator extends SkeletonHBaseTest { - @Test - public void testIDGeneration() throws Exception { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); - - String tableName = "myTable"; - long initId = zkutil.nextId(tableName); - for (int i = 0; i < 10; i++) { - long id = zkutil.nextId(tableName); - Assert.assertEquals(initId + (i + 1), id); - } + @Test + public void testIDGeneration() throws Exception { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); } + ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); - @Test - public void testMultipleClients() throws InterruptedException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - ArrayList clients = new ArrayList(); - - for (int i = 0; i < 5; i++) { - IDGenClient idClient = new IDGenClient(sb.toString(), "/rm_base", 10, "testTable"); - clients.add(idClient); - } - - for (IDGenClient idClient : clients) { - idClient.run(); - } - - for (IDGenClient idClient : clients) { - idClient.join(); - } - - HashMap idMap = new HashMap(); - for (IDGenClient idClient : clients) { - idMap.putAll(idClient.getIdMap()); - } - - ArrayList keys = new ArrayList(idMap.keySet()); - Collections.sort(keys); - int startId = 1; - for (Long key : keys) { - Long id = idMap.get(key); - System.out.println("Key: " + key + " Value " + id); - assertTrue(id == startId); - startId++; - - } + String tableName = "myTable"; + long initId = zkutil.nextId(tableName); + for (int i = 0; i < 10; i++) { + long id = zkutil.nextId(tableName); + Assert.assertEquals(initId + (i + 1), id); } + } + + @Test + public void testMultipleClients() throws InterruptedException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + } + + ArrayList clients = new ArrayList(); + + for (int i = 0; i < 5; i++) { + IDGenClient idClient = new IDGenClient(sb.toString(), "/rm_base", 10, "testTable"); + 
clients.add(idClient); + } + + for (IDGenClient idClient : clients) { + idClient.run(); + } + + for (IDGenClient idClient : clients) { + idClient.join(); + } + + HashMap idMap = new HashMap(); + for (IDGenClient idClient : clients) { + idMap.putAll(idClient.getIdMap()); + } + + ArrayList keys = new ArrayList(idMap.keySet()); + Collections.sort(keys); + int startId = 1; + for (Long key : keys) { + Long id = idMap.get(key); + System.out.println("Key: " + key + " Value " + id); + assertTrue(id == startId); + startId++; + + } + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManager.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManager.java index 114895a..cafdcd1 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManager.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManager.java @@ -36,225 +36,225 @@ public class TestRevisionManager extends SkeletonHBaseTest { - @Test - public void testBasicZNodeCreation() throws IOException, KeeperException, InterruptedException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); - String tableName = newTableName("testTable"); - List columnFamilies = Arrays.asList("cf001", "cf002", "cf003"); - - zkutil.createRootZNodes(); - ZooKeeper zk = zkutil.getSession(); - Stat tempTwo = zk.exists("/rm_base" + PathUtil.DATA_DIR, false); - assertTrue(tempTwo != null); - Stat tempThree = zk.exists("/rm_base" + PathUtil.CLOCK_NODE, false); - assertTrue(tempThree != null); - - zkutil.setUpZnodesForTable(tableName, columnFamilies); - String transactionDataTablePath = "/rm_base" + PathUtil.DATA_DIR + "/" + tableName; - Stat result = zk.exists(transactionDataTablePath, false); - assertTrue(result != null); - - for (String colFamiliy : columnFamilies) { - String cfPath = transactionDataTablePath + "/" + colFamiliy; - Stat resultTwo = zk.exists(cfPath, false); - assertTrue(resultTwo != null); - } + @Test + public void testBasicZNodeCreation() throws IOException, KeeperException, InterruptedException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + } + + ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); + String tableName = newTableName("testTable"); + List columnFamilies = Arrays.asList("cf001", "cf002", "cf003"); + + zkutil.createRootZNodes(); + ZooKeeper zk = zkutil.getSession(); + Stat tempTwo = zk.exists("/rm_base" + PathUtil.DATA_DIR, false); + assertTrue(tempTwo != null); + Stat tempThree = zk.exists("/rm_base" + PathUtil.CLOCK_NODE, false); + assertTrue(tempThree != null); + + zkutil.setUpZnodesForTable(tableName, columnFamilies); + String transactionDataTablePath = "/rm_base" + PathUtil.DATA_DIR + "/" + tableName; + Stat result = zk.exists(transactionDataTablePath, false); + assertTrue(result != null); + + for (String colFamiliy : columnFamilies) { + String cfPath = 
transactionDataTablePath + "/" + colFamiliy; + Stat resultTwo = zk.exists(cfPath, false); + assertTrue(resultTwo != null); + } + + } + @Test + public void testCommitTransaction() throws IOException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); } - @Test - public void testCommitTransaction() throws IOException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); - conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); - manager.initialize(conf); - manager.open(); - ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); - - String tableName = newTableName("testTable"); - List columnFamilies = Arrays.asList("cf1", "cf2", "cf3"); - Transaction txn = manager.beginWriteTransaction(tableName, - columnFamilies); - - List cfs = zkutil.getColumnFamiliesOfTable(tableName); - assertTrue(cfs.size() == columnFamilies.size()); - for (String cf : cfs) { - assertTrue(columnFamilies.contains(cf)); - } - - for (String colFamily : columnFamilies) { - String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamily); - byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 1); - StoreFamilyRevision lightTxn = list.getRevisionList().get(0); - assertEquals(lightTxn.timestamp, txn.getTransactionExpireTimeStamp()); - assertEquals(lightTxn.revision, txn.getRevisionNumber()); - - } - manager.commitWriteTransaction(txn); - for (String colFamiliy : columnFamilies) { - String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); - byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 0); - - } - - manager.close(); + Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); + conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); + manager.initialize(conf); + manager.open(); + ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); + + String tableName = newTableName("testTable"); + List columnFamilies = Arrays.asList("cf1", "cf2", "cf3"); + Transaction txn = manager.beginWriteTransaction(tableName, + columnFamilies); + + List cfs = zkutil.getColumnFamiliesOfTable(tableName); + assertTrue(cfs.size() == columnFamilies.size()); + for (String cf : cfs) { + assertTrue(columnFamilies.contains(cf)); } - @Test - public void testAbortTransaction() throws IOException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String host = getHbaseConf().get("hbase.zookeeper.quorum"); - Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); - conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - ZKBasedRevisionManager manager = new 
ZKBasedRevisionManager(); - manager.initialize(conf); - manager.open(); - ZKUtil zkutil = new ZKUtil(host + ':' + port, "/rm_base"); - - String tableName = newTableName("testTable"); - List columnFamilies = Arrays.asList("cf1", "cf2", "cf3"); - Transaction txn = manager.beginWriteTransaction(tableName, columnFamilies); - List cfs = zkutil.getColumnFamiliesOfTable(tableName); - - assertTrue(cfs.size() == columnFamilies.size()); - for (String cf : cfs) { - assertTrue(columnFamilies.contains(cf)); - } - - for (String colFamiliy : columnFamilies) { - String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); - byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 1); - StoreFamilyRevision lightTxn = list.getRevisionList().get(0); - assertEquals(lightTxn.timestamp, txn.getTransactionExpireTimeStamp()); - assertEquals(lightTxn.revision, txn.getRevisionNumber()); - - } - manager.abortWriteTransaction(txn); - for (String colFamiliy : columnFamilies) { - String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); - byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 0); - - } - - for (String colFamiliy : columnFamilies) { - String path = PathUtil.getAbortInformationPath("/rm_base", tableName, colFamiliy); - byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 1); - StoreFamilyRevision abortedTxn = list.getRevisionList().get(0); - assertEquals(abortedTxn.getRevision(), txn.getRevisionNumber()); - } - manager.close(); + for (String colFamily : columnFamilies) { + String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamily); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 1); + StoreFamilyRevision lightTxn = list.getRevisionList().get(0); + assertEquals(lightTxn.timestamp, txn.getTransactionExpireTimeStamp()); + assertEquals(lightTxn.revision, txn.getRevisionNumber()); + } + manager.commitWriteTransaction(txn); + for (String colFamiliy : columnFamilies) { + String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 0); - @Test - public void testKeepAliveTransaction() throws InterruptedException, IOException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); - conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); - manager.initialize(conf); - manager.open(); - String tableName = newTableName("testTable"); - List columnFamilies = Arrays.asList("cf1", "cf2"); - Transaction txn = 
manager.beginWriteTransaction(tableName, - columnFamilies, 40); - Thread.sleep(100); - try { - manager.commitWriteTransaction(txn); - } catch (Exception e) { - assertTrue(e instanceof IOException); - assertEquals(e.getMessage(), - "The transaction to be removed not found in the data."); - } + } + manager.close(); + } + + @Test + public void testAbortTransaction() throws IOException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String host = getHbaseConf().get("hbase.zookeeper.quorum"); + Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); + conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); + manager.initialize(conf); + manager.open(); + ZKUtil zkutil = new ZKUtil(host + ':' + port, "/rm_base"); + + String tableName = newTableName("testTable"); + List columnFamilies = Arrays.asList("cf1", "cf2", "cf3"); + Transaction txn = manager.beginWriteTransaction(tableName, columnFamilies); + List cfs = zkutil.getColumnFamiliesOfTable(tableName); + + assertTrue(cfs.size() == columnFamilies.size()); + for (String cf : cfs) { + assertTrue(columnFamilies.contains(cf)); } - @Test - public void testCreateSnapshot() throws IOException { - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String host = getHbaseConf().get("hbase.zookeeper.quorum"); - Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); - conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); - manager.initialize(conf); - manager.open(); - String tableName = newTableName("testTable"); - List cfOne = Arrays.asList("cf1", "cf2"); - List cfTwo = Arrays.asList("cf2", "cf3"); - Transaction tsx1 = manager.beginWriteTransaction(tableName, cfOne); - Transaction tsx2 = manager.beginWriteTransaction(tableName, cfTwo); - TableSnapshot snapshotOne = manager.createSnapshot(tableName); - assertEquals(snapshotOne.getRevision("cf1"), 0); - assertEquals(snapshotOne.getRevision("cf2"), 0); - assertEquals(snapshotOne.getRevision("cf3"), 1); - - List cfThree = Arrays.asList("cf1", "cf3"); - Transaction tsx3 = manager.beginWriteTransaction(tableName, cfThree); - manager.commitWriteTransaction(tsx1); - TableSnapshot snapshotTwo = manager.createSnapshot(tableName); - assertEquals(snapshotTwo.getRevision("cf1"), 2); - assertEquals(snapshotTwo.getRevision("cf2"), 1); - assertEquals(snapshotTwo.getRevision("cf3"), 1); - - manager.commitWriteTransaction(tsx2); - TableSnapshot snapshotThree = manager.createSnapshot(tableName); - assertEquals(snapshotThree.getRevision("cf1"), 2); - assertEquals(snapshotThree.getRevision("cf2"), 3); - assertEquals(snapshotThree.getRevision("cf3"), 2); - manager.commitWriteTransaction(tsx3); - TableSnapshot snapshotFour = manager.createSnapshot(tableName); - assertEquals(snapshotFour.getRevision("cf1"), 3); - assertEquals(snapshotFour.getRevision("cf2"), 3); - assertEquals(snapshotFour.getRevision("cf3"), 3); + for (String colFamiliy : columnFamilies) { + String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 1); + StoreFamilyRevision lightTxn = list.getRevisionList().get(0); + assertEquals(lightTxn.timestamp, txn.getTransactionExpireTimeStamp()); + assertEquals(lightTxn.revision, 
txn.getRevisionNumber()); } + manager.abortWriteTransaction(txn); + for (String colFamiliy : columnFamilies) { + String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 0); + + } + + for (String colFamiliy : columnFamilies) { + String path = PathUtil.getAbortInformationPath("/rm_base", tableName, colFamiliy); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 1); + StoreFamilyRevision abortedTxn = list.getRevisionList().get(0); + assertEquals(abortedTxn.getRevision(), txn.getRevisionNumber()); + } + manager.close(); + } + + @Test + public void testKeepAliveTransaction() throws InterruptedException, IOException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + } + + Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); + conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); + manager.initialize(conf); + manager.open(); + String tableName = newTableName("testTable"); + List columnFamilies = Arrays.asList("cf1", "cf2"); + Transaction txn = manager.beginWriteTransaction(tableName, + columnFamilies, 40); + Thread.sleep(100); + try { + manager.commitWriteTransaction(txn); + } catch (Exception e) { + assertTrue(e instanceof IOException); + assertEquals(e.getMessage(), + "The transaction to be removed not found in the data."); + } + + } + + @Test + public void testCreateSnapshot() throws IOException { + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String host = getHbaseConf().get("hbase.zookeeper.quorum"); + Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); + conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); + manager.initialize(conf); + manager.open(); + String tableName = newTableName("testTable"); + List cfOne = Arrays.asList("cf1", "cf2"); + List cfTwo = Arrays.asList("cf2", "cf3"); + Transaction tsx1 = manager.beginWriteTransaction(tableName, cfOne); + Transaction tsx2 = manager.beginWriteTransaction(tableName, cfTwo); + TableSnapshot snapshotOne = manager.createSnapshot(tableName); + assertEquals(snapshotOne.getRevision("cf1"), 0); + assertEquals(snapshotOne.getRevision("cf2"), 0); + assertEquals(snapshotOne.getRevision("cf3"), 1); + + List cfThree = Arrays.asList("cf1", "cf3"); + Transaction tsx3 = manager.beginWriteTransaction(tableName, cfThree); + manager.commitWriteTransaction(tsx1); + TableSnapshot snapshotTwo = manager.createSnapshot(tableName); + assertEquals(snapshotTwo.getRevision("cf1"), 2); + assertEquals(snapshotTwo.getRevision("cf2"), 1); + assertEquals(snapshotTwo.getRevision("cf3"), 1); + + manager.commitWriteTransaction(tsx2); + TableSnapshot snapshotThree = manager.createSnapshot(tableName); + assertEquals(snapshotThree.getRevision("cf1"), 2); + assertEquals(snapshotThree.getRevision("cf2"), 3); + assertEquals(snapshotThree.getRevision("cf3"), 2); + 
manager.commitWriteTransaction(tsx3); + TableSnapshot snapshotFour = manager.createSnapshot(tableName); + assertEquals(snapshotFour.getRevision("cf1"), 3); + assertEquals(snapshotFour.getRevision("cf2"), 3); + assertEquals(snapshotFour.getRevision("cf3"), 3); + + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java index 301bf92..bb00459 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java @@ -25,10 +25,10 @@ public class TestRevisionManagerConfiguration { - @Test - public void testDefault() { - Configuration conf = RevisionManagerConfiguration.create(); - Assert.assertEquals("org.apache.hcatalog.hbase.snapshot.ZKBasedRevisionManager", - conf.get(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS)); - } + @Test + public void testDefault() { + Configuration conf = RevisionManagerConfiguration.create(); + Assert.assertEquals("org.apache.hcatalog.hbase.snapshot.ZKBasedRevisionManager", + conf.get(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS)); + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java index fe9ca40..3208fa6 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java @@ -35,172 +35,172 @@ public class TestRevisionManagerEndpoint extends SkeletonHBaseTest { - static { - // test case specific mini cluster settings - testConf = new Configuration(false); - testConf.setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, - "org.apache.hcatalog.hbase.snapshot.RevisionManagerEndpoint", - "org.apache.hadoop.hbase.coprocessor.GenericEndpoint"); - testConf.set(RMConstants.REVISION_MGR_ENDPOINT_IMPL_CLASS, MockRM.class.getName()); - } - - /** - * Mock implementation to test the protocol/serialization - */ - public static class MockRM implements RevisionManager { - - private static class Invocation { - Invocation(String methodName, Object ret, Object... args) { - this.methodName = methodName; - this.args = args; - this.ret = ret; - } - - String methodName; - Object[] args; - Object ret; - - private static boolean equals(Object obj1, Object obj2) { - if (obj1 == obj2) return true; - if (obj1 == null || obj2 == null) return false; - if (obj1 instanceof Transaction || obj1 instanceof TableSnapshot) { - return obj1.toString().equals(obj2.toString()); - } - return obj1.equals(obj2); - } - - @Override - public boolean equals(Object obj) { - Invocation other = (Invocation) obj; - if (this == other) return true; - if (other == null) return false; - if (this.args != other.args) { - if (this.args == null || other.args == null) return false; - if (this.args.length != other.args.length) return false; - for (int i = 0; i < args.length; i++) { - if (!equals(this.args[i], other.args[i])) return false; - } - } - return equals(this.ret, other.ret); - } - - @Override - public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE). 
- append("method", this.methodName). - append("args", this.args). - append("returns", this.ret). - toString(); - } + static { + // test case specific mini cluster settings + testConf = new Configuration(false); + testConf.setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, + "org.apache.hcatalog.hbase.snapshot.RevisionManagerEndpoint", + "org.apache.hadoop.hbase.coprocessor.GenericEndpoint"); + testConf.set(RMConstants.REVISION_MGR_ENDPOINT_IMPL_CLASS, MockRM.class.getName()); + } + + /** + * Mock implementation to test the protocol/serialization + */ + public static class MockRM implements RevisionManager { + + private static class Invocation { + Invocation(String methodName, Object ret, Object... args) { + this.methodName = methodName; + this.args = args; + this.ret = ret; + } + + String methodName; + Object[] args; + Object ret; + + private static boolean equals(Object obj1, Object obj2) { + if (obj1 == obj2) return true; + if (obj1 == null || obj2 == null) return false; + if (obj1 instanceof Transaction || obj1 instanceof TableSnapshot) { + return obj1.toString().equals(obj2.toString()); } + return obj1.equals(obj2); + } + + @Override + public boolean equals(Object obj) { + Invocation other = (Invocation) obj; + if (this == other) return true; + if (other == null) return false; + if (this.args != other.args) { + if (this.args == null || other.args == null) return false; + if (this.args.length != other.args.length) return false; + for (int i = 0; i < args.length; i++) { + if (!equals(this.args[i], other.args[i])) return false; + } + } + return equals(this.ret, other.ret); + } + + @Override + public String toString() { + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE). + append("method", this.methodName). + append("args", this.args). + append("returns", this.ret). + toString(); + } + } - final static String DEFAULT_INSTANCE = "default"; - final static Map INSTANCES = new ConcurrentHashMap(); - Invocation lastCall; - boolean isOpen = false; + final static String DEFAULT_INSTANCE = "default"; + final static Map INSTANCES = new ConcurrentHashMap(); + Invocation lastCall; + boolean isOpen = false; - private T recordCall(T result, Object... args) { - StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); - lastCall = new Invocation(stackTrace[2].getMethodName(), result, args); - return result; - } + private T recordCall(T result, Object... 
args) { + StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); + lastCall = new Invocation(stackTrace[2].getMethodName(), result, args); + return result; + } - @Override - public void initialize(Configuration conf) { - if (!INSTANCES.containsKey(DEFAULT_INSTANCE)) - INSTANCES.put(DEFAULT_INSTANCE, this); - } + @Override + public void initialize(Configuration conf) { + if (!INSTANCES.containsKey(DEFAULT_INSTANCE)) + INSTANCES.put(DEFAULT_INSTANCE, this); + } - @Override - public void open() throws IOException { - isOpen = true; - } + @Override + public void open() throws IOException { + isOpen = true; + } - @Override - public void close() throws IOException { - isOpen = false; - } + @Override + public void close() throws IOException { + isOpen = false; + } - @Override - public void createTable(String table, List columnFamilies) throws IOException { - } + @Override + public void createTable(String table, List columnFamilies) throws IOException { + } - @Override - public void dropTable(String table) throws IOException { - } + @Override + public void dropTable(String table) throws IOException { + } - @Override - public Transaction beginWriteTransaction(String table, - List families) throws IOException { - return recordCall(null, table, families); - } + @Override + public Transaction beginWriteTransaction(String table, + List families) throws IOException { + return recordCall(null, table, families); + } - @Override - public Transaction beginWriteTransaction(String table, - List families, long keepAlive) throws IOException { - return recordCall(null, table, families, keepAlive); - } + @Override + public Transaction beginWriteTransaction(String table, + List families, long keepAlive) throws IOException { + return recordCall(null, table, families, keepAlive); + } - @Override - public void commitWriteTransaction(Transaction transaction) - throws IOException { - } + @Override + public void commitWriteTransaction(Transaction transaction) + throws IOException { + } - @Override - public void abortWriteTransaction(Transaction transaction) - throws IOException { - } + @Override + public void abortWriteTransaction(Transaction transaction) + throws IOException { + } - @Override - public List getAbortedWriteTransactions(String table, - String columnFamily) throws IOException { - return null; - } + @Override + public List getAbortedWriteTransactions(String table, + String columnFamily) throws IOException { + return null; + } - @Override - public TableSnapshot createSnapshot(String tableName) - throws IOException { - return null; - } + @Override + public TableSnapshot createSnapshot(String tableName) + throws IOException { + return null; + } - @Override - public TableSnapshot createSnapshot(String tableName, long revision) - throws IOException { - TableSnapshot ret = new TableSnapshot(tableName, new HashMap(), revision); - return recordCall(ret, tableName, revision); - } + @Override + public TableSnapshot createSnapshot(String tableName, long revision) + throws IOException { + TableSnapshot ret = new TableSnapshot(tableName, new HashMap(), revision); + return recordCall(ret, tableName, revision); + } - @Override - public void keepAlive(Transaction transaction) throws IOException { - recordCall(null, transaction); - } + @Override + public void keepAlive(Transaction transaction) throws IOException { + recordCall(null, transaction); } + } - @Test - public void testRevisionManagerProtocol() throws Throwable { + @Test + public void testRevisionManagerProtocol() throws Throwable { - 
Configuration conf = getHbaseConf(); - RevisionManager rm = RevisionManagerFactory.getOpenedRevisionManager( - RevisionManagerEndpointClient.class.getName(), conf); + Configuration conf = getHbaseConf(); + RevisionManager rm = RevisionManagerFactory.getOpenedRevisionManager( + RevisionManagerEndpointClient.class.getName(), conf); - MockRM mockImpl = MockRM.INSTANCES.get(MockRM.DEFAULT_INSTANCE); - Assert.assertNotNull(mockImpl); - Assert.assertTrue(mockImpl.isOpen); + MockRM mockImpl = MockRM.INSTANCES.get(MockRM.DEFAULT_INSTANCE); + Assert.assertNotNull(mockImpl); + Assert.assertTrue(mockImpl.isOpen); - Transaction t = new Transaction("t1", Arrays.asList("f1", "f2"), 0, 0); - MockRM.Invocation call = new MockRM.Invocation("keepAlive", null, t); - rm.keepAlive(t); - Assert.assertEquals(call.methodName, call, mockImpl.lastCall); + Transaction t = new Transaction("t1", Arrays.asList("f1", "f2"), 0, 0); + MockRM.Invocation call = new MockRM.Invocation("keepAlive", null, t); + rm.keepAlive(t); + Assert.assertEquals(call.methodName, call, mockImpl.lastCall); - t = new Transaction("t2", Arrays.asList("f21", "f22"), 0, 0); - call = new MockRM.Invocation("beginWriteTransaction", null, t.getTableName(), t.getColumnFamilies()); - call.ret = rm.beginWriteTransaction(t.getTableName(), t.getColumnFamilies()); - Assert.assertEquals(call.methodName, call, mockImpl.lastCall); + t = new Transaction("t2", Arrays.asList("f21", "f22"), 0, 0); + call = new MockRM.Invocation("beginWriteTransaction", null, t.getTableName(), t.getColumnFamilies()); + call.ret = rm.beginWriteTransaction(t.getTableName(), t.getColumnFamilies()); + Assert.assertEquals(call.methodName, call, mockImpl.lastCall); - call = new MockRM.Invocation("createSnapshot", null, "t3", 1L); - call.ret = rm.createSnapshot("t3", 1); - Assert.assertEquals(call.methodName, call, mockImpl.lastCall); + call = new MockRM.Invocation("createSnapshot", null, "t3", 1L); + call.ret = rm.createSnapshot("t3", 1); + Assert.assertEquals(call.methodName, call, mockImpl.lastCall); - } + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestThriftSerialization.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestThriftSerialization.java index e423f65..5466125 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestThriftSerialization.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestThriftSerialization.java @@ -31,55 +31,55 @@ public class TestThriftSerialization { - @Test - public void testLightWeightTransaction() { - StoreFamilyRevision trxn = new StoreFamilyRevision(0, 1000); - try { + @Test + public void testLightWeightTransaction() { + StoreFamilyRevision trxn = new StoreFamilyRevision(0, 1000); + try { - byte[] data = ZKUtil.serialize(trxn); - StoreFamilyRevision newWtx = new StoreFamilyRevision(); - ZKUtil.deserialize(newWtx, data); + byte[] data = ZKUtil.serialize(trxn); + StoreFamilyRevision newWtx = new StoreFamilyRevision(); + ZKUtil.deserialize(newWtx, data); - assertTrue(newWtx.getRevision() == trxn.getRevision()); - assertTrue(newWtx.getTimestamp() == trxn.getTimestamp()); + assertTrue(newWtx.getRevision() == trxn.getRevision()); + assertTrue(newWtx.getTimestamp() == trxn.getTimestamp()); - } catch (IOException e) { - e.printStackTrace(); - } + } catch (IOException e) { + e.printStackTrace(); } + } - @Test - public void testWriteTransactionList() { - List txnList = new ArrayList(); - long version; - 
long timestamp; - for (int i = 0; i < 10; i++) { - version = i; - timestamp = 1000 + i; - StoreFamilyRevision wtx = new StoreFamilyRevision(version, timestamp); - txnList.add(wtx); - } + @Test + public void testWriteTransactionList() { + List txnList = new ArrayList(); + long version; + long timestamp; + for (int i = 0; i < 10; i++) { + version = i; + timestamp = 1000 + i; + StoreFamilyRevision wtx = new StoreFamilyRevision(version, timestamp); + txnList.add(wtx); + } - StoreFamilyRevisionList wList = new StoreFamilyRevisionList(txnList); + StoreFamilyRevisionList wList = new StoreFamilyRevisionList(txnList); - try { - byte[] data = ZKUtil.serialize(wList); - StoreFamilyRevisionList newList = new StoreFamilyRevisionList(); - ZKUtil.deserialize(newList, data); - assertTrue(newList.getRevisionListSize() == wList.getRevisionListSize()); + try { + byte[] data = ZKUtil.serialize(wList); + StoreFamilyRevisionList newList = new StoreFamilyRevisionList(); + ZKUtil.deserialize(newList, data); + assertTrue(newList.getRevisionListSize() == wList.getRevisionListSize()); - Iterator itr = newList.getRevisionListIterator(); - int i = 0; - while (itr.hasNext()) { - StoreFamilyRevision txn = itr.next(); - assertTrue(txn.getRevision() == i); - assertTrue(txn.getTimestamp() == (i + 1000)); - i++; - } + Iterator itr = newList.getRevisionListIterator(); + int i = 0; + while (itr.hasNext()) { + StoreFamilyRevision txn = itr.next(); + assertTrue(txn.getRevision() == i); + assertTrue(txn.getTimestamp() == (i + 1000)); + i++; + } - } catch (IOException e) { - e.printStackTrace(); - } + } catch (IOException e) { + e.printStackTrace(); } + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestZNodeSetUp.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestZNodeSetUp.java index 7a1a3c2..eaf434d 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestZNodeSetUp.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestZNodeSetUp.java @@ -43,78 +43,78 @@ public class TestZNodeSetUp extends SkeletonHBaseTest { - private static HiveConf hcatConf; - private static HCatDriver hcatDriver; - - public void Initialize() throws Exception { - - hcatConf = getHiveConf(); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - URI fsuri = getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), - getTestDir()); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - - //Add hbase properties - - for (Map.Entry el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - HBaseConfiguration.merge(hcatConf, - RevisionManagerConfiguration.create()); - hcatConf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - + private static HiveConf hcatConf; + private static HCatDriver hcatDriver; + + public void Initialize() throws Exception { + + hcatConf = getHiveConf(); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + URI fsuri = getFileSystem().getUri(); + Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), + getTestDir()); + hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); + 
hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); + + //Add hbase properties + + for (Map.Entry el : getHbaseConf()) { + if (el.getKey().startsWith("hbase.")) { + hcatConf.set(el.getKey(), el.getValue()); + } + } + HBaseConfiguration.merge(hcatConf, + RevisionManagerConfiguration.create()); + hcatConf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + SessionState.start(new CliSessionState(hcatConf)); + hcatDriver = new HCatDriver(); + + } + + @Test + public void testBasicZNodeCreation() throws Exception { + + Initialize(); + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); } - @Test - public void testBasicZNodeCreation() throws Exception { - - Initialize(); - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - hcatDriver.run("drop table test_table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_table(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); + hcatDriver.run("drop table test_table"); + CommandProcessorResponse response = hcatDriver + .run("create table test_table(key int, value string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); - assertEquals(0, response.getResponseCode()); + assertEquals(0, response.getResponseCode()); - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("test_table"); - assertTrue(doesTableExist); + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists("test_table"); + assertTrue(doesTableExist); - ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); - ZooKeeper zk = zkutil.getSession(); - String tablePath = PathUtil.getTxnDataPath("/rm_base", "test_table"); - Stat tempTwo = zk.exists(tablePath, false); - assertTrue(tempTwo != null); + ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); + ZooKeeper zk = zkutil.getSession(); + String tablePath = PathUtil.getTxnDataPath("/rm_base", "test_table"); + Stat tempTwo = zk.exists(tablePath, false); + assertTrue(tempTwo != null); - String cfPath = PathUtil.getTxnDataPath("/rm_base", "test_table") + "/cf1"; - Stat tempThree = zk.exists(cfPath, false); - assertTrue(tempThree != null); + String cfPath = PathUtil.getTxnDataPath("/rm_base", "test_table") + "/cf1"; + Stat tempThree = zk.exists(cfPath, false); + assertTrue(tempThree != null); - hcatDriver.run("drop table test_table"); + hcatDriver.run("drop table test_table"); - System.out.println("Table path : " + tablePath); - Stat tempFour = zk.exists(tablePath, false); - assertTrue(tempFour == null); + System.out.println("Table path : " + tablePath); + Stat tempFour = zk.exists(tablePath, false); + assertTrue(tempFour == null); - } + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestWriteLock.java 
b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestWriteLock.java index c03a00b..76c59e6 100644 --- a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestWriteLock.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestWriteLock.java @@ -35,127 +35,127 @@ * Recipe with a change in the package name. */ public class TestWriteLock extends ClientBase { - protected int sessionTimeout = 10 * 1000; - protected String dir = "/" + getClass().getName(); - protected WriteLock[] nodes; - protected CountDownLatch latch = new CountDownLatch(1); - private boolean restartServer = true; - private boolean workAroundClosingLastZNodeFails = true; - private boolean killLeader = true; - - @Test - public void testRun() throws Exception { - runTest(3); + protected int sessionTimeout = 10 * 1000; + protected String dir = "/" + getClass().getName(); + protected WriteLock[] nodes; + protected CountDownLatch latch = new CountDownLatch(1); + private boolean restartServer = true; + private boolean workAroundClosingLastZNodeFails = true; + private boolean killLeader = true; + + @Test + public void testRun() throws Exception { + runTest(3); + } + + class LockCallback implements LockListener { + public void lockAcquired() { + latch.countDown(); } - class LockCallback implements LockListener { - public void lockAcquired() { - latch.countDown(); - } + public void lockReleased() { - public void lockReleased() { + } - } + } + protected void runTest(int count) throws Exception { + nodes = new WriteLock[count]; + for (int i = 0; i < count; i++) { + ZooKeeper keeper = createClient(); + WriteLock leader = new WriteLock(keeper, dir, null); + leader.setLockListener(new LockCallback()); + nodes[i] = leader; + + leader.lock(); } - protected void runTest(int count) throws Exception { - nodes = new WriteLock[count]; - for (int i = 0; i < count; i++) { - ZooKeeper keeper = createClient(); - WriteLock leader = new WriteLock(keeper, dir, null); - leader.setLockListener(new LockCallback()); - nodes[i] = leader; + // lets wait for any previous leaders to die and one of our new + // nodes to become the new leader + latch.await(30, TimeUnit.SECONDS); - leader.lock(); - } + WriteLock first = nodes[0]; + dumpNodes(count); - // lets wait for any previous leaders to die and one of our new - // nodes to become the new leader - latch.await(30, TimeUnit.SECONDS); + // lets assert that the first election is the leader + Assert.assertTrue("The first znode should be the leader " + first.getId(), first.isOwner()); - WriteLock first = nodes[0]; - dumpNodes(count); + for (int i = 1; i < count; i++) { + WriteLock node = nodes[i]; + Assert.assertFalse("Node should not be the leader " + node.getId(), node.isOwner()); + } + if (count > 1) { + if (killLeader) { + System.out.println("Now killing the leader"); + // now lets kill the leader + latch = new CountDownLatch(1); + first.unlock(); + latch.await(30, TimeUnit.SECONDS); + //Thread.sleep(10000); + WriteLock second = nodes[1]; + dumpNodes(count); // lets assert that the first election is the leader - Assert.assertTrue("The first znode should be the leader " + first.getId(), first.isOwner()); + Assert.assertTrue("The second znode should be the leader " + second.getId(), second.isOwner()); - for (int i = 1; i < count; i++) { - WriteLock node = nodes[i]; - Assert.assertFalse("Node should not be the leader " + node.getId(), node.isOwner()); + for (int i = 2; i < count; i++) { + WriteLock node = nodes[i]; 
+ Assert.assertFalse("Node should not be the leader " + node.getId(), node.isOwner()); } + } - if (count > 1) { - if (killLeader) { - System.out.println("Now killing the leader"); - // now lets kill the leader - latch = new CountDownLatch(1); - first.unlock(); - latch.await(30, TimeUnit.SECONDS); - //Thread.sleep(10000); - WriteLock second = nodes[1]; - dumpNodes(count); - // lets assert that the first election is the leader - Assert.assertTrue("The second znode should be the leader " + second.getId(), second.isOwner()); - - for (int i = 2; i < count; i++) { - WriteLock node = nodes[i]; - Assert.assertFalse("Node should not be the leader " + node.getId(), node.isOwner()); - } - } - - - if (restartServer) { - // now lets stop the server - System.out.println("Now stopping the server"); - stopServer(); - Thread.sleep(10000); - - // TODO lets assert that we are no longer the leader - dumpNodes(count); - - System.out.println("Starting the server"); - startServer(); - Thread.sleep(10000); - - for (int i = 0; i < count - 1; i++) { - System.out.println("Calling acquire for node: " + i); - nodes[i].lock(); - } - dumpNodes(count); - System.out.println("Now closing down..."); - } - } - } - protected void dumpNodes(int count) { - for (int i = 0; i < count; i++) { - WriteLock node = nodes[i]; - System.out.println("node: " + i + " id: " + - node.getId() + " is leader: " + node.isOwner()); + if (restartServer) { + // now lets stop the server + System.out.println("Now stopping the server"); + stopServer(); + Thread.sleep(10000); + + // TODO lets assert that we are no longer the leader + dumpNodes(count); + + System.out.println("Starting the server"); + startServer(); + Thread.sleep(10000); + + for (int i = 0; i < count - 1; i++) { + System.out.println("Calling acquire for node: " + i); + nodes[i].lock(); } + dumpNodes(count); + System.out.println("Now closing down..."); + } } + } - @After - public void tearDown() throws Exception { - if (nodes != null) { - for (int i = 0; i < nodes.length; i++) { - WriteLock node = nodes[i]; - if (node != null) { - System.out.println("Closing node: " + i); - node.close(); - if (workAroundClosingLastZNodeFails && i == nodes.length - 1) { - System.out.println("Not closing zookeeper: " + i + " due to bug!"); - } else { - System.out.println("Closing zookeeper: " + i); - node.getZookeeper().close(); - System.out.println("Closed zookeeper: " + i); - } - } - } + protected void dumpNodes(int count) { + for (int i = 0; i < count; i++) { + WriteLock node = nodes[i]; + System.out.println("node: " + i + " id: " + + node.getId() + " is leader: " + node.isOwner()); + } + } + + @After + public void tearDown() throws Exception { + if (nodes != null) { + for (int i = 0; i < nodes.length; i++) { + WriteLock node = nodes[i]; + if (node != null) { + System.out.println("Closing node: " + i); + node.close(); + if (workAroundClosingLastZNodeFails && i == nodes.length - 1) { + System.out.println("Not closing zookeeper: " + i + " due to bug!"); + } else { + System.out.println("Closing zookeeper: " + i); + node.getZookeeper().close(); + System.out.println("Closed zookeeper: " + i); + } } - System.out.println("Now lets stop the server"); - super.tearDown(); - + } } + System.out.println("Now lets stop the server"); + super.tearDown(); + + } } diff --git a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestZNodeName.java b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestZNodeName.java index a39c691..6e980d5 100644 --- 
a/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestZNodeName.java +++ b/hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestZNodeName.java @@ -31,32 +31,32 @@ * The package name has been changed. */ public class TestZNodeName extends TestCase { - @Test - public void testOrderWithSamePrefix() throws Exception { - String[] names = { "x-3", "x-5", "x-11", "x-1" }; - String[] expected = { "x-1", "x-3", "x-5", "x-11" }; - assertOrderedNodeNames(names, expected); - } - @Test - public void testOrderWithDifferentPrefixes() throws Exception { - String[] names = { "r-3", "r-2", "r-1", "w-2", "w-1" }; - String[] expected = { "r-1", "r-2", "r-3", "w-1", "w-2" }; - assertOrderedNodeNames(names, expected); + @Test + public void testOrderWithSamePrefix() throws Exception { + String[] names = { "x-3", "x-5", "x-11", "x-1" }; + String[] expected = { "x-1", "x-3", "x-5", "x-11" }; + assertOrderedNodeNames(names, expected); + } + @Test + public void testOrderWithDifferentPrefixes() throws Exception { + String[] names = { "r-3", "r-2", "r-1", "w-2", "w-1" }; + String[] expected = { "r-1", "r-2", "r-3", "w-1", "w-2" }; + assertOrderedNodeNames(names, expected); + } + + protected void assertOrderedNodeNames(String[] names, String[] expected) { + int size = names.length; + assertEquals("The two arrays should be the same size!", names.length, expected.length); + SortedSet nodeNames = new TreeSet(); + for (String name : names) { + nodeNames.add(new ZNodeName(name)); } - protected void assertOrderedNodeNames(String[] names, String[] expected) { - int size = names.length; - assertEquals("The two arrays should be the same size!", names.length, expected.length); - SortedSet nodeNames = new TreeSet(); - for (String name : names) { - nodeNames.add(new ZNodeName(name)); - } - - int index = 0; - for (ZNodeName nodeName : nodeNames) { - String name = nodeName.getName(); - assertEquals("Node " + index, expected[index++], name); - } + int index = 0; + for (ZNodeName nodeName : nodeNames) { + String name = nodeName.getName(); + assertEquals("Node " + index, expected[index++], name); } + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ConnectionFailureException.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ConnectionFailureException.java index c5a8a0a..eef9ab2 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ConnectionFailureException.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ConnectionFailureException.java @@ -27,14 +27,14 @@ */ public class ConnectionFailureException extends HCatException { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - /** - * @param message Exception message. - * @param cause The wrapped Throwable that caused this exception. - */ - public ConnectionFailureException(String message, Throwable cause) { - super(message, cause); - } + /** + * @param message Exception message. + * @param cause The wrapped Throwable that caused this exception. 
+ */ + public ConnectionFailureException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatAddPartitionDesc.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatAddPartitionDesc.java index 562b2fc..de8d131 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatAddPartitionDesc.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatAddPartitionDesc.java @@ -40,147 +40,147 @@ */ public class HCatAddPartitionDesc { - private static final Logger LOG = LoggerFactory.getLogger(HCatAddPartitionDesc.class); - private String tableName; - private String dbName; - private String location; - private Map partSpec; - - private HCatAddPartitionDesc(String dbName, String tbl, String loc, Map spec) { - this.dbName = dbName; - this.tableName = tbl; - this.location = loc; - this.partSpec = spec; - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return this.location; + private static final Logger LOG = LoggerFactory.getLogger(HCatAddPartitionDesc.class); + private String tableName; + private String dbName; + private String location; + private Map partSpec; + + private HCatAddPartitionDesc(String dbName, String tbl, String loc, Map spec) { + this.dbName = dbName; + this.tableName = tbl; + this.location = loc; + this.partSpec = spec; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.location; + } + + + /** + * Gets the partition spec. + * + * @return the partition spec + */ + public Map getPartitionSpec() { + return this.partSpec; + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return this.tableName; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + @Override + public String toString() { + return "HCatAddPartitionDesc [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (location != null ? "location=" + location + ", " : "location=null") + + (partSpec != null ? "partSpec=" + partSpec : "partSpec=null") + "]"; + } + + /** + * Creates the builder for specifying attributes. + * + * @param dbName the db name + * @param tableName the table name + * @param location the location + * @param partSpec the part spec + * @return the builder + * @throws HCatException + */ + public static Builder create(String dbName, String tableName, String location, + Map partSpec) throws HCatException { + return new Builder(dbName, tableName, location, partSpec); + } + + Partition toHivePartition(Table hiveTable) throws HCatException { + Partition hivePtn = new Partition(); + hivePtn.setDbName(this.dbName); + hivePtn.setTableName(this.tableName); + + List pvals = new ArrayList(); + for (FieldSchema field : hiveTable.getPartitionKeys()) { + String val = partSpec.get(field.getName()); + if (val == null || val.length() == 0) { + throw new HCatException("create partition: Value for key " + + field.getName() + " is null or empty"); + } + pvals.add(val); } - - /** - * Gets the partition spec. 
- * - * @return the partition spec - */ - public Map getPartitionSpec() { - return this.partSpec; + hivePtn.setValues(pvals); + StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd()); + hivePtn.setSd(sd); + hivePtn.setParameters(hiveTable.getParameters()); + if (this.location != null) { + hivePtn.getSd().setLocation(this.location); + } else { + String partName; + try { + partName = Warehouse.makePartName( + hiveTable.getPartitionKeys(), pvals); + LOG.info("Setting partition location to :" + partName); + } catch (MetaException e) { + throw new HCatException("Exception while creating partition name.", e); + } + Path partPath = new Path(hiveTable.getSd().getLocation(), partName); + hivePtn.getSd().setLocation(partPath.toString()); } + hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000)); + hivePtn.setLastAccessTimeIsSet(false); + return hivePtn; + } - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return this.tableName; - } + public static class Builder { - /** - * Gets the database name. - * - * @return the database name - */ - public String getDatabaseName() { - return this.dbName; - } + private String tableName; + private String location; + private Map values; + private String dbName; - @Override - public String toString() { - return "HCatAddPartitionDesc [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (location != null ? "location=" + location + ", " : "location=null") - + (partSpec != null ? "partSpec=" + partSpec : "partSpec=null") + "]"; + private Builder(String dbName, String tableName, String location, Map values) { + this.dbName = dbName; + this.tableName = tableName; + this.location = location; + this.values = values; } /** - * Creates the builder for specifying attributes. + * Builds the HCatAddPartitionDesc. 
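A quick illustrative sketch of this builder in use (not from the patch itself; it assumes a reachable metastore, a hypothetical table myDb.clicks partitioned by dt, and that HCatException resolves to org.apache.hcatalog.common.HCatException). Passing null for the location lets toHivePartition() fall back to the table's storage descriptor, as the code above does.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hcatalog.api.HCatAddPartitionDesc;
import org.apache.hcatalog.api.HCatClient;
import org.apache.hcatalog.common.HCatException;

public class AddPartitionSketch {
  public static void main(String[] args) throws HCatException {
    HCatClient client = HCatClient.create(new Configuration()); // assumes hive-site.xml / metastore URIs are on the classpath
    Map<String, String> spec = new HashMap<String, String>();
    spec.put("dt", "20120101");                                 // hypothetical partition key and value
    HCatAddPartitionDesc desc = HCatAddPartitionDesc
        .create("myDb", "clicks", null, spec)                   // null location: derived from the table's storage descriptor
        .build();                                               // build() defaults the db name to "default" when null
    client.addPartition(desc);
    client.close();
  }
}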
* - * @param dbName the db name - * @param tableName the table name - * @param location the location - * @param partSpec the part spec - * @return the builder + * @return the h cat add partition desc * @throws HCatException */ - public static Builder create(String dbName, String tableName, String location, - Map partSpec) throws HCatException { - return new Builder(dbName, tableName, location, partSpec); - } - - Partition toHivePartition(Table hiveTable) throws HCatException { - Partition hivePtn = new Partition(); - hivePtn.setDbName(this.dbName); - hivePtn.setTableName(this.tableName); - - List pvals = new ArrayList(); - for (FieldSchema field : hiveTable.getPartitionKeys()) { - String val = partSpec.get(field.getName()); - if (val == null || val.length() == 0) { - throw new HCatException("create partition: Value for key " - + field.getName() + " is null or empty"); - } - pvals.add(val); - } - - hivePtn.setValues(pvals); - StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd()); - hivePtn.setSd(sd); - hivePtn.setParameters(hiveTable.getParameters()); - if (this.location != null) { - hivePtn.getSd().setLocation(this.location); - } else { - String partName; - try { - partName = Warehouse.makePartName( - hiveTable.getPartitionKeys(), pvals); - LOG.info("Setting partition location to :" + partName); - } catch (MetaException e) { - throw new HCatException("Exception while creating partition name.", e); - } - Path partPath = new Path(hiveTable.getSd().getLocation(), partName); - hivePtn.getSd().setLocation(partPath.toString()); - } - hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000)); - hivePtn.setLastAccessTimeIsSet(false); - return hivePtn; - } - - public static class Builder { - - private String tableName; - private String location; - private Map values; - private String dbName; - - private Builder(String dbName, String tableName, String location, Map values) { - this.dbName = dbName; - this.tableName = tableName; - this.location = location; - this.values = values; - } - - /** - * Builds the HCatAddPartitionDesc. - * - * @return the h cat add partition desc - * @throws HCatException - */ - public HCatAddPartitionDesc build() throws HCatException { - if (this.dbName == null) { - this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - } - HCatAddPartitionDesc desc = new HCatAddPartitionDesc( - this.dbName, this.tableName, this.location, - this.values); - return desc; - } + public HCatAddPartitionDesc build() throws HCatException { + if (this.dbName == null) { + this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + } + HCatAddPartitionDesc desc = new HCatAddPartitionDesc( + this.dbName, this.tableName, this.location, + this.values); + return desc; } + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClient.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClient.java index 33f4434..5917d97 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClient.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClient.java @@ -33,330 +33,330 @@ */ public abstract class HCatClient { - public enum DropDBMode {RESTRICT, CASCADE} - - public static final String HCAT_CLIENT_IMPL_CLASS = "hcat.client.impl.class"; - - /** - * Creates an instance of HCatClient. - * - * @param conf An instance of configuration. - * @return An instance of HCatClient. 
- * @throws HCatException - */ - public static HCatClient create(Configuration conf) throws HCatException { - HCatClient client = null; - String className = conf.get(HCAT_CLIENT_IMPL_CLASS, - HCatClientHMSImpl.class.getName()); - try { - Class clientClass = Class.forName(className, - true, JavaUtils.getClassLoader()).asSubclass( - HCatClient.class); - client = (HCatClient) clientClass.newInstance(); - } catch (ClassNotFoundException e) { - throw new HCatException( - "ClassNotFoundException while creating client class.", e); - } catch (InstantiationException e) { - throw new HCatException( - "InstantiationException while creating client class.", e); - } catch (IllegalAccessException e) { - throw new HCatException( - "IllegalAccessException while creating client class.", e); - } - if (client != null) { - client.initialize(conf); - } - return client; + public enum DropDBMode {RESTRICT, CASCADE} + + public static final String HCAT_CLIENT_IMPL_CLASS = "hcat.client.impl.class"; + + /** + * Creates an instance of HCatClient. + * + * @param conf An instance of configuration. + * @return An instance of HCatClient. + * @throws HCatException + */ + public static HCatClient create(Configuration conf) throws HCatException { + HCatClient client = null; + String className = conf.get(HCAT_CLIENT_IMPL_CLASS, + HCatClientHMSImpl.class.getName()); + try { + Class clientClass = Class.forName(className, + true, JavaUtils.getClassLoader()).asSubclass( + HCatClient.class); + client = (HCatClient) clientClass.newInstance(); + } catch (ClassNotFoundException e) { + throw new HCatException( + "ClassNotFoundException while creating client class.", e); + } catch (InstantiationException e) { + throw new HCatException( + "InstantiationException while creating client class.", e); + } catch (IllegalAccessException e) { + throw new HCatException( + "IllegalAccessException while creating client class.", e); } - - abstract void initialize(Configuration conf) throws HCatException; - - /** - * Get all existing databases that match the given - * pattern. The matching occurs as per Java regular expressions - * - * @param pattern java re pattern - * @return list of database names - * @throws HCatException - */ - public abstract List listDatabaseNamesByPattern(String pattern) - throws HCatException; - - /** - * Gets the database. - * - * @param dbName The name of the database. - * @return An instance of HCatDatabaseInfo. - * @throws HCatException - */ - public abstract HCatDatabase getDatabase(String dbName) throws HCatException; - - /** - * Creates the database. - * - * @param dbInfo An instance of HCatCreateDBDesc. - * @throws HCatException - */ - public abstract void createDatabase(HCatCreateDBDesc dbInfo) - throws HCatException; - - /** - * Drops a database. - * - * @param dbName The name of the database to delete. - * @param ifExists Hive returns an error if the database specified does not exist, - * unless ifExists is set to true. - * @param mode This is set to either "restrict" or "cascade". Restrict will - * remove the schema if all the tables are empty. Cascade removes - * everything including data and definitions. - * @throws HCatException - */ - public abstract void dropDatabase(String dbName, boolean ifExists, - DropDBMode mode) throws HCatException; - - /** - * Returns all existing tables from the specified database which match the given - * pattern. The matching occurs as per Java regular expressions. 
- * @param dbName The name of the DB (to be searched) - * @param tablePattern The regex for the table-name - * @return list of table names - * @throws HCatException - */ - public abstract List listTableNamesByPattern(String dbName, String tablePattern) - throws HCatException; - - /** - * Gets the table. - * - * @param dbName The name of the database. - * @param tableName The name of the table. - * @return An instance of HCatTableInfo. - * @throws HCatException - */ - public abstract HCatTable getTable(String dbName, String tableName) - throws HCatException; - - /** - * Creates the table. - * - * @param createTableDesc An instance of HCatCreateTableDesc class. - * @throws HCatException - */ - public abstract void createTable(HCatCreateTableDesc createTableDesc) throws HCatException; - - /** - * Updates the Table's column schema to the specified definition. - * - * @param dbName The name of the database. - * @param tableName The name of the table. - * @param columnSchema The (new) definition of the column schema (i.e. list of fields). - * - */ - public abstract void updateTableSchema(String dbName, String tableName, List columnSchema) - throws HCatException; - - /** - * Creates the table like an existing table. - * - * @param dbName The name of the database. - * @param existingTblName The name of the existing table. - * @param newTableName The name of the new table. - * @param ifNotExists If true, then error related to already table existing is skipped. - * @param isExternal Set to "true", if table has be created at a different - * location other than default. - * @param location The location for the table. - * @throws HCatException - */ - public abstract void createTableLike(String dbName, String existingTblName, - String newTableName, boolean ifNotExists, boolean isExternal, - String location) throws HCatException; - - /** - * Drop table. - * - * @param dbName The name of the database. - * @param tableName The name of the table. - * @param ifExists Hive returns an error if the database specified does not exist, - * unless ifExists is set to true. - * @throws HCatException - */ - public abstract void dropTable(String dbName, String tableName, - boolean ifExists) throws HCatException; - - /** - * Renames a table. - * - * @param dbName The name of the database. - * @param oldName The name of the table to be renamed. - * @param newName The new name of the table. - * @throws HCatException - */ - public abstract void renameTable(String dbName, String oldName, - String newName) throws HCatException; - - /** - * Gets all the partitions. - * - * @param dbName The name of the database. - * @param tblName The name of the table. - * @return A list of partitions. - * @throws HCatException - */ - public abstract List getPartitions(String dbName, String tblName) - throws HCatException; - - /** - * Gets all the partitions that match the specified (and possibly partial) partition specification. - * A partial partition-specification is one where not all partition-keys have associated values. For example, - * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), - * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, - * 1. Complete partition spec: getPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would return 1 partition. - * 2. Partial partition spec: getPartitions('myDb', 'myTable', {dt='20120101'}) would return all 3 partitions, - * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). 
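The two cases described above, written out as a short sketch (assumptions: a client already obtained via HCatClient.create(), the hypothetical myDb.myTable partitioned by dt and region, and HCatException/HCatPartition in their usual org.apache.hcatalog packages):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hcatalog.api.HCatClient;
import org.apache.hcatalog.api.HCatPartition;
import org.apache.hcatalog.common.HCatException;

class PartitionSpecSketch {
  static void show(HCatClient client) throws HCatException {
    // Case 1: complete spec -- both partition keys supplied, at most one partition comes back.
    Map<String, String> fullSpec = new HashMap<String, String>();
    fullSpec.put("dt", "20120101");
    fullSpec.put("region", "us");
    List<HCatPartition> one = client.getPartitions("myDb", "myTable", fullSpec);

    // Case 2: partial spec -- only dt supplied, so all three regions for that date are returned.
    Map<String, String> partialSpec = new HashMap<String, String>();
    partialSpec.put("dt", "20120101");
    List<HCatPartition> all = client.getPartitions("myDb", "myTable", partialSpec);

    System.out.println(one.size() + " partition(s) vs " + all.size());
  }
}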
- * @param dbName The name of the database. - * @param tblName The name of the table. - * @param partitionSpec The partition specification. (Need not include all partition keys.) - * @return A list of partitions. - * @throws HCatException - */ - public abstract List getPartitions(String dbName, String tblName, Map partitionSpec) - throws HCatException; - - /** - * Gets the partition. - * - * @param dbName The database name. - * @param tableName The table name. - * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. All partition-key-values - * must be specified. - * @return An instance of HCatPartitionInfo. - * @throws HCatException - */ - public abstract HCatPartition getPartition(String dbName, String tableName, - Map partitionSpec) throws HCatException; - - /** - * Adds the partition. - * - * @param partInfo An instance of HCatAddPartitionDesc. - * @throws HCatException - */ - public abstract void addPartition(HCatAddPartitionDesc partInfo) - throws HCatException; - - /** - * Adds a list of partitions. - * - * @param partInfoList A list of HCatAddPartitionDesc. - * @return The number of partitions added. - * @throws HCatException - */ - public abstract int addPartitions(List partInfoList) - throws HCatException; - - /** - * Drops partition(s) that match the specified (and possibly partial) partition specification. - * A partial partition-specification is one where not all partition-keys have associated values. For example, - * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), - * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, - * 1. Complete partition spec: dropPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would drop 1 partition. - * 2. Partial partition spec: dropPartitions('myDb', 'myTable', {dt='20120101'}) would drop all 3 partitions, - * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). - * @param dbName The database name. - * @param tableName The table name. - * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. - * @param ifExists Hive returns an error if the partition specified does not exist, unless ifExists is set to true. - * @throws HCatException,ConnectionFailureException - */ - public abstract void dropPartitions(String dbName, String tableName, - Map partitionSpec, boolean ifExists) - throws HCatException; - - /** - * List partitions by filter. - * - * @param dbName The database name. - * @param tblName The table name. - * @param filter The filter string, - * for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". Filtering can - * be done only on string partition keys. - * @return list of partitions - * @throws HCatException - */ - public abstract List listPartitionsByFilter(String dbName, String tblName, - String filter) throws HCatException; - - /** - * Mark partition for event. - * - * @param dbName The database name. - * @param tblName The table name. - * @param partKVs the key-values associated with the partition. - * @param eventType the event type - * @throws HCatException - */ - public abstract void markPartitionForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException; - - /** - * Checks if a partition is marked for event. - * - * @param dbName the db name - * @param tblName the table name - * @param partKVs the key-values associated with the partition. 
- * @param eventType the event type - * @return true, if is partition marked for event - * @throws HCatException - */ - public abstract boolean isPartitionMarkedForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException; - - /** - * Gets the delegation token. - * - * @param owner the owner - * @param renewerKerberosPrincipalName the renewer kerberos principal name - * @return the delegation token - * @throws HCatException,ConnectionFailureException - */ - public abstract String getDelegationToken(String owner, - String renewerKerberosPrincipalName) throws HCatException; - - /** - * Renew delegation token. - * - * @param tokenStrForm the token string - * @return the new expiration time - * @throws HCatException - */ - public abstract long renewDelegationToken(String tokenStrForm) - throws HCatException; - - /** - * Cancel delegation token. - * - * @param tokenStrForm the token string - * @throws HCatException - */ - public abstract void cancelDelegationToken(String tokenStrForm) - throws HCatException; - - /** - * Retrieve Message-bus topic for a table. - * - * @param dbName The name of the DB. - * @param tableName The name of the table. - * @return Topic-name for the message-bus on which messages will be sent for the specified table. - * By default, this is set to .. Returns null when not set. - */ - public abstract String getMessageBusTopicName(String dbName, String tableName) throws HCatException; - - /** - * Close the hcatalog client. - * - * @throws HCatException - */ - public abstract void close() throws HCatException; + if (client != null) { + client.initialize(conf); + } + return client; + } + + abstract void initialize(Configuration conf) throws HCatException; + + /** + * Get all existing databases that match the given + * pattern. The matching occurs as per Java regular expressions + * + * @param pattern java re pattern + * @return list of database names + * @throws HCatException + */ + public abstract List listDatabaseNamesByPattern(String pattern) + throws HCatException; + + /** + * Gets the database. + * + * @param dbName The name of the database. + * @return An instance of HCatDatabaseInfo. + * @throws HCatException + */ + public abstract HCatDatabase getDatabase(String dbName) throws HCatException; + + /** + * Creates the database. + * + * @param dbInfo An instance of HCatCreateDBDesc. + * @throws HCatException + */ + public abstract void createDatabase(HCatCreateDBDesc dbInfo) + throws HCatException; + + /** + * Drops a database. + * + * @param dbName The name of the database to delete. + * @param ifExists Hive returns an error if the database specified does not exist, + * unless ifExists is set to true. + * @param mode This is set to either "restrict" or "cascade". Restrict will + * remove the schema if all the tables are empty. Cascade removes + * everything including data and definitions. + * @throws HCatException + */ + public abstract void dropDatabase(String dbName, boolean ifExists, + DropDBMode mode) throws HCatException; + + /** + * Returns all existing tables from the specified database which match the given + * pattern. The matching occurs as per Java regular expressions. + * @param dbName The name of the DB (to be searched) + * @param tablePattern The regex for the table-name + * @return list of table names + * @throws HCatException + */ + public abstract List listTableNamesByPattern(String dbName, String tablePattern) + throws HCatException; + + /** + * Gets the table. 
+ * + * @param dbName The name of the database. + * @param tableName The name of the table. + * @return An instance of HCatTableInfo. + * @throws HCatException + */ + public abstract HCatTable getTable(String dbName, String tableName) + throws HCatException; + + /** + * Creates the table. + * + * @param createTableDesc An instance of HCatCreateTableDesc class. + * @throws HCatException + */ + public abstract void createTable(HCatCreateTableDesc createTableDesc) throws HCatException; + + /** + * Updates the Table's column schema to the specified definition. + * + * @param dbName The name of the database. + * @param tableName The name of the table. + * @param columnSchema The (new) definition of the column schema (i.e. list of fields). + * + */ + public abstract void updateTableSchema(String dbName, String tableName, List columnSchema) + throws HCatException; + + /** + * Creates the table like an existing table. + * + * @param dbName The name of the database. + * @param existingTblName The name of the existing table. + * @param newTableName The name of the new table. + * @param ifNotExists If true, then error related to already table existing is skipped. + * @param isExternal Set to "true", if table has be created at a different + * location other than default. + * @param location The location for the table. + * @throws HCatException + */ + public abstract void createTableLike(String dbName, String existingTblName, + String newTableName, boolean ifNotExists, boolean isExternal, + String location) throws HCatException; + + /** + * Drop table. + * + * @param dbName The name of the database. + * @param tableName The name of the table. + * @param ifExists Hive returns an error if the database specified does not exist, + * unless ifExists is set to true. + * @throws HCatException + */ + public abstract void dropTable(String dbName, String tableName, + boolean ifExists) throws HCatException; + + /** + * Renames a table. + * + * @param dbName The name of the database. + * @param oldName The name of the table to be renamed. + * @param newName The new name of the table. + * @throws HCatException + */ + public abstract void renameTable(String dbName, String oldName, + String newName) throws HCatException; + + /** + * Gets all the partitions. + * + * @param dbName The name of the database. + * @param tblName The name of the table. + * @return A list of partitions. + * @throws HCatException + */ + public abstract List getPartitions(String dbName, String tblName) + throws HCatException; + + /** + * Gets all the partitions that match the specified (and possibly partial) partition specification. + * A partial partition-specification is one where not all partition-keys have associated values. For example, + * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), + * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, + * 1. Complete partition spec: getPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would return 1 partition. + * 2. Partial partition spec: getPartitions('myDb', 'myTable', {dt='20120101'}) would return all 3 partitions, + * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). + * @param dbName The name of the database. + * @param tblName The name of the table. + * @param partitionSpec The partition specification. (Need not include all partition keys.) + * @return A list of partitions. 
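Illustrative sketch (not part of the patch above): exercising the table-level calls documented here, reusing a client obtained as in the earlier sketch; the database and table names are placeholders.

import org.apache.hcatalog.api.HCatClient;
import org.apache.hcatalog.common.HCatException;

public class TableOpsSketch {
  // Clone a table definition, rename the copy, then drop it.
  static void cloneAndRename(HCatClient client) throws HCatException {
    client.createTableLike("myDb", "source_table", "source_table_copy",
        true  /* ifNotExists */,
        false /* isExternal */,
        null  /* location: null keeps the source table's location */);
    client.renameTable("myDb", "source_table_copy", "source_table_v2");
    client.dropTable("myDb", "source_table_v2", true /* ifExists */);
  }
}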
+ * @throws HCatException + */ + public abstract List getPartitions(String dbName, String tblName, Map partitionSpec) + throws HCatException; + + /** + * Gets the partition. + * + * @param dbName The database name. + * @param tableName The table name. + * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. All partition-key-values + * must be specified. + * @return An instance of HCatPartitionInfo. + * @throws HCatException + */ + public abstract HCatPartition getPartition(String dbName, String tableName, + Map partitionSpec) throws HCatException; + + /** + * Adds the partition. + * + * @param partInfo An instance of HCatAddPartitionDesc. + * @throws HCatException + */ + public abstract void addPartition(HCatAddPartitionDesc partInfo) + throws HCatException; + + /** + * Adds a list of partitions. + * + * @param partInfoList A list of HCatAddPartitionDesc. + * @return The number of partitions added. + * @throws HCatException + */ + public abstract int addPartitions(List partInfoList) + throws HCatException; + + /** + * Drops partition(s) that match the specified (and possibly partial) partition specification. + * A partial partition-specification is one where not all partition-keys have associated values. For example, + * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), + * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, + * 1. Complete partition spec: dropPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would drop 1 partition. + * 2. Partial partition spec: dropPartitions('myDb', 'myTable', {dt='20120101'}) would drop all 3 partitions, + * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). + * @param dbName The database name. + * @param tableName The table name. + * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. + * @param ifExists Hive returns an error if the partition specified does not exist, unless ifExists is set to true. + * @throws HCatException,ConnectionFailureException + */ + public abstract void dropPartitions(String dbName, String tableName, + Map partitionSpec, boolean ifExists) + throws HCatException; + + /** + * List partitions by filter. + * + * @param dbName The database name. + * @param tblName The table name. + * @param filter The filter string, + * for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". Filtering can + * be done only on string partition keys. + * @return list of partitions + * @throws HCatException + */ + public abstract List listPartitionsByFilter(String dbName, String tblName, + String filter) throws HCatException; + + /** + * Mark partition for event. + * + * @param dbName The database name. + * @param tblName The table name. + * @param partKVs the key-values associated with the partition. + * @param eventType the event type + * @throws HCatException + */ + public abstract void markPartitionForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException; + + /** + * Checks if a partition is marked for event. + * + * @param dbName the db name + * @param tblName the table name + * @param partKVs the key-values associated with the partition. 
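Illustrative sketch (not part of the patch above): complete versus partial partition specifications, mirroring the dt/region example in the javadoc. The Map and List type parameters, stripped in the quoted javadoc, are assumed to be String and HCatPartition.

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hcatalog.api.HCatClient;
import org.apache.hcatalog.api.HCatPartition;
import org.apache.hcatalog.common.HCatException;

public class PartitionSpecSketch {
  static void lookupPartitions(HCatClient client) throws HCatException {
    // Complete spec: every partition key has a value -> exactly one partition.
    Map<String, String> fullSpec = new HashMap<String, String>();
    fullSpec.put("dt", "20120101");
    fullSpec.put("region", "us");
    HCatPartition one = client.getPartition("myDb", "myTable", fullSpec);
    System.out.println(one);

    // Partial spec: only dt is fixed -> all regions for that dt are returned.
    Map<String, String> partialSpec = new HashMap<String, String>();
    partialSpec.put("dt", "20120101");
    List<HCatPartition> all = client.getPartitions("myDb", "myTable", partialSpec);
    System.out.println(all.size() + " partitions for dt=20120101");

    // The same partial spec can drop every matching partition.
    client.dropPartitions("myDb", "myTable", partialSpec, true /* ifExists */);
  }
}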
+ * @param eventType the event type + * @return true, if is partition marked for event + * @throws HCatException + */ + public abstract boolean isPartitionMarkedForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException; + + /** + * Gets the delegation token. + * + * @param owner the owner + * @param renewerKerberosPrincipalName the renewer kerberos principal name + * @return the delegation token + * @throws HCatException,ConnectionFailureException + */ + public abstract String getDelegationToken(String owner, + String renewerKerberosPrincipalName) throws HCatException; + + /** + * Renew delegation token. + * + * @param tokenStrForm the token string + * @return the new expiration time + * @throws HCatException + */ + public abstract long renewDelegationToken(String tokenStrForm) + throws HCatException; + + /** + * Cancel delegation token. + * + * @param tokenStrForm the token string + * @throws HCatException + */ + public abstract void cancelDelegationToken(String tokenStrForm) + throws HCatException; + + /** + * Retrieve Message-bus topic for a table. + * + * @param dbName The name of the DB. + * @param tableName The name of the table. + * @return Topic-name for the message-bus on which messages will be sent for the specified table. + * By default, this is set to .. Returns null when not set. + */ + public abstract String getMessageBusTopicName(String dbName, String tableName) throws HCatException; + + /** + * Close the hcatalog client. + * + * @throws HCatException + */ + public abstract void close() throws HCatException; } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClientHMSImpl.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClientHMSImpl.java index 0dbb47f..e8034bc 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClientHMSImpl.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClientHMSImpl.java @@ -55,670 +55,670 @@ */ public class HCatClientHMSImpl extends HCatClient { - private HiveMetaStoreClient hmsClient; - private Configuration config; - private HiveConf hiveConfig; - - @Override - public List listDatabaseNamesByPattern(String pattern) - throws HCatException { - List dbNames = null; - try { - dbNames = hmsClient.getDatabases(pattern); - } catch (MetaException exp) { - throw new HCatException("MetaException while listing db names", exp); - } - return dbNames; - } - - @Override - public HCatDatabase getDatabase(String dbName) throws HCatException { - HCatDatabase db = null; - try { - Database hiveDB = hmsClient.getDatabase(checkDB(dbName)); - if (hiveDB != null) { - db = new HCatDatabase(hiveDB); - } - } catch (NoSuchObjectException exp) { - throw new ObjectNotFoundException( - "NoSuchObjectException while fetching database", exp); - } catch (MetaException exp) { - throw new HCatException("MetaException while fetching database", - exp); - } catch (TException exp) { - throw new ConnectionFailureException( - "TException while fetching database", exp); - } - return db; - } - - @Override - public void createDatabase(HCatCreateDBDesc dbInfo) throws HCatException { - try { - hmsClient.createDatabase(dbInfo.toHiveDb()); - } catch (AlreadyExistsException exp) { - if (!dbInfo.getIfNotExists()) { - throw new HCatException( - "AlreadyExistsException while creating database", exp); - } - } catch (InvalidObjectException exp) { - throw new HCatException( - "InvalidObjectException while creating database", exp); 
- } catch (MetaException exp) { - throw new HCatException("MetaException while creating database", - exp); - } catch (TException exp) { - throw new ConnectionFailureException( - "TException while creating database", exp); - } + private HiveMetaStoreClient hmsClient; + private Configuration config; + private HiveConf hiveConfig; + + @Override + public List listDatabaseNamesByPattern(String pattern) + throws HCatException { + List dbNames = null; + try { + dbNames = hmsClient.getDatabases(pattern); + } catch (MetaException exp) { + throw new HCatException("MetaException while listing db names", exp); } - - @Override - public void dropDatabase(String dbName, boolean ifExists, DropDBMode mode) - throws HCatException { - boolean isCascade = mode.toString().equalsIgnoreCase("cascade"); - try { - hmsClient.dropDatabase(checkDB(dbName), true, ifExists, isCascade); - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping db.", e); - } - } catch (InvalidOperationException e) { - throw new HCatException( - "InvalidOperationException while dropping db.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while dropping db.", e); - } catch (TException e) { - throw new ConnectionFailureException("TException while dropping db.", - e); - } + return dbNames; + } + + @Override + public HCatDatabase getDatabase(String dbName) throws HCatException { + HCatDatabase db = null; + try { + Database hiveDB = hmsClient.getDatabase(checkDB(dbName)); + if (hiveDB != null) { + db = new HCatDatabase(hiveDB); + } + } catch (NoSuchObjectException exp) { + throw new ObjectNotFoundException( + "NoSuchObjectException while fetching database", exp); + } catch (MetaException exp) { + throw new HCatException("MetaException while fetching database", + exp); + } catch (TException exp) { + throw new ConnectionFailureException( + "TException while fetching database", exp); + } + return db; + } + + @Override + public void createDatabase(HCatCreateDBDesc dbInfo) throws HCatException { + try { + hmsClient.createDatabase(dbInfo.toHiveDb()); + } catch (AlreadyExistsException exp) { + if (!dbInfo.getIfNotExists()) { + throw new HCatException( + "AlreadyExistsException while creating database", exp); + } + } catch (InvalidObjectException exp) { + throw new HCatException( + "InvalidObjectException while creating database", exp); + } catch (MetaException exp) { + throw new HCatException("MetaException while creating database", + exp); + } catch (TException exp) { + throw new ConnectionFailureException( + "TException while creating database", exp); + } + } + + @Override + public void dropDatabase(String dbName, boolean ifExists, DropDBMode mode) + throws HCatException { + boolean isCascade = mode.toString().equalsIgnoreCase("cascade"); + try { + hmsClient.dropDatabase(checkDB(dbName), true, ifExists, isCascade); + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping db.", e); + } + } catch (InvalidOperationException e) { + throw new HCatException( + "InvalidOperationException while dropping db.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while dropping db.", e); + } catch (TException e) { + throw new ConnectionFailureException("TException while dropping db.", + e); + } + } + + @Override + public List listTableNamesByPattern(String dbName, + String tablePattern) throws HCatException { + List tableNames = null; + try { + tableNames = 
hmsClient.getTables(checkDB(dbName), tablePattern); + } catch (MetaException e) { + throw new HCatException( + "MetaException while fetching table names.", e); + } + return tableNames; + } + + @Override + public HCatTable getTable(String dbName, String tableName) + throws HCatException { + HCatTable table = null; + try { + Table hiveTable = hmsClient.getTable(checkDB(dbName), tableName); + if (hiveTable != null) { + table = new HCatTable(hiveTable); + } + } catch (MetaException e) { + throw new HCatException("MetaException while fetching table.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while fetching table.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while fetching table.", e); + } + return table; + } + + @Override + public void createTable(HCatCreateTableDesc createTableDesc) + throws HCatException { + try { + hmsClient.createTable(createTableDesc.toHiveTable(hiveConfig)); + } catch (AlreadyExistsException e) { + if (!createTableDesc.getIfNotExists()) { + throw new HCatException( + "AlreadyExistsException while creating table.", e); + } + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException while creating table.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while creating table.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while creating table.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while creating table.", e); + } catch (IOException e) { + throw new HCatException("IOException while creating hive conf.", e); } - @Override - public List listTableNamesByPattern(String dbName, - String tablePattern) throws HCatException { - List tableNames = null; - try { - tableNames = hmsClient.getTables(checkDB(dbName), tablePattern); - } catch (MetaException e) { - throw new HCatException( - "MetaException while fetching table names.", e); - } - return tableNames; - } - - @Override - public HCatTable getTable(String dbName, String tableName) - throws HCatException { - HCatTable table = null; - try { - Table hiveTable = hmsClient.getTable(checkDB(dbName), tableName); - if (hiveTable != null) { - table = new HCatTable(hiveTable); - } - } catch (MetaException e) { - throw new HCatException("MetaException while fetching table.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while fetching table.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while fetching table.", e); - } - return table; - } - - @Override - public void createTable(HCatCreateTableDesc createTableDesc) - throws HCatException { - try { - hmsClient.createTable(createTableDesc.toHiveTable(hiveConfig)); - } catch (AlreadyExistsException e) { - if (!createTableDesc.getIfNotExists()) { - throw new HCatException( - "AlreadyExistsException while creating table.", e); - } - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException while creating table.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while creating table.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while creating table.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while creating table.", e); - } catch (IOException e) { - throw new 
HCatException("IOException while creating hive conf.", e); - } + } + @Override + public void updateTableSchema(String dbName, String tableName, List columnSchema) + throws HCatException { + try { + Table table = hmsClient.getTable(dbName, tableName); + table.getSd().setCols(HCatSchemaUtils.getFieldSchemas(columnSchema)); + hmsClient.alter_table(dbName, tableName, table); } - - @Override - public void updateTableSchema(String dbName, String tableName, List columnSchema) - throws HCatException { - try { - Table table = hmsClient.getTable(dbName, tableName); - table.getSd().setCols(HCatSchemaUtils.getFieldSchemas(columnSchema)); - hmsClient.alter_table(dbName, tableName, table); - } - catch (InvalidOperationException e) { - throw new HCatException("InvalidOperationException while updating table schema.", e); - } - catch (MetaException e) { - throw new HCatException("MetaException while updating table schema.", e); - } - catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while updating table schema.", e); - } - catch (TException e) { - throw new ConnectionFailureException( - "TException while updating table schema.", e); - } + catch (InvalidOperationException e) { + throw new HCatException("InvalidOperationException while updating table schema.", e); } - - @Override - public void createTableLike(String dbName, String existingTblName, - String newTableName, boolean ifNotExists, boolean isExternal, - String location) throws HCatException { - - Table hiveTable = getHiveTableLike(checkDB(dbName), existingTblName, - newTableName, ifNotExists, location); - if (hiveTable != null) { - try { - hmsClient.createTable(hiveTable); - } catch (AlreadyExistsException e) { - if (!ifNotExists) { - throw new HCatException( - "A table already exists with the name " - + newTableName, e); - } - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException in create table like command.", - e); - } catch (MetaException e) { - throw new HCatException( - "MetaException in create table like command.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException in create table like command.", - e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException in create table like command.", e); - } - } + catch (MetaException e) { + throw new HCatException("MetaException while updating table schema.", e); } - - @Override - public void dropTable(String dbName, String tableName, boolean ifExists) - throws HCatException { - try { - hmsClient.dropTable(checkDB(dbName), tableName, true, ifExists); - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping table.", e); - } - } catch (MetaException e) { - throw new HCatException("MetaException while dropping table.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while dropping table.", e); - } + catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while updating table schema.", e); } - - @Override - public void renameTable(String dbName, String oldName, String newName) - throws HCatException { - Table tbl; - try { - Table oldtbl = hmsClient.getTable(checkDB(dbName), oldName); - if (oldtbl != null) { - // TODO : Should be moved out. 
- if (oldtbl - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE) != null) { - throw new HCatException( - "Cannot use rename command on a non-native table"); - } - tbl = new Table(oldtbl); - tbl.setTableName(newName); - hmsClient.alter_table(checkDB(dbName), oldName, tbl); - } - } catch (MetaException e) { - throw new HCatException("MetaException while renaming table", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while renaming table", e); - } catch (InvalidOperationException e) { - throw new HCatException( - "InvalidOperationException while renaming table", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while renaming table", e); - } + catch (TException e) { + throw new ConnectionFailureException( + "TException while updating table schema.", e); } - - @Override - public List getPartitions(String dbName, String tblName) - throws HCatException { - List hcatPtns = new ArrayList(); - try { - List hivePtns = hmsClient.listPartitions( - checkDB(dbName), tblName, (short) -1); - for (Partition ptn : hivePtns) { - hcatPtns.add(new HCatPartition(ptn)); - } - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while retrieving partition.", e); - } catch (MetaException e) { - throw new HCatException( - "MetaException while retrieving partition.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while retrieving partition.", e); - } - return hcatPtns; + } + + @Override + public void createTableLike(String dbName, String existingTblName, + String newTableName, boolean ifNotExists, boolean isExternal, + String location) throws HCatException { + + Table hiveTable = getHiveTableLike(checkDB(dbName), existingTblName, + newTableName, ifNotExists, location); + if (hiveTable != null) { + try { + hmsClient.createTable(hiveTable); + } catch (AlreadyExistsException e) { + if (!ifNotExists) { + throw new HCatException( + "A table already exists with the name " + + newTableName, e); + } + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException in create table like command.", + e); + } catch (MetaException e) { + throw new HCatException( + "MetaException in create table like command.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException in create table like command.", + e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException in create table like command.", e); + } } - - @Override - public List getPartitions(String dbName, String tblName, Map partitionSpec) throws HCatException { - return listPartitionsByFilter(dbName, tblName, getFilterString(partitionSpec)); + } + + @Override + public void dropTable(String dbName, String tableName, boolean ifExists) + throws HCatException { + try { + hmsClient.dropTable(checkDB(dbName), tableName, true, ifExists); + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping table.", e); + } + } catch (MetaException e) { + throw new HCatException("MetaException while dropping table.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while dropping table.", e); + } + } + + @Override + public void renameTable(String dbName, String oldName, String newName) + throws HCatException { + Table tbl; + try { + Table oldtbl = 
hmsClient.getTable(checkDB(dbName), oldName); + if (oldtbl != null) { + // TODO : Should be moved out. + if (oldtbl + .getParameters() + .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE) != null) { + throw new HCatException( + "Cannot use rename command on a non-native table"); + } + tbl = new Table(oldtbl); + tbl.setTableName(newName); + hmsClient.alter_table(checkDB(dbName), oldName, tbl); + } + } catch (MetaException e) { + throw new HCatException("MetaException while renaming table", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while renaming table", e); + } catch (InvalidOperationException e) { + throw new HCatException( + "InvalidOperationException while renaming table", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while renaming table", e); } + } + + @Override + public List getPartitions(String dbName, String tblName) + throws HCatException { + List hcatPtns = new ArrayList(); + try { + List hivePtns = hmsClient.listPartitions( + checkDB(dbName), tblName, (short) -1); + for (Partition ptn : hivePtns) { + hcatPtns.add(new HCatPartition(ptn)); + } + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while retrieving partition.", e); + } catch (MetaException e) { + throw new HCatException( + "MetaException while retrieving partition.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while retrieving partition.", e); + } + return hcatPtns; + } - private static String getFilterString(Map partitionSpec) { - final String AND = " AND "; + @Override + public List getPartitions(String dbName, String tblName, Map partitionSpec) throws HCatException { + return listPartitionsByFilter(dbName, tblName, getFilterString(partitionSpec)); + } - StringBuilder filter = new StringBuilder(); - for (Map.Entry entry : partitionSpec.entrySet()) { - filter.append(entry.getKey()).append("=").append("\"").append(entry.getValue()).append("\"").append(AND); - } + private static String getFilterString(Map partitionSpec) { + final String AND = " AND "; - int length = filter.toString().length(); - if (length > 0) - filter.delete(length - AND.length(), length); - - return filter.toString(); - } - - @Override - public HCatPartition getPartition(String dbName, String tableName, - Map partitionSpec) throws HCatException { - HCatPartition partition = null; - try { - List partitionColumns = getTable(checkDB(dbName), tableName).getPartCols(); - if (partitionColumns.size() != partitionSpec.size()) { - throw new HCatException("Partition-spec doesn't have the right number of partition keys."); - } - - ArrayList ptnValues = new ArrayList(); - for (HCatFieldSchema partitionColumn : partitionColumns) { - String partKey = partitionColumn.getName(); - if (partitionSpec.containsKey(partKey)) { - ptnValues.add(partitionSpec.get(partKey)); // Partition-keys added in order. 
- } - else { - throw new HCatException("Invalid partition-key specified: " + partKey); - } - } - Partition hivePartition = hmsClient.getPartition(checkDB(dbName), - tableName, ptnValues); - if (hivePartition != null) { - partition = new HCatPartition(hivePartition); - } - } catch (MetaException e) { - throw new HCatException( - "MetaException while retrieving partition.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while retrieving partition.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while retrieving partition.", e); - } - return partition; - } - - @Override - public void addPartition(HCatAddPartitionDesc partInfo) - throws HCatException { - Table tbl = null; - try { - tbl = hmsClient.getTable(partInfo.getDatabaseName(), - partInfo.getTableName()); - // TODO: Should be moved out. - if (tbl.getPartitionKeysSize() == 0) { - throw new HCatException("The table " + partInfo.getTableName() - + " is not partitioned."); - } - - hmsClient.add_partition(partInfo.toHivePartition(tbl)); - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException while adding partition.", e); - } catch (AlreadyExistsException e) { - throw new HCatException( - "AlreadyExistsException while adding partition.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while adding partition.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException("The table " + partInfo.getTableName() - + " is could not be found.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while adding partition.", e); - } + StringBuilder filter = new StringBuilder(); + for (Map.Entry entry : partitionSpec.entrySet()) { + filter.append(entry.getKey()).append("=").append("\"").append(entry.getValue()).append("\"").append(AND); } - @Override - public void dropPartitions(String dbName, String tableName, - Map partitionSpec, boolean ifExists) - throws HCatException { - try { - dbName = checkDB(dbName); - List partitions = hmsClient.listPartitionsByFilter(dbName, tableName, - getFilterString(partitionSpec), (short)-1); - - for (Partition partition : partitions) { - dropPartition(partition, ifExists); - } - - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping partition. " + - "Either db(" + dbName + ") or table(" + tableName + ") missing.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while dropping partition.", - e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while dropping partition.", e); - } + int length = filter.toString().length(); + if (length > 0) + filter.delete(length - AND.length(), length); + + return filter.toString(); + } + + @Override + public HCatPartition getPartition(String dbName, String tableName, + Map partitionSpec) throws HCatException { + HCatPartition partition = null; + try { + List partitionColumns = getTable(checkDB(dbName), tableName).getPartCols(); + if (partitionColumns.size() != partitionSpec.size()) { + throw new HCatException("Partition-spec doesn't have the right number of partition keys."); + } + + ArrayList ptnValues = new ArrayList(); + for (HCatFieldSchema partitionColumn : partitionColumns) { + String partKey = partitionColumn.getName(); + if (partitionSpec.containsKey(partKey)) { + ptnValues.add(partitionSpec.get(partKey)); // Partition-keys added in order. 
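Illustrative sketch (not part of the patch above): the kind of filter string getFilterString() assembles from a partition spec, passed straight to listPartitionsByFilter(). The exact key order in the generated string depends on the map's iteration order; the names below are placeholders.

import java.util.List;
import org.apache.hcatalog.api.HCatClient;
import org.apache.hcatalog.api.HCatPartition;
import org.apache.hcatalog.common.HCatException;

public class PartitionFilterSketch {
  static void listByFilter(HCatClient client) throws HCatException {
    // Equivalent to what getFilterString() builds for {dt=20120101, region=us}:
    String filter = "dt=\"20120101\" AND region=\"us\"";
    List<HCatPartition> parts =
        client.listPartitionsByFilter("myDb", "myTable", filter);
    System.out.println(parts.size() + " partitions matched");
  }
}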
+ } + else { + throw new HCatException("Invalid partition-key specified: " + partKey); + } + } + Partition hivePartition = hmsClient.getPartition(checkDB(dbName), + tableName, ptnValues); + if (hivePartition != null) { + partition = new HCatPartition(hivePartition); + } + } catch (MetaException e) { + throw new HCatException( + "MetaException while retrieving partition.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while retrieving partition.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while retrieving partition.", e); } - - private void dropPartition(Partition partition, boolean ifExists) - throws HCatException, MetaException, TException { - try { - hmsClient.dropPartition(partition.getDbName(), partition.getTableName(), partition.getValues()); - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping partition: " + partition.getValues(), e); - } - } + return partition; + } + + @Override + public void addPartition(HCatAddPartitionDesc partInfo) + throws HCatException { + Table tbl = null; + try { + tbl = hmsClient.getTable(partInfo.getDatabaseName(), + partInfo.getTableName()); + // TODO: Should be moved out. + if (tbl.getPartitionKeysSize() == 0) { + throw new HCatException("The table " + partInfo.getTableName() + + " is not partitioned."); + } + + hmsClient.add_partition(partInfo.toHivePartition(tbl)); + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException while adding partition.", e); + } catch (AlreadyExistsException e) { + throw new HCatException( + "AlreadyExistsException while adding partition.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while adding partition.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException("The table " + partInfo.getTableName() + + " is could not be found.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while adding partition.", e); } - - @Override - public List listPartitionsByFilter(String dbName, - String tblName, String filter) throws HCatException { - List hcatPtns = new ArrayList(); - try { - List hivePtns = hmsClient.listPartitionsByFilter( - checkDB(dbName), tblName, filter, (short) -1); - for (Partition ptn : hivePtns) { - hcatPtns.add(new HCatPartition(ptn)); - } - } catch (MetaException e) { - throw new HCatException("MetaException while fetching partitions.", - e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while fetching partitions.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while fetching partitions.", e); - } - return hcatPtns; - } - - @Override - public void markPartitionForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException { - try { - hmsClient.markPartitionForEvent(checkDB(dbName), tblName, partKVs, - eventType); - } catch (MetaException e) { - throw new HCatException( - "MetaException while marking partition for event.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while marking partition for event.", - e); - } catch (UnknownTableException e) { - throw new HCatException( - "UnknownTableException while marking partition for event.", - e); - } catch (UnknownDBException e) { - throw new HCatException( - "UnknownDBException 
while marking partition for event.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while marking partition for event.", e); - } + } + + @Override + public void dropPartitions(String dbName, String tableName, + Map partitionSpec, boolean ifExists) + throws HCatException { + try { + dbName = checkDB(dbName); + List partitions = hmsClient.listPartitionsByFilter(dbName, tableName, + getFilterString(partitionSpec), (short)-1); + + for (Partition partition : partitions) { + dropPartition(partition, ifExists); + } + + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping partition. " + + "Either db(" + dbName + ") or table(" + tableName + ") missing.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while dropping partition.", + e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while dropping partition.", e); } - - @Override - public boolean isPartitionMarkedForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException { - boolean isMarked = false; - try { - isMarked = hmsClient.isPartitionMarkedForEvent(checkDB(dbName), - tblName, partKVs, eventType); - } catch (MetaException e) { - throw new HCatException( - "MetaException while checking partition for event.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while checking partition for event.", - e); - } catch (UnknownTableException e) { - throw new HCatException( - "UnknownTableException while checking partition for event.", - e); - } catch (UnknownDBException e) { - throw new HCatException( - "UnknownDBException while checking partition for event.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while checking partition for event.", e); - } - return isMarked; - } - - @Override - public String getDelegationToken(String owner, - String renewerKerberosPrincipalName) throws HCatException { - String token = null; - try { - token = hmsClient.getDelegationToken(owner, - renewerKerberosPrincipalName); - } catch (MetaException e) { - throw new HCatException( - "MetaException while getting delegation token.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while getting delegation token.", e); - } - - return token; + } + + private void dropPartition(Partition partition, boolean ifExists) + throws HCatException, MetaException, TException { + try { + hmsClient.dropPartition(partition.getDbName(), partition.getTableName(), partition.getValues()); + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping partition: " + partition.getValues(), e); + } } - - @Override - public long renewDelegationToken(String tokenStrForm) throws HCatException { - long time = 0; - try { - time = hmsClient.renewDelegationToken(tokenStrForm); - } catch (MetaException e) { - throw new HCatException( - "MetaException while renewing delegation token.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while renewing delegation token.", e); - } - - return time; + } + + @Override + public List listPartitionsByFilter(String dbName, + String tblName, String filter) throws HCatException { + List hcatPtns = new ArrayList(); + try { + List hivePtns = hmsClient.listPartitionsByFilter( + checkDB(dbName), tblName, filter, (short) -1); + for (Partition 
ptn : hivePtns) { + hcatPtns.add(new HCatPartition(ptn)); + } + } catch (MetaException e) { + throw new HCatException("MetaException while fetching partitions.", + e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while fetching partitions.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while fetching partitions.", e); } - - @Override - public void cancelDelegationToken(String tokenStrForm) - throws HCatException { - try { - hmsClient.cancelDelegationToken(tokenStrForm); - } catch (MetaException e) { - throw new HCatException( - "MetaException while canceling delegation token.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while canceling delegation token.", e); - } + return hcatPtns; + } + + @Override + public void markPartitionForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException { + try { + hmsClient.markPartitionForEvent(checkDB(dbName), tblName, partKVs, + eventType); + } catch (MetaException e) { + throw new HCatException( + "MetaException while marking partition for event.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while marking partition for event.", + e); + } catch (UnknownTableException e) { + throw new HCatException( + "UnknownTableException while marking partition for event.", + e); + } catch (UnknownDBException e) { + throw new HCatException( + "UnknownDBException while marking partition for event.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while marking partition for event.", e); } - - /* - * @param conf /* @throws HCatException,ConnectionFailureException - * - * @see - * org.apache.hcatalog.api.HCatClient#initialize(org.apache.hadoop.conf. 
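Illustrative sketch (not part of the patch above): marking a partition for an event and checking the mark. PartitionEventType comes from the Hive metastore API; LOAD_DONE is assumed here as the event value, and the partition key-values are placeholders.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
import org.apache.hcatalog.api.HCatClient;
import org.apache.hcatalog.common.HCatException;

public class PartitionEventSketch {
  static void markAndCheck(HCatClient client) throws HCatException {
    Map<String, String> partKVs = new HashMap<String, String>();
    partKVs.put("dt", "20120101");
    partKVs.put("region", "us");
    client.markPartitionForEvent("myDb", "myTable", partKVs, PartitionEventType.LOAD_DONE);
    boolean marked =
        client.isPartitionMarkedForEvent("myDb", "myTable", partKVs, PartitionEventType.LOAD_DONE);
    System.out.println("Marked: " + marked);
  }
}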
- * Configuration) - */ - @Override - void initialize(Configuration conf) throws HCatException { - this.config = conf; - try { - hiveConfig = HCatUtil.getHiveConf(config); - hmsClient = HCatUtil.getHiveClient(hiveConfig); - } catch (MetaException exp) { - throw new HCatException("MetaException while creating HMS client", - exp); - } catch (IOException exp) { - throw new HCatException("IOException while creating HMS client", - exp); - } - + } + + @Override + public boolean isPartitionMarkedForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException { + boolean isMarked = false; + try { + isMarked = hmsClient.isPartitionMarkedForEvent(checkDB(dbName), + tblName, partKVs, eventType); + } catch (MetaException e) { + throw new HCatException( + "MetaException while checking partition for event.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while checking partition for event.", + e); + } catch (UnknownTableException e) { + throw new HCatException( + "UnknownTableException while checking partition for event.", + e); + } catch (UnknownDBException e) { + throw new HCatException( + "UnknownDBException while checking partition for event.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while checking partition for event.", e); } - - private Table getHiveTableLike(String dbName, String existingTblName, - String newTableName, boolean isExternal, String location) - throws HCatException { - Table oldtbl = null; - Table newTable = null; - try { - oldtbl = hmsClient.getTable(checkDB(dbName), existingTblName); - } catch (MetaException e1) { - throw new HCatException( - "MetaException while retrieving existing table.", e1); - } catch (NoSuchObjectException e1) { - throw new ObjectNotFoundException( - "NoSuchObjectException while retrieving existing table.", - e1); - } catch (TException e1) { - throw new ConnectionFailureException( - "TException while retrieving existing table.", e1); - } - if (oldtbl != null) { - newTable = new Table(); - newTable.setTableName(newTableName); - newTable.setDbName(dbName); - StorageDescriptor sd = new StorageDescriptor(oldtbl.getSd()); - newTable.setSd(sd); - newTable.setParameters(oldtbl.getParameters()); - if (location == null) { - newTable.getSd().setLocation(oldtbl.getSd().getLocation()); - } else { - newTable.getSd().setLocation(location); - } - if (isExternal) { - newTable.putToParameters("EXTERNAL", "TRUE"); - newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); - } else { - newTable.getParameters().remove("EXTERNAL"); - } - // set create time - newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); - newTable.setLastAccessTimeIsSet(false); - } - return newTable; + return isMarked; + } + + @Override + public String getDelegationToken(String owner, + String renewerKerberosPrincipalName) throws HCatException { + String token = null; + try { + token = hmsClient.getDelegationToken(owner, + renewerKerberosPrincipalName); + } catch (MetaException e) { + throw new HCatException( + "MetaException while getting delegation token.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while getting delegation token.", e); } - /* - * @throws HCatException - * - * @see org.apache.hcatalog.api.HCatClient#closeClient() - */ - @Override - public void close() throws HCatException { - hmsClient.close(); + return token; + } + + @Override + public long renewDelegationToken(String tokenStrForm) 
throws HCatException { + long time = 0; + try { + time = hmsClient.renewDelegationToken(tokenStrForm); + } catch (MetaException e) { + throw new HCatException( + "MetaException while renewing delegation token.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while renewing delegation token.", e); } - private String checkDB(String name) { - if (StringUtils.isEmpty(name)) { - return MetaStoreUtils.DEFAULT_DATABASE_NAME; - } else { - return name; - } + return time; + } + + @Override + public void cancelDelegationToken(String tokenStrForm) + throws HCatException { + try { + hmsClient.cancelDelegationToken(tokenStrForm); + } catch (MetaException e) { + throw new HCatException( + "MetaException while canceling delegation token.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while canceling delegation token.", e); + } + } + + /* + * @param conf /* @throws HCatException,ConnectionFailureException + * + * @see + * org.apache.hcatalog.api.HCatClient#initialize(org.apache.hadoop.conf. + * Configuration) + */ + @Override + void initialize(Configuration conf) throws HCatException { + this.config = conf; + try { + hiveConfig = HCatUtil.getHiveConf(config); + hmsClient = HCatUtil.getHiveClient(hiveConfig); + } catch (MetaException exp) { + throw new HCatException("MetaException while creating HMS client", + exp); + } catch (IOException exp) { + throw new HCatException("IOException while creating HMS client", + exp); } - /* - * @param partInfoList - * @return The size of the list of partitions. - * @throws HCatException,ConnectionFailureException - * @see org.apache.hcatalog.api.HCatClient#addPartitions(java.util.List) - */ - @Override - public int addPartitions(List partInfoList) - throws HCatException { - int numPartitions = -1; - if ((partInfoList == null) || (partInfoList.size() == 0)) { - throw new HCatException("The partition list is null or empty."); - } + } + + private Table getHiveTableLike(String dbName, String existingTblName, + String newTableName, boolean isExternal, String location) + throws HCatException { + Table oldtbl = null; + Table newTable = null; + try { + oldtbl = hmsClient.getTable(checkDB(dbName), existingTblName); + } catch (MetaException e1) { + throw new HCatException( + "MetaException while retrieving existing table.", e1); + } catch (NoSuchObjectException e1) { + throw new ObjectNotFoundException( + "NoSuchObjectException while retrieving existing table.", + e1); + } catch (TException e1) { + throw new ConnectionFailureException( + "TException while retrieving existing table.", e1); + } + if (oldtbl != null) { + newTable = new Table(); + newTable.setTableName(newTableName); + newTable.setDbName(dbName); + StorageDescriptor sd = new StorageDescriptor(oldtbl.getSd()); + newTable.setSd(sd); + newTable.setParameters(oldtbl.getParameters()); + if (location == null) { + newTable.getSd().setLocation(oldtbl.getSd().getLocation()); + } else { + newTable.getSd().setLocation(location); + } + if (isExternal) { + newTable.putToParameters("EXTERNAL", "TRUE"); + newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else { + newTable.getParameters().remove("EXTERNAL"); + } + // set create time + newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); + newTable.setLastAccessTimeIsSet(false); + } + return newTable; + } + + /* + * @throws HCatException + * + * @see org.apache.hcatalog.api.HCatClient#closeClient() + */ + @Override + public void close() throws HCatException { + hmsClient.close(); + 
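Illustrative sketch (not part of the patch above): the delegation-token lifecycle across the three token methods; the owner and renewer principal names are placeholders.

import org.apache.hcatalog.api.HCatClient;
import org.apache.hcatalog.common.HCatException;

public class DelegationTokenSketch {
  static void tokenLifecycle(HCatClient client) throws HCatException {
    String token = client.getDelegationToken("hcatuser", "hive/_HOST@EXAMPLE.COM");
    long newExpiry = client.renewDelegationToken(token); // new expiration time, per the javadoc
    System.out.println("Token renewed until " + newExpiry);
    client.cancelDelegationToken(token);
  }
}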
} + + private String checkDB(String name) { + if (StringUtils.isEmpty(name)) { + return MetaStoreUtils.DEFAULT_DATABASE_NAME; + } else { + return name; + } + } + + /* + * @param partInfoList + * @return The size of the list of partitions. + * @throws HCatException,ConnectionFailureException + * @see org.apache.hcatalog.api.HCatClient#addPartitions(java.util.List) + */ + @Override + public int addPartitions(List partInfoList) + throws HCatException { + int numPartitions = -1; + if ((partInfoList == null) || (partInfoList.size() == 0)) { + throw new HCatException("The partition list is null or empty."); + } - Table tbl = null; - try { - tbl = hmsClient.getTable(partInfoList.get(0).getDatabaseName(), - partInfoList.get(0).getTableName()); - ArrayList ptnList = new ArrayList(); - for (HCatAddPartitionDesc desc : partInfoList) { - ptnList.add(desc.toHivePartition(tbl)); - } - numPartitions = hmsClient.add_partitions(ptnList); - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException while adding partition.", e); - } catch (AlreadyExistsException e) { - throw new HCatException( - "AlreadyExistsException while adding partition.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while adding partition.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException("The table " - + partInfoList.get(0).getTableName() - + " is could not be found.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while adding partition.", e); - } - return numPartitions; + Table tbl = null; + try { + tbl = hmsClient.getTable(partInfoList.get(0).getDatabaseName(), + partInfoList.get(0).getTableName()); + ArrayList ptnList = new ArrayList(); + for (HCatAddPartitionDesc desc : partInfoList) { + ptnList.add(desc.toHivePartition(tbl)); + } + numPartitions = hmsClient.add_partitions(ptnList); + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException while adding partition.", e); + } catch (AlreadyExistsException e) { + throw new HCatException( + "AlreadyExistsException while adding partition.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while adding partition.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException("The table " + + partInfoList.get(0).getTableName() + + " is could not be found.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while adding partition.", e); } + return numPartitions; + } - @Override - public String getMessageBusTopicName(String dbName, String tableName) throws HCatException { - try { - return hmsClient.getTable(dbName, tableName).getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); - } - catch (MetaException e) { - throw new HCatException("MetaException while retrieving JMS Topic name.", e); - } catch (NoSuchObjectException e) { - throw new HCatException("Could not find DB:" + dbName + " or Table:" + tableName, e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while retrieving JMS Topic name.", e); - } + @Override + public String getMessageBusTopicName(String dbName, String tableName) throws HCatException { + try { + return hmsClient.getTable(dbName, tableName).getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); + } + catch (MetaException e) { + throw new HCatException("MetaException while retrieving JMS Topic name.", e); + } catch (NoSuchObjectException e) { + throw new HCatException("Could not find DB:" + 
dbName + " or Table:" + tableName, e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while retrieving JMS Topic name.", e); } + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateDBDesc.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateDBDesc.java index 25080ac..b929833 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateDBDesc.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateDBDesc.java @@ -29,167 +29,167 @@ */ public class HCatCreateDBDesc { + private String dbName; + private String locationUri; + private String comment; + private Map dbProperties; + private boolean ifNotExits = false; + + /** + * Gets the database properties. + * + * @return the database properties + */ + public Map getDatabaseProperties() { + return this.dbProperties; + } + + /** + * Gets the if not exists. + * + * @return the if not exists + */ + public boolean getIfNotExists() { + return this.ifNotExits; + } + + /** + * Gets the comments. + * + * @return the comments + */ + public String getComments() { + return this.comment; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.locationUri; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + private HCatCreateDBDesc(String dbName) { + this.dbName = dbName; + } + + @Override + public String toString() { + return "HCatCreateDBDesc [" + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (locationUri != null ? "location=" + locationUri + ", " + : "location=null") + + (comment != null ? "comment=" + comment + ", " : "comment=null") + + (dbProperties != null ? "dbProperties=" + dbProperties + ", " + : "dbProperties=null") + "ifNotExits=" + ifNotExits + "]"; + } + + /** + * Creates the builder for defining attributes. + * + * @param dbName the db name + * @return the builder + */ + public static Builder create(String dbName) { + return new Builder(dbName); + } + + Database toHiveDb() { + Database hiveDB = new Database(); + hiveDB.setDescription(this.comment); + hiveDB.setLocationUri(this.locationUri); + hiveDB.setName(this.dbName); + hiveDB.setParameters(this.dbProperties); + return hiveDB; + } + + public static class Builder { + + private String innerLoc; + private String innerComment; + private Map innerDBProps; private String dbName; - private String locationUri; - private String comment; - private Map dbProperties; - private boolean ifNotExits = false; + private boolean ifNotExists = false; - /** - * Gets the database properties. - * - * @return the database properties - */ - public Map getDatabaseProperties() { - return this.dbProperties; + private Builder(String dbName) { + this.dbName = dbName; } /** - * Gets the if not exists. + * Location. * - * @return the if not exists + * @param value the location of the database. + * @return the builder */ - public boolean getIfNotExists() { - return this.ifNotExits; + public Builder location(String value) { + this.innerLoc = value; + return this; } /** - * Gets the comments. + * Comment. * - * @return the comments + * @param value comments. + * @return the builder */ - public String getComments() { - return this.comment; + public Builder comment(String value) { + this.innerComment = value; + return this; } /** - * Gets the location. 
- * - * @return the location + * If not exists. + * @param ifNotExists If set to true, hive will not throw exception, if a + * database with the same name already exists. + * @return the builder */ - public String getLocation() { - return this.locationUri; + public Builder ifNotExists(boolean ifNotExists) { + this.ifNotExists = ifNotExists; + return this; } /** - * Gets the database name. + * Database properties. * - * @return the database name + * @param dbProps the database properties + * @return the builder */ - public String getDatabaseName() { - return this.dbName; + public Builder databaseProperties(Map dbProps) { + this.innerDBProps = dbProps; + return this; } - private HCatCreateDBDesc(String dbName) { - this.dbName = dbName; - } - - @Override - public String toString() { - return "HCatCreateDBDesc [" - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (locationUri != null ? "location=" + locationUri + ", " - : "location=null") - + (comment != null ? "comment=" + comment + ", " : "comment=null") - + (dbProperties != null ? "dbProperties=" + dbProperties + ", " - : "dbProperties=null") + "ifNotExits=" + ifNotExits + "]"; - } /** - * Creates the builder for defining attributes. + * Builds the create database descriptor. * - * @param dbName the db name - * @return the builder + * @return An instance of HCatCreateDBDesc + * @throws HCatException */ - public static Builder create(String dbName) { - return new Builder(dbName); - } + public HCatCreateDBDesc build() throws HCatException { + if (this.dbName == null) { + throw new HCatException("Database name cannot be null."); + } + HCatCreateDBDesc desc = new HCatCreateDBDesc(this.dbName); + desc.comment = this.innerComment; + desc.locationUri = this.innerLoc; + desc.dbProperties = this.innerDBProps; + desc.ifNotExits = this.ifNotExists; + return desc; - Database toHiveDb() { - Database hiveDB = new Database(); - hiveDB.setDescription(this.comment); - hiveDB.setLocationUri(this.locationUri); - hiveDB.setName(this.dbName); - hiveDB.setParameters(this.dbProperties); - return hiveDB; } - public static class Builder { - - private String innerLoc; - private String innerComment; - private Map innerDBProps; - private String dbName; - private boolean ifNotExists = false; - - private Builder(String dbName) { - this.dbName = dbName; - } - - /** - * Location. - * - * @param value the location of the database. - * @return the builder - */ - public Builder location(String value) { - this.innerLoc = value; - return this; - } - - /** - * Comment. - * - * @param value comments. - * @return the builder - */ - public Builder comment(String value) { - this.innerComment = value; - return this; - } - - /** - * If not exists. - * @param ifNotExists If set to true, hive will not throw exception, if a - * database with the same name already exists. - * @return the builder - */ - public Builder ifNotExists(boolean ifNotExists) { - this.ifNotExists = ifNotExists; - return this; - } - - /** - * Database properties. - * - * @param dbProps the database properties - * @return the builder - */ - public Builder databaseProperties(Map dbProps) { - this.innerDBProps = dbProps; - return this; - } - - - /** - * Builds the create database descriptor. 
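For reference while reviewing the HCatCreateDBDesc builder above, here is a minimal usage sketch. It assumes an already-initialized HCatClient named client (the factory method appears later in this patch), and the database name, comment and path are purely illustrative.

    // Illustrative sketch only: assemble a CREATE DATABASE descriptor and submit it.
    HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create("mydb")      // hypothetical database name
        .comment("Created through the webhcat java-client API")
        .location("/user/hive/warehouse/mydb.db")                  // optional; omit to use the metastore default
        .ifNotExists(true)                                         // do not fail if "mydb" already exists
        .build();
    client.createDatabase(dbDesc);
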
- * - * @return An instance of HCatCreateDBDesc - * @throws HCatException - */ - public HCatCreateDBDesc build() throws HCatException { - if (this.dbName == null) { - throw new HCatException("Database name cannot be null."); - } - HCatCreateDBDesc desc = new HCatCreateDBDesc(this.dbName); - desc.comment = this.innerComment; - desc.locationUri = this.innerLoc; - desc.dbProperties = this.innerDBProps; - desc.ifNotExits = this.ifNotExists; - return desc; - - } - - } + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateTableDesc.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateTableDesc.java index 542f7a5..89b15d0 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateTableDesc.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateTableDesc.java @@ -54,468 +54,468 @@ @SuppressWarnings("deprecation") public class HCatCreateTableDesc { - private static final Logger LOG = LoggerFactory.getLogger(HCatCreateTableDesc.class); + private static final Logger LOG = LoggerFactory.getLogger(HCatCreateTableDesc.class); + + private String tableName; + private String dbName; + private boolean isExternal; + private String comment; + private String location; + private List cols; + private List partCols; + private List bucketCols; + private int numBuckets; + private List sortCols; + private Map tblProps; + private boolean ifNotExists; + private String fileFormat; + private String inputformat; + private String outputformat; + private String serde; + private String storageHandler; + + private HCatCreateTableDesc(String dbName, String tableName, List columns) { + this.dbName = dbName; + this.tableName = tableName; + this.cols = columns; + } + + /** + * Creates a builder for defining attributes. 
+ * + * @param dbName the db name + * @param tableName the table name + * @param columns the columns + * @return the builder + */ + public static Builder create(String dbName, String tableName, List columns) { + return new Builder(dbName, tableName, columns); + } + + Table toHiveTable(HiveConf conf) throws HCatException { + + Table newTable = new Table(); + newTable.setDbName(dbName); + newTable.setTableName(tableName); + if (tblProps != null) { + newTable.setParameters(tblProps); + } + + if (isExternal) { + newTable.putToParameters("EXTERNAL", "TRUE"); + newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else { + newTable.setTableType(TableType.MANAGED_TABLE.toString()); + } + + StorageDescriptor sd = new StorageDescriptor(); + sd.setSerdeInfo(new SerDeInfo()); + if (location != null) { + sd.setLocation(location); + } + if (this.comment != null) { + newTable.putToParameters("comment", comment); + } + if (!StringUtils.isEmpty(fileFormat)) { + sd.setInputFormat(inputformat); + sd.setOutputFormat(outputformat); + if (serde != null) { + sd.getSerdeInfo().setSerializationLib(serde); + } else { + LOG.info("Using LazySimpleSerDe for table " + tableName); + sd.getSerdeInfo() + .setSerializationLib( + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class + .getName()); + } + } else { + try { + LOG.info("Creating instance of storage handler to get input/output, serder info."); + HiveStorageHandler sh = HiveUtils.getStorageHandler(conf, + storageHandler); + sd.setInputFormat(sh.getInputFormatClass().getName()); + sd.setOutputFormat(sh.getOutputFormatClass().getName()); + sd.getSerdeInfo().setSerializationLib( + sh.getSerDeClass().getName()); + newTable.putToParameters( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, + storageHandler); + } catch (HiveException e) { + throw new HCatException( + "Exception while creating instance of storage handler", + e); + } + } + newTable.setSd(sd); + if (this.partCols != null) { + ArrayList hivePtnCols = new ArrayList(); + for (HCatFieldSchema fs : this.partCols) { + hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs)); + } + newTable.setPartitionKeys(hivePtnCols); + } + + if (this.cols != null) { + ArrayList hiveTblCols = new ArrayList(); + for (HCatFieldSchema fs : this.cols) { + hiveTblCols.add(HCatSchemaUtils.getFieldSchema(fs)); + } + newTable.getSd().setCols(hiveTblCols); + } + + if (this.bucketCols != null) { + newTable.getSd().setBucketCols(bucketCols); + newTable.getSd().setNumBuckets(numBuckets); + } + + if (this.sortCols != null) { + newTable.getSd().setSortCols(sortCols); + } + + newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); + newTable.setLastAccessTimeIsSet(false); + return newTable; + } + + /** + * Gets the if not exists. + * + * @return the if not exists + */ + public boolean getIfNotExists() { + return this.ifNotExists; + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return this.tableName; + } + + /** + * Gets the cols. + * + * @return the cols + */ + public List getCols() { + return this.cols; + } + + /** + * Gets the partition cols. + * + * @return the partition cols + */ + public List getPartitionCols() { + return this.partCols; + } + + /** + * Gets the bucket cols. + * + * @return the bucket cols + */ + public List getBucketCols() { + return this.bucketCols; + } + + public int getNumBuckets() { + return this.numBuckets; + } + + /** + * Gets the comments. 
+ * + * @return the comments + */ + public String getComments() { + return this.comment; + } + + /** + * Gets the storage handler. + * + * @return the storage handler + */ + public String getStorageHandler() { + return this.storageHandler; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.location; + } + + /** + * Gets the external. + * + * @return the external + */ + public boolean getExternal() { + return this.isExternal; + } + + /** + * Gets the sort cols. + * + * @return the sort cols + */ + public List getSortCols() { + return this.sortCols; + } + + /** + * Gets the tbl props. + * + * @return the tbl props + */ + public Map getTblProps() { + return this.tblProps; + } + + /** + * Gets the file format. + * + * @return the file format + */ + public String getFileFormat() { + return this.fileFormat; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + @Override + public String toString() { + return "HCatCreateTableDesc [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + "isExternal=" + + isExternal + + ", " + + (comment != null ? "comment=" + comment + ", " : "comment=null") + + (location != null ? "location=" + location + ", " : "location=null") + + (cols != null ? "cols=" + cols + ", " : "cols=null") + + (partCols != null ? "partCols=" + partCols + ", " : "partCols=null") + + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") + + "numBuckets=" + + numBuckets + + ", " + + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") + + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") + + "ifNotExists=" + + ifNotExists + + ", " + + (fileFormat != null ? "fileFormat=" + fileFormat + ", " : "fileFormat=null") + + (inputformat != null ? "inputformat=" + inputformat + ", " + : "inputformat=null") + + (outputformat != null ? "outputformat=" + outputformat + ", " + : "outputformat=null") + + (serde != null ? "serde=" + serde + ", " : "serde=null") + + (storageHandler != null ? "storageHandler=" + storageHandler + : "storageHandler=null") + "]"; + } + + public static class Builder { private String tableName; - private String dbName; private boolean isExternal; - private String comment; - private String location; private List cols; private List partCols; private List bucketCols; - private int numBuckets; private List sortCols; - private Map tblProps; - private boolean ifNotExists; + private int numBuckets; + private String comment; private String fileFormat; - private String inputformat; - private String outputformat; - private String serde; + private String location; private String storageHandler; + private Map tblProps; + private boolean ifNotExists; + private String dbName; - private HCatCreateTableDesc(String dbName, String tableName, List columns) { - this.dbName = dbName; - this.tableName = tableName; - this.cols = columns; - } - /** - * Creates a builder for defining attributes. 
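The table-descriptor builder being re-indented here reads most easily next to an example, so the following sketch shows a typical assembly. The column list, names and the client variable are invented, and the HCatFieldSchema(name, type, comment) constructor from the HCatalog schema package is assumed.

    // Illustrative sketch only: a two-column table stored as RCFile.
    List<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
    cols.add(new HCatFieldSchema("id", HCatFieldSchema.Type.INT, "row id"));
    cols.add(new HCatFieldSchema("value", HCatFieldSchema.Type.STRING, "payload"));

    HCatCreateTableDesc tableDesc = HCatCreateTableDesc.create("mydb", "mytable", cols)
        .fileFormat("RCFile")        // build() maps this to RCFileInputFormat/RCFileOutputFormat and ColumnarSerDe
        .comments("created from the java-client")
        .ifNotExists(true)
        .build();
    client.createTable(tableDesc);
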
- * - * @param dbName the db name - * @param tableName the table name - * @param columns the columns - * @return the builder - */ - public static Builder create(String dbName, String tableName, List columns) { - return new Builder(dbName, tableName, columns); + private Builder(String dbName, String tableName, List columns) { + this.dbName = dbName; + this.tableName = tableName; + this.cols = columns; } - Table toHiveTable(HiveConf conf) throws HCatException { - - Table newTable = new Table(); - newTable.setDbName(dbName); - newTable.setTableName(tableName); - if (tblProps != null) { - newTable.setParameters(tblProps); - } - - if (isExternal) { - newTable.putToParameters("EXTERNAL", "TRUE"); - newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); - } else { - newTable.setTableType(TableType.MANAGED_TABLE.toString()); - } - - StorageDescriptor sd = new StorageDescriptor(); - sd.setSerdeInfo(new SerDeInfo()); - if (location != null) { - sd.setLocation(location); - } - if (this.comment != null) { - newTable.putToParameters("comment", comment); - } - if (!StringUtils.isEmpty(fileFormat)) { - sd.setInputFormat(inputformat); - sd.setOutputFormat(outputformat); - if (serde != null) { - sd.getSerdeInfo().setSerializationLib(serde); - } else { - LOG.info("Using LazySimpleSerDe for table " + tableName); - sd.getSerdeInfo() - .setSerializationLib( - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class - .getName()); - } - } else { - try { - LOG.info("Creating instance of storage handler to get input/output, serder info."); - HiveStorageHandler sh = HiveUtils.getStorageHandler(conf, - storageHandler); - sd.setInputFormat(sh.getInputFormatClass().getName()); - sd.setOutputFormat(sh.getOutputFormatClass().getName()); - sd.getSerdeInfo().setSerializationLib( - sh.getSerDeClass().getName()); - newTable.putToParameters( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, - storageHandler); - } catch (HiveException e) { - throw new HCatException( - "Exception while creating instance of storage handler", - e); - } - } - newTable.setSd(sd); - if (this.partCols != null) { - ArrayList hivePtnCols = new ArrayList(); - for (HCatFieldSchema fs : this.partCols) { - hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs)); - } - newTable.setPartitionKeys(hivePtnCols); - } - - if (this.cols != null) { - ArrayList hiveTblCols = new ArrayList(); - for (HCatFieldSchema fs : this.cols) { - hiveTblCols.add(HCatSchemaUtils.getFieldSchema(fs)); - } - newTable.getSd().setCols(hiveTblCols); - } - - if (this.bucketCols != null) { - newTable.getSd().setBucketCols(bucketCols); - newTable.getSd().setNumBuckets(numBuckets); - } - - if (this.sortCols != null) { - newTable.getSd().setSortCols(sortCols); - } - - newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); - newTable.setLastAccessTimeIsSet(false); - return newTable; - } /** - * Gets the if not exists. + * If not exists. * - * @return the if not exists + * @param ifNotExists If set to true, hive will not throw exception, if a + * table with the same name already exists. + * @return the builder */ - public boolean getIfNotExists() { - return this.ifNotExists; + public Builder ifNotExists(boolean ifNotExists) { + this.ifNotExists = ifNotExists; + return this; } - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return this.tableName; - } /** - * Gets the cols. + * Partition cols. 
* - * @return the cols + * @param partCols the partition cols + * @return the builder */ - public List getCols() { - return this.cols; + public Builder partCols(List partCols) { + this.partCols = partCols; + return this; } - /** - * Gets the partition cols. - * - * @return the partition cols - */ - public List getPartitionCols() { - return this.partCols; - } /** - * Gets the bucket cols. + * Bucket cols. * - * @return the bucket cols + * @param bucketCols the bucket cols + * @return the builder */ - public List getBucketCols() { - return this.bucketCols; - } - - public int getNumBuckets() { - return this.numBuckets; + public Builder bucketCols(List bucketCols, int buckets) { + this.bucketCols = bucketCols; + this.numBuckets = buckets; + return this; } /** - * Gets the comments. + * Storage handler. * - * @return the comments + * @param storageHandler the storage handler + * @return the builder */ - public String getComments() { - return this.comment; + public Builder storageHandler(String storageHandler) { + this.storageHandler = storageHandler; + return this; } /** - * Gets the storage handler. + * Location. * - * @return the storage handler + * @param location the location + * @return the builder */ - public String getStorageHandler() { - return this.storageHandler; + public Builder location(String location) { + this.location = location; + return this; } /** - * Gets the location. + * Comments. * - * @return the location + * @param comment the comment + * @return the builder */ - public String getLocation() { - return this.location; + public Builder comments(String comment) { + this.comment = comment; + return this; } /** - * Gets the external. + * Checks if is table external. * - * @return the external + * @param isExternal the is external + * @return the builder */ - public boolean getExternal() { - return this.isExternal; + public Builder isTableExternal(boolean isExternal) { + this.isExternal = isExternal; + return this; } /** - * Gets the sort cols. + * Sort cols. * - * @return the sort cols + * @param sortCols the sort cols + * @return the builder */ - public List getSortCols() { - return this.sortCols; + public Builder sortCols(ArrayList sortCols) { + this.sortCols = sortCols; + return this; } /** - * Gets the tbl props. + * Tbl props. * - * @return the tbl props + * @param tblProps the tbl props + * @return the builder */ - public Map getTblProps() { - return this.tblProps; + public Builder tblProps(Map tblProps) { + this.tblProps = tblProps; + return this; } /** - * Gets the file format. + * File format. * - * @return the file format + * @param format the format + * @return the builder */ - public String getFileFormat() { - return this.fileFormat; + public Builder fileFormat(String format) { + this.fileFormat = format; + return this; } /** - * Gets the database name. + * Builds the HCatCreateTableDesc. * - * @return the database name + * @return HCatCreateTableDesc + * @throws HCatException */ - public String getDatabaseName() { - return this.dbName; - } - - @Override - public String toString() { - return "HCatCreateTableDesc [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + "isExternal=" - + isExternal - + ", " - + (comment != null ? "comment=" + comment + ", " : "comment=null") - + (location != null ? "location=" + location + ", " : "location=null") - + (cols != null ? "cols=" + cols + ", " : "cols=null") - + (partCols != null ? 
"partCols=" + partCols + ", " : "partCols=null") - + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") - + "numBuckets=" - + numBuckets - + ", " - + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") - + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") - + "ifNotExists=" - + ifNotExists - + ", " - + (fileFormat != null ? "fileFormat=" + fileFormat + ", " : "fileFormat=null") - + (inputformat != null ? "inputformat=" + inputformat + ", " - : "inputformat=null") - + (outputformat != null ? "outputformat=" + outputformat + ", " - : "outputformat=null") - + (serde != null ? "serde=" + serde + ", " : "serde=null") - + (storageHandler != null ? "storageHandler=" + storageHandler - : "storageHandler=null") + "]"; - } - - public static class Builder { - - private String tableName; - private boolean isExternal; - private List cols; - private List partCols; - private List bucketCols; - private List sortCols; - private int numBuckets; - private String comment; - private String fileFormat; - private String location; - private String storageHandler; - private Map tblProps; - private boolean ifNotExists; - private String dbName; - - - private Builder(String dbName, String tableName, List columns) { - this.dbName = dbName; - this.tableName = tableName; - this.cols = columns; - } - - - /** - * If not exists. - * - * @param ifNotExists If set to true, hive will not throw exception, if a - * table with the same name already exists. - * @return the builder - */ - public Builder ifNotExists(boolean ifNotExists) { - this.ifNotExists = ifNotExists; - return this; - } - - - /** - * Partition cols. - * - * @param partCols the partition cols - * @return the builder - */ - public Builder partCols(List partCols) { - this.partCols = partCols; - return this; - } - - - /** - * Bucket cols. - * - * @param bucketCols the bucket cols - * @return the builder - */ - public Builder bucketCols(List bucketCols, int buckets) { - this.bucketCols = bucketCols; - this.numBuckets = buckets; - return this; - } - - /** - * Storage handler. - * - * @param storageHandler the storage handler - * @return the builder - */ - public Builder storageHandler(String storageHandler) { - this.storageHandler = storageHandler; - return this; - } - - /** - * Location. - * - * @param location the location - * @return the builder - */ - public Builder location(String location) { - this.location = location; - return this; - } - - /** - * Comments. - * - * @param comment the comment - * @return the builder - */ - public Builder comments(String comment) { - this.comment = comment; - return this; - } - - /** - * Checks if is table external. - * - * @param isExternal the is external - * @return the builder - */ - public Builder isTableExternal(boolean isExternal) { - this.isExternal = isExternal; - return this; - } - - /** - * Sort cols. - * - * @param sortCols the sort cols - * @return the builder - */ - public Builder sortCols(ArrayList sortCols) { - this.sortCols = sortCols; - return this; - } - - /** - * Tbl props. - * - * @param tblProps the tbl props - * @return the builder - */ - public Builder tblProps(Map tblProps) { - this.tblProps = tblProps; - return this; - } - - /** - * File format. - * - * @param format the format - * @return the builder - */ - public Builder fileFormat(String format) { - this.fileFormat = format; - return this; - } - - /** - * Builds the HCatCreateTableDesc. 
- * - * @return HCatCreateTableDesc - * @throws HCatException - */ - public HCatCreateTableDesc build() throws HCatException { - if (this.dbName == null) { - LOG.info("Database name found null. Setting db to :" - + MetaStoreUtils.DEFAULT_DATABASE_NAME); - this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - } - HCatCreateTableDesc desc = new HCatCreateTableDesc(this.dbName, - this.tableName, this.cols); - desc.ifNotExists = this.ifNotExists; - desc.isExternal = this.isExternal; - desc.comment = this.comment; - desc.partCols = this.partCols; - desc.bucketCols = this.bucketCols; - desc.numBuckets = this.numBuckets; - desc.location = this.location; - desc.tblProps = this.tblProps; - desc.sortCols = this.sortCols; - desc.serde = null; - if (!StringUtils.isEmpty(fileFormat)) { - desc.fileFormat = fileFormat; - if ("SequenceFile".equalsIgnoreCase(fileFormat)) { - desc.inputformat = SequenceFileInputFormat.class.getName(); - desc.outputformat = SequenceFileOutputFormat.class - .getName(); - } else if ("RCFile".equalsIgnoreCase(fileFormat)) { - desc.inputformat = RCFileInputFormat.class.getName(); - desc.outputformat = RCFileOutputFormat.class.getName(); - desc.serde = ColumnarSerDe.class.getName(); - } - desc.storageHandler = StringUtils.EMPTY; - } else if (!StringUtils.isEmpty(storageHandler)) { - desc.storageHandler = storageHandler; - } else { - desc.fileFormat = "TextFile"; - LOG.info("Using text file format for the table."); - desc.inputformat = TextInputFormat.class.getName(); - LOG.info("Table input format:" + desc.inputformat); - desc.outputformat = IgnoreKeyTextOutputFormat.class - .getName(); - LOG.info("Table output format:" + desc.outputformat); - } - return desc; + public HCatCreateTableDesc build() throws HCatException { + if (this.dbName == null) { + LOG.info("Database name found null. 
Setting db to :" + + MetaStoreUtils.DEFAULT_DATABASE_NAME); + this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + } + HCatCreateTableDesc desc = new HCatCreateTableDesc(this.dbName, + this.tableName, this.cols); + desc.ifNotExists = this.ifNotExists; + desc.isExternal = this.isExternal; + desc.comment = this.comment; + desc.partCols = this.partCols; + desc.bucketCols = this.bucketCols; + desc.numBuckets = this.numBuckets; + desc.location = this.location; + desc.tblProps = this.tblProps; + desc.sortCols = this.sortCols; + desc.serde = null; + if (!StringUtils.isEmpty(fileFormat)) { + desc.fileFormat = fileFormat; + if ("SequenceFile".equalsIgnoreCase(fileFormat)) { + desc.inputformat = SequenceFileInputFormat.class.getName(); + desc.outputformat = SequenceFileOutputFormat.class + .getName(); + } else if ("RCFile".equalsIgnoreCase(fileFormat)) { + desc.inputformat = RCFileInputFormat.class.getName(); + desc.outputformat = RCFileOutputFormat.class.getName(); + desc.serde = ColumnarSerDe.class.getName(); } + desc.storageHandler = StringUtils.EMPTY; + } else if (!StringUtils.isEmpty(storageHandler)) { + desc.storageHandler = storageHandler; + } else { + desc.fileFormat = "TextFile"; + LOG.info("Using text file format for the table."); + desc.inputformat = TextInputFormat.class.getName(); + LOG.info("Table input format:" + desc.inputformat); + desc.outputformat = IgnoreKeyTextOutputFormat.class + .getName(); + LOG.info("Table output format:" + desc.outputformat); + } + return desc; } + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatDatabase.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatDatabase.java index 36125dd..52853cb 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatDatabase.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatDatabase.java @@ -29,61 +29,61 @@ */ public class HCatDatabase { - private String dbName; - private String dbLocation; - private String comment; - private Map props; + private String dbName; + private String dbLocation; + private String comment; + private Map props; - HCatDatabase(Database db) { - this.dbName = db.getName(); - this.props = db.getParameters(); - this.dbLocation = db.getLocationUri(); - this.comment = db.getDescription(); - } + HCatDatabase(Database db) { + this.dbName = db.getName(); + this.props = db.getParameters(); + this.dbLocation = db.getLocationUri(); + this.comment = db.getDescription(); + } - /** - * Gets the database name. - * - * @return the database name - */ - public String getName() { - return dbName; - } + /** + * Gets the database name. + * + * @return the database name + */ + public String getName() { + return dbName; + } - /** - * Gets the dB location. - * - * @return the dB location - */ - public String getLocation() { - return dbLocation; - } + /** + * Gets the dB location. + * + * @return the dB location + */ + public String getLocation() { + return dbLocation; + } - /** - * Gets the comment. - * - * @return the comment - */ - public String getComment() { - return comment; - } + /** + * Gets the comment. + * + * @return the comment + */ + public String getComment() { + return comment; + } - /** - * Gets the dB properties. - * - * @return the dB properties - */ - public Map getProperties() { - return props; - } + /** + * Gets the dB properties. 
+ * + * @return the dB properties + */ + public Map getProperties() { + return props; + } - @Override - public String toString() { - return "HCatDatabase [" - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (dbLocation != null ? "dbLocation=" + dbLocation + ", " : "dbLocation=null") - + (comment != null ? "comment=" + comment + ", " : "comment=null") - + (props != null ? "props=" + props : "props=null") + "]"; - } + @Override + public String toString() { + return "HCatDatabase [" + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (dbLocation != null ? "dbLocation=" + dbLocation + ", " : "dbLocation=null") + + (comment != null ? "comment=" + comment + ", " : "comment=null") + + (props != null ? "props=" + props : "props=null") + "]"; + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatPartition.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatPartition.java index 34905cb..f4bac8c 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatPartition.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatPartition.java @@ -36,170 +36,170 @@ */ public class HCatPartition { - private String tableName; - private String dbName; - private List values; - private List tableCols; - private int createTime; - private int lastAccessTime; - private StorageDescriptor sd; - private Map parameters; - - HCatPartition(Partition partition) throws HCatException { - this.tableName = partition.getTableName(); - this.dbName = partition.getDbName(); - this.createTime = partition.getCreateTime(); - this.lastAccessTime = partition.getLastAccessTime(); - this.parameters = partition.getParameters(); - this.values = partition.getValues(); - this.sd = partition.getSd(); - this.tableCols = new ArrayList(); - for (FieldSchema fs : this.sd.getCols()) { - this.tableCols.add(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - } - - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return this.tableName; - } - - /** - * Gets the database name. - * - * @return the database name - */ - public String getDatabaseName() { - return this.dbName; - } - - /** - * Gets the columns of the table. - * - * @return the columns - */ - public List getColumns() { - return this.tableCols; - } - - /** - * Gets the input format. - * - * @return the input format - */ - public String getInputFormat() { - return this.sd.getInputFormat(); - } - - /** - * Gets the output format. - * - * @return the output format - */ - public String getOutputFormat() { - return this.sd.getOutputFormat(); - } - - /** - * Gets the storage handler. - * - * @return the storage handler - */ - public String getStorageHandler() { - return this.sd - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return this.sd.getLocation(); - } - - /** - * Gets the serde. - * - * @return the serde - */ - public String getSerDe() { - return this.sd.getSerdeInfo().getSerializationLib(); - } - - public Map getParameters() { - return this.parameters; - } - - /** - * Gets the last access time. - * - * @return the last access time - */ - public int getLastAccessTime() { - return this.lastAccessTime; - } - - /** - * Gets the creates the time. 
- * - * @return the creates the time - */ - public int getCreateTime() { - return this.createTime; - } - - /** - * Gets the values. - * - * @return the values - */ - public List getValues() { - return this.values; - } - - /** - * Gets the bucket columns. - * - * @return the bucket columns - */ - public List getBucketCols() { - return this.sd.getBucketCols(); - } - - /** - * Gets the number of buckets. - * - * @return the number of buckets - */ - public int getNumBuckets() { - return this.sd.getNumBuckets(); - } - - /** - * Gets the sort columns. - * - * @return the sort columns - */ - public List getSortCols() { - return this.sd.getSortCols(); - } - - @Override - public String toString() { - return "HCatPartition [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (values != null ? "values=" + values + ", " : "values=null") - + "createTime=" + createTime + ", lastAccessTime=" - + lastAccessTime + ", " + (sd != null ? "sd=" + sd + ", " : "sd=null") - + (parameters != null ? "parameters=" + parameters : "parameters=null") + "]"; - } + private String tableName; + private String dbName; + private List values; + private List tableCols; + private int createTime; + private int lastAccessTime; + private StorageDescriptor sd; + private Map parameters; + + HCatPartition(Partition partition) throws HCatException { + this.tableName = partition.getTableName(); + this.dbName = partition.getDbName(); + this.createTime = partition.getCreateTime(); + this.lastAccessTime = partition.getLastAccessTime(); + this.parameters = partition.getParameters(); + this.values = partition.getValues(); + this.sd = partition.getSd(); + this.tableCols = new ArrayList(); + for (FieldSchema fs : this.sd.getCols()) { + this.tableCols.add(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return this.tableName; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + /** + * Gets the columns of the table. + * + * @return the columns + */ + public List getColumns() { + return this.tableCols; + } + + /** + * Gets the input format. + * + * @return the input format + */ + public String getInputFormat() { + return this.sd.getInputFormat(); + } + + /** + * Gets the output format. + * + * @return the output format + */ + public String getOutputFormat() { + return this.sd.getOutputFormat(); + } + + /** + * Gets the storage handler. + * + * @return the storage handler + */ + public String getStorageHandler() { + return this.sd + .getParameters() + .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.sd.getLocation(); + } + + /** + * Gets the serde. + * + * @return the serde + */ + public String getSerDe() { + return this.sd.getSerdeInfo().getSerializationLib(); + } + + public Map getParameters() { + return this.parameters; + } + + /** + * Gets the last access time. + * + * @return the last access time + */ + public int getLastAccessTime() { + return this.lastAccessTime; + } + + /** + * Gets the creates the time. + * + * @return the creates the time + */ + public int getCreateTime() { + return this.createTime; + } + + /** + * Gets the values. 
+ * + * @return the values + */ + public List getValues() { + return this.values; + } + + /** + * Gets the bucket columns. + * + * @return the bucket columns + */ + public List getBucketCols() { + return this.sd.getBucketCols(); + } + + /** + * Gets the number of buckets. + * + * @return the number of buckets + */ + public int getNumBuckets() { + return this.sd.getNumBuckets(); + } + + /** + * Gets the sort columns. + * + * @return the sort columns + */ + public List getSortCols() { + return this.sd.getSortCols(); + } + + @Override + public String toString() { + return "HCatPartition [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (values != null ? "values=" + values + ", " : "values=null") + + "createTime=" + createTime + ", lastAccessTime=" + + lastAccessTime + ", " + (sd != null ? "sd=" + sd + ", " : "sd=null") + + (parameters != null ? "parameters=" + parameters : "parameters=null") + "]"; + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatTable.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatTable.java index 17840cb..515f829 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatTable.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatTable.java @@ -35,194 +35,194 @@ */ public class HCatTable { - private String tableName; - private String tabletype; - private List cols; - private List partCols; - private List bucketCols; - private List sortCols; - private int numBuckets; - private String inputFileFormat; - private String outputFileFormat; - private String storageHandler; - private Map tblProps; - private String dbName; - private String serde; - private String location; - - HCatTable(Table hiveTable) throws HCatException { - this.tableName = hiveTable.getTableName(); - this.dbName = hiveTable.getDbName(); - this.tabletype = hiveTable.getTableType(); - cols = new ArrayList(); - for (FieldSchema colFS : hiveTable.getSd().getCols()) { - cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); - } - partCols = new ArrayList(); - for (FieldSchema colFS : hiveTable.getPartitionKeys()) { - partCols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); - } - bucketCols = hiveTable.getSd().getBucketCols(); - sortCols = hiveTable.getSd().getSortCols(); - numBuckets = hiveTable.getSd().getNumBuckets(); - inputFileFormat = hiveTable.getSd().getInputFormat(); - outputFileFormat = hiveTable.getSd().getOutputFormat(); - storageHandler = hiveTable - .getSd() - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); - tblProps = hiveTable.getParameters(); - serde = hiveTable.getSd().getSerdeInfo().getSerializationLib(); - location = hiveTable.getSd().getLocation(); - } - - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return tableName; - } - - /** - * Gets the db name. - * - * @return the db name - */ - public String getDbName() { - return dbName; - } - - /** - * Gets the columns. - * - * @return the columns - */ - public List getCols() { - return cols; - } - - /** - * Gets the part columns. - * - * @return the part columns - */ - public List getPartCols() { - return partCols; - } - - /** - * Gets the bucket columns. - * - * @return the bucket columns - */ - public List getBucketCols() { - return bucketCols; - } - - /** - * Gets the sort columns. 
- * - * @return the sort columns - */ - public List getSortCols() { - return sortCols; - } - - /** - * Gets the number of buckets. - * - * @return the number of buckets - */ - public int getNumBuckets() { - return numBuckets; - } - - /** - * Gets the storage handler. - * - * @return the storage handler - */ - public String getStorageHandler() { - return storageHandler; - } - - /** - * Gets the table props. - * - * @return the table props - */ - public Map getTblProps() { - return tblProps; - } - - /** - * Gets the tabletype. - * - * @return the tabletype - */ - public String getTabletype() { - return tabletype; - } - - /** - * Gets the input file format. - * - * @return the input file format - */ - public String getInputFileFormat() { - return inputFileFormat; - } - - /** - * Gets the output file format. - * - * @return the output file format - */ - public String getOutputFileFormat() { - return outputFileFormat; - } - - /** - * Gets the serde lib. - * - * @return the serde lib - */ - public String getSerdeLib() { - return serde; - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return location; - } - - @Override - public String toString() { - return "HCatTable [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (tabletype != null ? "tabletype=" + tabletype + ", " : "tabletype=null") - + (cols != null ? "cols=" + cols + ", " : "cols=null") - + (partCols != null ? "partCols=" + partCols + ", " : "partCols==null") - + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") - + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") - + "numBuckets=" - + numBuckets - + ", " - + (inputFileFormat != null ? "inputFileFormat=" - + inputFileFormat + ", " : "inputFileFormat=null") - + (outputFileFormat != null ? "outputFileFormat=" - + outputFileFormat + ", " : "outputFileFormat=null") - + (storageHandler != null ? "storageHandler=" + storageHandler - + ", " : "storageHandler=null") - + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") - + (serde != null ? "serde=" + serde + ", " : "serde=") - + (location != null ? 
"location=" + location : "location=") + "]"; - } + private String tableName; + private String tabletype; + private List cols; + private List partCols; + private List bucketCols; + private List sortCols; + private int numBuckets; + private String inputFileFormat; + private String outputFileFormat; + private String storageHandler; + private Map tblProps; + private String dbName; + private String serde; + private String location; + + HCatTable(Table hiveTable) throws HCatException { + this.tableName = hiveTable.getTableName(); + this.dbName = hiveTable.getDbName(); + this.tabletype = hiveTable.getTableType(); + cols = new ArrayList(); + for (FieldSchema colFS : hiveTable.getSd().getCols()) { + cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); + } + partCols = new ArrayList(); + for (FieldSchema colFS : hiveTable.getPartitionKeys()) { + partCols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); + } + bucketCols = hiveTable.getSd().getBucketCols(); + sortCols = hiveTable.getSd().getSortCols(); + numBuckets = hiveTable.getSd().getNumBuckets(); + inputFileFormat = hiveTable.getSd().getInputFormat(); + outputFileFormat = hiveTable.getSd().getOutputFormat(); + storageHandler = hiveTable + .getSd() + .getParameters() + .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); + tblProps = hiveTable.getParameters(); + serde = hiveTable.getSd().getSerdeInfo().getSerializationLib(); + location = hiveTable.getSd().getLocation(); + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return tableName; + } + + /** + * Gets the db name. + * + * @return the db name + */ + public String getDbName() { + return dbName; + } + + /** + * Gets the columns. + * + * @return the columns + */ + public List getCols() { + return cols; + } + + /** + * Gets the part columns. + * + * @return the part columns + */ + public List getPartCols() { + return partCols; + } + + /** + * Gets the bucket columns. + * + * @return the bucket columns + */ + public List getBucketCols() { + return bucketCols; + } + + /** + * Gets the sort columns. + * + * @return the sort columns + */ + public List getSortCols() { + return sortCols; + } + + /** + * Gets the number of buckets. + * + * @return the number of buckets + */ + public int getNumBuckets() { + return numBuckets; + } + + /** + * Gets the storage handler. + * + * @return the storage handler + */ + public String getStorageHandler() { + return storageHandler; + } + + /** + * Gets the table props. + * + * @return the table props + */ + public Map getTblProps() { + return tblProps; + } + + /** + * Gets the tabletype. + * + * @return the tabletype + */ + public String getTabletype() { + return tabletype; + } + + /** + * Gets the input file format. + * + * @return the input file format + */ + public String getInputFileFormat() { + return inputFileFormat; + } + + /** + * Gets the output file format. + * + * @return the output file format + */ + public String getOutputFileFormat() { + return outputFileFormat; + } + + /** + * Gets the serde lib. + * + * @return the serde lib + */ + public String getSerdeLib() { + return serde; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return location; + } + + @Override + public String toString() { + return "HCatTable [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (tabletype != null ? 
"tabletype=" + tabletype + ", " : "tabletype=null") + + (cols != null ? "cols=" + cols + ", " : "cols=null") + + (partCols != null ? "partCols=" + partCols + ", " : "partCols==null") + + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") + + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") + + "numBuckets=" + + numBuckets + + ", " + + (inputFileFormat != null ? "inputFileFormat=" + + inputFileFormat + ", " : "inputFileFormat=null") + + (outputFileFormat != null ? "outputFileFormat=" + + outputFileFormat + ", " : "outputFileFormat=null") + + (storageHandler != null ? "storageHandler=" + storageHandler + + ", " : "storageHandler=null") + + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") + + (serde != null ? "serde=" + serde + ", " : "serde=") + + (location != null ? "location=" + location : "location=") + "]"; + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ObjectNotFoundException.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ObjectNotFoundException.java index 4d4baba..0858b2b 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ObjectNotFoundException.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ObjectNotFoundException.java @@ -28,13 +28,13 @@ */ public class ObjectNotFoundException extends HCatException { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - /** - * @param message Exception message. - * @param cause The wrapped Throwable that caused this exception. - */ - public ObjectNotFoundException(String message, Throwable cause) { - super(message, cause); - } + /** + * @param message Exception message. + * @param cause The wrapped Throwable that caused this exception. + */ + public ObjectNotFoundException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ConnectionFailureException.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ConnectionFailureException.java index 4c70bae..01dbce7 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ConnectionFailureException.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ConnectionFailureException.java @@ -26,14 +26,14 @@ */ public class ConnectionFailureException extends HCatException { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - /** - * @param message Exception message. - * @param cause The wrapped Throwable that caused this exception. - */ - public ConnectionFailureException(String message, Throwable cause) { - super(message, cause); - } + /** + * @param message Exception message. + * @param cause The wrapped Throwable that caused this exception. 
+ */ + public ConnectionFailureException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java index 17fc956..afa3054 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java @@ -39,147 +39,147 @@ */ public class HCatAddPartitionDesc { - private static final Logger LOG = LoggerFactory.getLogger(HCatAddPartitionDesc.class); - private String tableName; - private String dbName; - private String location; - private Map partSpec; - - private HCatAddPartitionDesc(String dbName, String tbl, String loc, Map spec) { - this.dbName = dbName; - this.tableName = tbl; - this.location = loc; - this.partSpec = spec; - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return this.location; + private static final Logger LOG = LoggerFactory.getLogger(HCatAddPartitionDesc.class); + private String tableName; + private String dbName; + private String location; + private Map partSpec; + + private HCatAddPartitionDesc(String dbName, String tbl, String loc, Map spec) { + this.dbName = dbName; + this.tableName = tbl; + this.location = loc; + this.partSpec = spec; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.location; + } + + + /** + * Gets the partition spec. + * + * @return the partition spec + */ + public Map getPartitionSpec() { + return this.partSpec; + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return this.tableName; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + @Override + public String toString() { + return "HCatAddPartitionDesc [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (location != null ? "location=" + location + ", " : "location=null") + + (partSpec != null ? "partSpec=" + partSpec : "partSpec=null") + "]"; + } + + /** + * Creates the builder for specifying attributes. + * + * @param dbName the db name + * @param tableName the table name + * @param location the location + * @param partSpec the part spec + * @return the builder + * @throws HCatException + */ + public static Builder create(String dbName, String tableName, String location, + Map partSpec) throws HCatException { + return new Builder(dbName, tableName, location, partSpec); + } + + Partition toHivePartition(Table hiveTable) throws HCatException { + Partition hivePtn = new Partition(); + hivePtn.setDbName(this.dbName); + hivePtn.setTableName(this.tableName); + + List pvals = new ArrayList(); + for (FieldSchema field : hiveTable.getPartitionKeys()) { + String val = partSpec.get(field.getName()); + if (val == null || val.length() == 0) { + throw new HCatException("create partition: Value for key " + + field.getName() + " is null or empty"); + } + pvals.add(val); } - - /** - * Gets the partition spec. 
- * - * @return the partition spec - */ - public Map getPartitionSpec() { - return this.partSpec; + hivePtn.setValues(pvals); + StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd()); + hivePtn.setSd(sd); + hivePtn.setParameters(hiveTable.getParameters()); + if (this.location != null) { + hivePtn.getSd().setLocation(this.location); + } else { + String partName; + try { + partName = Warehouse.makePartName( + hiveTable.getPartitionKeys(), pvals); + LOG.info("Setting partition location to :" + partName); + } catch (MetaException e) { + throw new HCatException("Exception while creating partition name.", e); + } + Path partPath = new Path(hiveTable.getSd().getLocation(), partName); + hivePtn.getSd().setLocation(partPath.toString()); } + hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000)); + hivePtn.setLastAccessTimeIsSet(false); + return hivePtn; + } - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return this.tableName; - } + public static class Builder { - /** - * Gets the database name. - * - * @return the database name - */ - public String getDatabaseName() { - return this.dbName; - } + private String tableName; + private String location; + private Map values; + private String dbName; - @Override - public String toString() { - return "HCatAddPartitionDesc [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (location != null ? "location=" + location + ", " : "location=null") - + (partSpec != null ? "partSpec=" + partSpec : "partSpec=null") + "]"; + private Builder(String dbName, String tableName, String location, Map values) { + this.dbName = dbName; + this.tableName = tableName; + this.location = location; + this.values = values; } /** - * Creates the builder for specifying attributes. + * Builds the HCatAddPartitionDesc. 
* - * @param dbName the db name - * @param tableName the table name - * @param location the location - * @param partSpec the part spec - * @return the builder + * @return the h cat add partition desc * @throws HCatException */ - public static Builder create(String dbName, String tableName, String location, - Map partSpec) throws HCatException { - return new Builder(dbName, tableName, location, partSpec); - } - - Partition toHivePartition(Table hiveTable) throws HCatException { - Partition hivePtn = new Partition(); - hivePtn.setDbName(this.dbName); - hivePtn.setTableName(this.tableName); - - List pvals = new ArrayList(); - for (FieldSchema field : hiveTable.getPartitionKeys()) { - String val = partSpec.get(field.getName()); - if (val == null || val.length() == 0) { - throw new HCatException("create partition: Value for key " - + field.getName() + " is null or empty"); - } - pvals.add(val); - } - - hivePtn.setValues(pvals); - StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd()); - hivePtn.setSd(sd); - hivePtn.setParameters(hiveTable.getParameters()); - if (this.location != null) { - hivePtn.getSd().setLocation(this.location); - } else { - String partName; - try { - partName = Warehouse.makePartName( - hiveTable.getPartitionKeys(), pvals); - LOG.info("Setting partition location to :" + partName); - } catch (MetaException e) { - throw new HCatException("Exception while creating partition name.", e); - } - Path partPath = new Path(hiveTable.getSd().getLocation(), partName); - hivePtn.getSd().setLocation(partPath.toString()); - } - hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000)); - hivePtn.setLastAccessTimeIsSet(false); - return hivePtn; - } - - public static class Builder { - - private String tableName; - private String location; - private Map values; - private String dbName; - - private Builder(String dbName, String tableName, String location, Map values) { - this.dbName = dbName; - this.tableName = tableName; - this.location = location; - this.values = values; - } - - /** - * Builds the HCatAddPartitionDesc. - * - * @return the h cat add partition desc - * @throws HCatException - */ - public HCatAddPartitionDesc build() throws HCatException { - if (this.dbName == null) { - this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - } - HCatAddPartitionDesc desc = new HCatAddPartitionDesc( - this.dbName, this.tableName, this.location, - this.values); - return desc; - } + public HCatAddPartitionDesc build() throws HCatException { + if (this.dbName == null) { + this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + } + HCatAddPartitionDesc desc = new HCatAddPartitionDesc( + this.dbName, this.tableName, this.location, + this.values); + return desc; } + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java index 518d342..7df84e9 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java @@ -32,330 +32,330 @@ */ public abstract class HCatClient { - public enum DropDBMode {RESTRICT, CASCADE} - - public static final String HCAT_CLIENT_IMPL_CLASS = "hcat.client.impl.class"; - - /** - * Creates an instance of HCatClient. - * - * @param conf An instance of configuration. - * @return An instance of HCatClient. 
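Because the factory method documented here is the usual entry point to this API, a compact, self-contained sketch may help while reviewing the indentation-only changes in this class. Everything below is an assumption for illustration: the imports follow the org.apache.hive.hcatalog.api classes in this patch, the partition key dt and all database/table names are invented, and the Configuration is expected to carry working metastore settings (for example hive.metastore.uris).

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hive.hcatalog.api.HCatAddPartitionDesc;
    import org.apache.hive.hcatalog.api.HCatClient;

    public class HCatClientSketch {
      public static void main(String[] args) throws Exception {
        HCatClient client = HCatClient.create(new Configuration());
        try {
          // Register one partition of an existing partitioned table (names are hypothetical).
          Map<String, String> spec = new HashMap<String, String>();
          spec.put("dt", "20120101");
          List<HCatAddPartitionDesc> parts = new ArrayList<HCatAddPartitionDesc>();
          // Passing null for the location lets the descriptor derive it from the table location.
          parts.add(HCatAddPartitionDesc.create("mydb", "mytable", null, spec).build());
          client.addPartitions(parts);
        } finally {
          client.close();
        }
      }
    }
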
- * @throws HCatException - */ - public static HCatClient create(Configuration conf) throws HCatException { - HCatClient client = null; - String className = conf.get(HCAT_CLIENT_IMPL_CLASS, - HCatClientHMSImpl.class.getName()); - try { - Class clientClass = Class.forName(className, - true, JavaUtils.getClassLoader()).asSubclass( - HCatClient.class); - client = (HCatClient) clientClass.newInstance(); - } catch (ClassNotFoundException e) { - throw new HCatException( - "ClassNotFoundException while creating client class.", e); - } catch (InstantiationException e) { - throw new HCatException( - "InstantiationException while creating client class.", e); - } catch (IllegalAccessException e) { - throw new HCatException( - "IllegalAccessException while creating client class.", e); - } - if (client != null) { - client.initialize(conf); - } - return client; + public enum DropDBMode {RESTRICT, CASCADE} + + public static final String HCAT_CLIENT_IMPL_CLASS = "hcat.client.impl.class"; + + /** + * Creates an instance of HCatClient. + * + * @param conf An instance of configuration. + * @return An instance of HCatClient. + * @throws HCatException + */ + public static HCatClient create(Configuration conf) throws HCatException { + HCatClient client = null; + String className = conf.get(HCAT_CLIENT_IMPL_CLASS, + HCatClientHMSImpl.class.getName()); + try { + Class clientClass = Class.forName(className, + true, JavaUtils.getClassLoader()).asSubclass( + HCatClient.class); + client = (HCatClient) clientClass.newInstance(); + } catch (ClassNotFoundException e) { + throw new HCatException( + "ClassNotFoundException while creating client class.", e); + } catch (InstantiationException e) { + throw new HCatException( + "InstantiationException while creating client class.", e); + } catch (IllegalAccessException e) { + throw new HCatException( + "IllegalAccessException while creating client class.", e); } - - abstract void initialize(Configuration conf) throws HCatException; - - /** - * Get all existing databases that match the given - * pattern. The matching occurs as per Java regular expressions - * - * @param pattern java re pattern - * @return list of database names - * @throws HCatException - */ - public abstract List listDatabaseNamesByPattern(String pattern) - throws HCatException; - - /** - * Gets the database. - * - * @param dbName The name of the database. - * @return An instance of HCatDatabaseInfo. - * @throws HCatException - */ - public abstract HCatDatabase getDatabase(String dbName) throws HCatException; - - /** - * Creates the database. - * - * @param dbInfo An instance of HCatCreateDBDesc. - * @throws HCatException - */ - public abstract void createDatabase(HCatCreateDBDesc dbInfo) - throws HCatException; - - /** - * Drops a database. - * - * @param dbName The name of the database to delete. - * @param ifExists Hive returns an error if the database specified does not exist, - * unless ifExists is set to true. - * @param mode This is set to either "restrict" or "cascade". Restrict will - * remove the schema if all the tables are empty. Cascade removes - * everything including data and definitions. - * @throws HCatException - */ - public abstract void dropDatabase(String dbName, boolean ifExists, - DropDBMode mode) throws HCatException; - - /** - * Returns all existing tables from the specified database which match the given - * pattern. The matching occurs as per Java regular expressions. 
- * @param dbName The name of the DB (to be searched) - * @param tablePattern The regex for the table-name - * @return list of table names - * @throws HCatException - */ - public abstract List listTableNamesByPattern(String dbName, String tablePattern) - throws HCatException; - - /** - * Gets the table. - * - * @param dbName The name of the database. - * @param tableName The name of the table. - * @return An instance of HCatTableInfo. - * @throws HCatException - */ - public abstract HCatTable getTable(String dbName, String tableName) - throws HCatException; - - /** - * Creates the table. - * - * @param createTableDesc An instance of HCatCreateTableDesc class. - * @throws HCatException - */ - public abstract void createTable(HCatCreateTableDesc createTableDesc) throws HCatException; - - /** - * Updates the Table's column schema to the specified definition. - * - * @param dbName The name of the database. - * @param tableName The name of the table. - * @param columnSchema The (new) definition of the column schema (i.e. list of fields). - * - */ - public abstract void updateTableSchema(String dbName, String tableName, List columnSchema) - throws HCatException; - - /** - * Creates the table like an existing table. - * - * @param dbName The name of the database. - * @param existingTblName The name of the existing table. - * @param newTableName The name of the new table. - * @param ifNotExists If true, then error related to already table existing is skipped. - * @param isExternal Set to "true", if table has be created at a different - * location other than default. - * @param location The location for the table. - * @throws HCatException - */ - public abstract void createTableLike(String dbName, String existingTblName, - String newTableName, boolean ifNotExists, boolean isExternal, - String location) throws HCatException; - - /** - * Drop table. - * - * @param dbName The name of the database. - * @param tableName The name of the table. - * @param ifExists Hive returns an error if the database specified does not exist, - * unless ifExists is set to true. - * @throws HCatException - */ - public abstract void dropTable(String dbName, String tableName, - boolean ifExists) throws HCatException; - - /** - * Renames a table. - * - * @param dbName The name of the database. - * @param oldName The name of the table to be renamed. - * @param newName The new name of the table. - * @throws HCatException - */ - public abstract void renameTable(String dbName, String oldName, - String newName) throws HCatException; - - /** - * Gets all the partitions. - * - * @param dbName The name of the database. - * @param tblName The name of the table. - * @return A list of partitions. - * @throws HCatException - */ - public abstract List getPartitions(String dbName, String tblName) - throws HCatException; - - /** - * Gets all the partitions that match the specified (and possibly partial) partition specification. - * A partial partition-specification is one where not all partition-keys have associated values. For example, - * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), - * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, - * 1. Complete partition spec: getPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would return 1 partition. - * 2. Partial partition spec: getPartitions('myDb', 'myTable', {dt='20120101'}) would return all 3 partitions, - * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). 
- * @param dbName The name of the database. - * @param tblName The name of the table. - * @param partitionSpec The partition specification. (Need not include all partition keys.) - * @return A list of partitions. - * @throws HCatException - */ - public abstract List getPartitions(String dbName, String tblName, Map partitionSpec) - throws HCatException; - - /** - * Gets the partition. - * - * @param dbName The database name. - * @param tableName The table name. - * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. All partition-key-values - * must be specified. - * @return An instance of HCatPartitionInfo. - * @throws HCatException - */ - public abstract HCatPartition getPartition(String dbName, String tableName, - Map partitionSpec) throws HCatException; - - /** - * Adds the partition. - * - * @param partInfo An instance of HCatAddPartitionDesc. - * @throws HCatException - */ - public abstract void addPartition(HCatAddPartitionDesc partInfo) - throws HCatException; - - /** - * Adds a list of partitions. - * - * @param partInfoList A list of HCatAddPartitionDesc. - * @return The number of partitions added. - * @throws HCatException - */ - public abstract int addPartitions(List partInfoList) - throws HCatException; - - /** - * Drops partition(s) that match the specified (and possibly partial) partition specification. - * A partial partition-specification is one where not all partition-keys have associated values. For example, - * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), - * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, - * 1. Complete partition spec: dropPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would drop 1 partition. - * 2. Partial partition spec: dropPartitions('myDb', 'myTable', {dt='20120101'}) would drop all 3 partitions, - * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). - * @param dbName The database name. - * @param tableName The table name. - * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. - * @param ifExists Hive returns an error if the partition specified does not exist, unless ifExists is set to true. - * @throws HCatException,ConnectionFailureException - */ - public abstract void dropPartitions(String dbName, String tableName, - Map partitionSpec, boolean ifExists) - throws HCatException; - - /** - * List partitions by filter. - * - * @param dbName The database name. - * @param tblName The table name. - * @param filter The filter string, - * for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". Filtering can - * be done only on string partition keys. - * @return list of partitions - * @throws HCatException - */ - public abstract List listPartitionsByFilter(String dbName, String tblName, - String filter) throws HCatException; - - /** - * Mark partition for event. - * - * @param dbName The database name. - * @param tblName The table name. - * @param partKVs the key-values associated with the partition. - * @param eventType the event type - * @throws HCatException - */ - public abstract void markPartitionForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException; - - /** - * Checks if a partition is marked for event. - * - * @param dbName the db name - * @param tblName the table name - * @param partKVs the key-values associated with the partition. 
- * @param eventType the event type - * @return true, if is partition marked for event - * @throws HCatException - */ - public abstract boolean isPartitionMarkedForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException; - - /** - * Gets the delegation token. - * - * @param owner the owner - * @param renewerKerberosPrincipalName the renewer kerberos principal name - * @return the delegation token - * @throws HCatException,ConnectionFailureException - */ - public abstract String getDelegationToken(String owner, - String renewerKerberosPrincipalName) throws HCatException; - - /** - * Renew delegation token. - * - * @param tokenStrForm the token string - * @return the new expiration time - * @throws HCatException - */ - public abstract long renewDelegationToken(String tokenStrForm) - throws HCatException; - - /** - * Cancel delegation token. - * - * @param tokenStrForm the token string - * @throws HCatException - */ - public abstract void cancelDelegationToken(String tokenStrForm) - throws HCatException; - - /** - * Retrieve Message-bus topic for a table. - * - * @param dbName The name of the DB. - * @param tableName The name of the table. - * @return Topic-name for the message-bus on which messages will be sent for the specified table. - * By default, this is set to .. Returns null when not set. - */ - public abstract String getMessageBusTopicName(String dbName, String tableName) throws HCatException; - - /** - * Close the hcatalog client. - * - * @throws HCatException - */ - public abstract void close() throws HCatException; + if (client != null) { + client.initialize(conf); + } + return client; + } + + abstract void initialize(Configuration conf) throws HCatException; + + /** + * Get all existing databases that match the given + * pattern. The matching occurs as per Java regular expressions + * + * @param pattern java re pattern + * @return list of database names + * @throws HCatException + */ + public abstract List listDatabaseNamesByPattern(String pattern) + throws HCatException; + + /** + * Gets the database. + * + * @param dbName The name of the database. + * @return An instance of HCatDatabaseInfo. + * @throws HCatException + */ + public abstract HCatDatabase getDatabase(String dbName) throws HCatException; + + /** + * Creates the database. + * + * @param dbInfo An instance of HCatCreateDBDesc. + * @throws HCatException + */ + public abstract void createDatabase(HCatCreateDBDesc dbInfo) + throws HCatException; + + /** + * Drops a database. + * + * @param dbName The name of the database to delete. + * @param ifExists Hive returns an error if the database specified does not exist, + * unless ifExists is set to true. + * @param mode This is set to either "restrict" or "cascade". Restrict will + * remove the schema if all the tables are empty. Cascade removes + * everything including data and definitions. + * @throws HCatException + */ + public abstract void dropDatabase(String dbName, boolean ifExists, + DropDBMode mode) throws HCatException; + + /** + * Returns all existing tables from the specified database which match the given + * pattern. The matching occurs as per Java regular expressions. + * @param dbName The name of the DB (to be searched) + * @param tablePattern The regex for the table-name + * @return list of table names + * @throws HCatException + */ + public abstract List listTableNamesByPattern(String dbName, String tablePattern) + throws HCatException; + + /** + * Gets the table. 
+ * + * @param dbName The name of the database. + * @param tableName The name of the table. + * @return An instance of HCatTableInfo. + * @throws HCatException + */ + public abstract HCatTable getTable(String dbName, String tableName) + throws HCatException; + + /** + * Creates the table. + * + * @param createTableDesc An instance of HCatCreateTableDesc class. + * @throws HCatException + */ + public abstract void createTable(HCatCreateTableDesc createTableDesc) throws HCatException; + + /** + * Updates the Table's column schema to the specified definition. + * + * @param dbName The name of the database. + * @param tableName The name of the table. + * @param columnSchema The (new) definition of the column schema (i.e. list of fields). + * + */ + public abstract void updateTableSchema(String dbName, String tableName, List columnSchema) + throws HCatException; + + /** + * Creates the table like an existing table. + * + * @param dbName The name of the database. + * @param existingTblName The name of the existing table. + * @param newTableName The name of the new table. + * @param ifNotExists If true, then error related to already table existing is skipped. + * @param isExternal Set to "true", if table has be created at a different + * location other than default. + * @param location The location for the table. + * @throws HCatException + */ + public abstract void createTableLike(String dbName, String existingTblName, + String newTableName, boolean ifNotExists, boolean isExternal, + String location) throws HCatException; + + /** + * Drop table. + * + * @param dbName The name of the database. + * @param tableName The name of the table. + * @param ifExists Hive returns an error if the database specified does not exist, + * unless ifExists is set to true. + * @throws HCatException + */ + public abstract void dropTable(String dbName, String tableName, + boolean ifExists) throws HCatException; + + /** + * Renames a table. + * + * @param dbName The name of the database. + * @param oldName The name of the table to be renamed. + * @param newName The new name of the table. + * @throws HCatException + */ + public abstract void renameTable(String dbName, String oldName, + String newName) throws HCatException; + + /** + * Gets all the partitions. + * + * @param dbName The name of the database. + * @param tblName The name of the table. + * @return A list of partitions. + * @throws HCatException + */ + public abstract List getPartitions(String dbName, String tblName) + throws HCatException; + + /** + * Gets all the partitions that match the specified (and possibly partial) partition specification. + * A partial partition-specification is one where not all partition-keys have associated values. For example, + * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), + * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, + * 1. Complete partition spec: getPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would return 1 partition. + * 2. Partial partition spec: getPartitions('myDb', 'myTable', {dt='20120101'}) would return all 3 partitions, + * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). + * @param dbName The name of the database. + * @param tblName The name of the table. + * @param partitionSpec The partition specification. (Need not include all partition keys.) + * @return A list of partitions. 
+ * @throws HCatException + */ + public abstract List getPartitions(String dbName, String tblName, Map partitionSpec) + throws HCatException; + + /** + * Gets the partition. + * + * @param dbName The database name. + * @param tableName The table name. + * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. All partition-key-values + * must be specified. + * @return An instance of HCatPartitionInfo. + * @throws HCatException + */ + public abstract HCatPartition getPartition(String dbName, String tableName, + Map partitionSpec) throws HCatException; + + /** + * Adds the partition. + * + * @param partInfo An instance of HCatAddPartitionDesc. + * @throws HCatException + */ + public abstract void addPartition(HCatAddPartitionDesc partInfo) + throws HCatException; + + /** + * Adds a list of partitions. + * + * @param partInfoList A list of HCatAddPartitionDesc. + * @return The number of partitions added. + * @throws HCatException + */ + public abstract int addPartitions(List partInfoList) + throws HCatException; + + /** + * Drops partition(s) that match the specified (and possibly partial) partition specification. + * A partial partition-specification is one where not all partition-keys have associated values. For example, + * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), + * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, + * 1. Complete partition spec: dropPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would drop 1 partition. + * 2. Partial partition spec: dropPartitions('myDb', 'myTable', {dt='20120101'}) would drop all 3 partitions, + * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). + * @param dbName The database name. + * @param tableName The table name. + * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. + * @param ifExists Hive returns an error if the partition specified does not exist, unless ifExists is set to true. + * @throws HCatException,ConnectionFailureException + */ + public abstract void dropPartitions(String dbName, String tableName, + Map partitionSpec, boolean ifExists) + throws HCatException; + + /** + * List partitions by filter. + * + * @param dbName The database name. + * @param tblName The table name. + * @param filter The filter string, + * for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". Filtering can + * be done only on string partition keys. + * @return list of partitions + * @throws HCatException + */ + public abstract List listPartitionsByFilter(String dbName, String tblName, + String filter) throws HCatException; + + /** + * Mark partition for event. + * + * @param dbName The database name. + * @param tblName The table name. + * @param partKVs the key-values associated with the partition. + * @param eventType the event type + * @throws HCatException + */ + public abstract void markPartitionForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException; + + /** + * Checks if a partition is marked for event. + * + * @param dbName the db name + * @param tblName the table name + * @param partKVs the key-values associated with the partition. 
+ * @param eventType the event type + * @return true, if is partition marked for event + * @throws HCatException + */ + public abstract boolean isPartitionMarkedForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException; + + /** + * Gets the delegation token. + * + * @param owner the owner + * @param renewerKerberosPrincipalName the renewer kerberos principal name + * @return the delegation token + * @throws HCatException,ConnectionFailureException + */ + public abstract String getDelegationToken(String owner, + String renewerKerberosPrincipalName) throws HCatException; + + /** + * Renew delegation token. + * + * @param tokenStrForm the token string + * @return the new expiration time + * @throws HCatException + */ + public abstract long renewDelegationToken(String tokenStrForm) + throws HCatException; + + /** + * Cancel delegation token. + * + * @param tokenStrForm the token string + * @throws HCatException + */ + public abstract void cancelDelegationToken(String tokenStrForm) + throws HCatException; + + /** + * Retrieve Message-bus topic for a table. + * + * @param dbName The name of the DB. + * @param tableName The name of the table. + * @return Topic-name for the message-bus on which messages will be sent for the specified table. + * By default, this is set to .. Returns null when not set. + */ + public abstract String getMessageBusTopicName(String dbName, String tableName) throws HCatException; + + /** + * Close the hcatalog client. + * + * @throws HCatException + */ + public abstract void close() throws HCatException; } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java index aa5f6ea..c4b5971 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java @@ -54,670 +54,670 @@ */ public class HCatClientHMSImpl extends HCatClient { - private HiveMetaStoreClient hmsClient; - private Configuration config; - private HiveConf hiveConfig; - - @Override - public List listDatabaseNamesByPattern(String pattern) - throws HCatException { - List dbNames = null; - try { - dbNames = hmsClient.getDatabases(pattern); - } catch (MetaException exp) { - throw new HCatException("MetaException while listing db names", exp); - } - return dbNames; - } - - @Override - public HCatDatabase getDatabase(String dbName) throws HCatException { - HCatDatabase db = null; - try { - Database hiveDB = hmsClient.getDatabase(checkDB(dbName)); - if (hiveDB != null) { - db = new HCatDatabase(hiveDB); - } - } catch (NoSuchObjectException exp) { - throw new ObjectNotFoundException( - "NoSuchObjectException while fetching database", exp); - } catch (MetaException exp) { - throw new HCatException("MetaException while fetching database", - exp); - } catch (TException exp) { - throw new ConnectionFailureException( - "TException while fetching database", exp); - } - return db; - } - - @Override - public void createDatabase(HCatCreateDBDesc dbInfo) throws HCatException { - try { - hmsClient.createDatabase(dbInfo.toHiveDb()); - } catch (AlreadyExistsException exp) { - if (!dbInfo.getIfNotExists()) { - throw new HCatException( - "AlreadyExistsException while creating database", exp); - } - } catch (InvalidObjectException exp) { - throw new HCatException( - "InvalidObjectException while 
creating database", exp); - } catch (MetaException exp) { - throw new HCatException("MetaException while creating database", - exp); - } catch (TException exp) { - throw new ConnectionFailureException( - "TException while creating database", exp); - } + private HiveMetaStoreClient hmsClient; + private Configuration config; + private HiveConf hiveConfig; + + @Override + public List listDatabaseNamesByPattern(String pattern) + throws HCatException { + List dbNames = null; + try { + dbNames = hmsClient.getDatabases(pattern); + } catch (MetaException exp) { + throw new HCatException("MetaException while listing db names", exp); } - - @Override - public void dropDatabase(String dbName, boolean ifExists, DropDBMode mode) - throws HCatException { - boolean isCascade = mode.toString().equalsIgnoreCase("cascade"); - try { - hmsClient.dropDatabase(checkDB(dbName), true, ifExists, isCascade); - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping db.", e); - } - } catch (InvalidOperationException e) { - throw new HCatException( - "InvalidOperationException while dropping db.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while dropping db.", e); - } catch (TException e) { - throw new ConnectionFailureException("TException while dropping db.", - e); - } + return dbNames; + } + + @Override + public HCatDatabase getDatabase(String dbName) throws HCatException { + HCatDatabase db = null; + try { + Database hiveDB = hmsClient.getDatabase(checkDB(dbName)); + if (hiveDB != null) { + db = new HCatDatabase(hiveDB); + } + } catch (NoSuchObjectException exp) { + throw new ObjectNotFoundException( + "NoSuchObjectException while fetching database", exp); + } catch (MetaException exp) { + throw new HCatException("MetaException while fetching database", + exp); + } catch (TException exp) { + throw new ConnectionFailureException( + "TException while fetching database", exp); + } + return db; + } + + @Override + public void createDatabase(HCatCreateDBDesc dbInfo) throws HCatException { + try { + hmsClient.createDatabase(dbInfo.toHiveDb()); + } catch (AlreadyExistsException exp) { + if (!dbInfo.getIfNotExists()) { + throw new HCatException( + "AlreadyExistsException while creating database", exp); + } + } catch (InvalidObjectException exp) { + throw new HCatException( + "InvalidObjectException while creating database", exp); + } catch (MetaException exp) { + throw new HCatException("MetaException while creating database", + exp); + } catch (TException exp) { + throw new ConnectionFailureException( + "TException while creating database", exp); + } + } + + @Override + public void dropDatabase(String dbName, boolean ifExists, DropDBMode mode) + throws HCatException { + boolean isCascade = mode.toString().equalsIgnoreCase("cascade"); + try { + hmsClient.dropDatabase(checkDB(dbName), true, ifExists, isCascade); + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping db.", e); + } + } catch (InvalidOperationException e) { + throw new HCatException( + "InvalidOperationException while dropping db.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while dropping db.", e); + } catch (TException e) { + throw new ConnectionFailureException("TException while dropping db.", + e); + } + } + + @Override + public List listTableNamesByPattern(String dbName, + String tablePattern) throws HCatException { + List tableNames = 
null; + try { + tableNames = hmsClient.getTables(checkDB(dbName), tablePattern); + } catch (MetaException e) { + throw new HCatException( + "MetaException while fetching table names.", e); + } + return tableNames; + } + + @Override + public HCatTable getTable(String dbName, String tableName) + throws HCatException { + HCatTable table = null; + try { + Table hiveTable = hmsClient.getTable(checkDB(dbName), tableName); + if (hiveTable != null) { + table = new HCatTable(hiveTable); + } + } catch (MetaException e) { + throw new HCatException("MetaException while fetching table.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while fetching table.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while fetching table.", e); + } + return table; + } + + @Override + public void createTable(HCatCreateTableDesc createTableDesc) + throws HCatException { + try { + hmsClient.createTable(createTableDesc.toHiveTable(hiveConfig)); + } catch (AlreadyExistsException e) { + if (!createTableDesc.getIfNotExists()) { + throw new HCatException( + "AlreadyExistsException while creating table.", e); + } + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException while creating table.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while creating table.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while creating table.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while creating table.", e); + } catch (IOException e) { + throw new HCatException("IOException while creating hive conf.", e); } - @Override - public List listTableNamesByPattern(String dbName, - String tablePattern) throws HCatException { - List tableNames = null; - try { - tableNames = hmsClient.getTables(checkDB(dbName), tablePattern); - } catch (MetaException e) { - throw new HCatException( - "MetaException while fetching table names.", e); - } - return tableNames; - } - - @Override - public HCatTable getTable(String dbName, String tableName) - throws HCatException { - HCatTable table = null; - try { - Table hiveTable = hmsClient.getTable(checkDB(dbName), tableName); - if (hiveTable != null) { - table = new HCatTable(hiveTable); - } - } catch (MetaException e) { - throw new HCatException("MetaException while fetching table.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while fetching table.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while fetching table.", e); - } - return table; - } - - @Override - public void createTable(HCatCreateTableDesc createTableDesc) - throws HCatException { - try { - hmsClient.createTable(createTableDesc.toHiveTable(hiveConfig)); - } catch (AlreadyExistsException e) { - if (!createTableDesc.getIfNotExists()) { - throw new HCatException( - "AlreadyExistsException while creating table.", e); - } - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException while creating table.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while creating table.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while creating table.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while creating table.", e); - } catch (IOException e) { - 
throw new HCatException("IOException while creating hive conf.", e); - } + } + @Override + public void updateTableSchema(String dbName, String tableName, List columnSchema) + throws HCatException { + try { + Table table = hmsClient.getTable(dbName, tableName); + table.getSd().setCols(HCatSchemaUtils.getFieldSchemas(columnSchema)); + hmsClient.alter_table(dbName, tableName, table); } - - @Override - public void updateTableSchema(String dbName, String tableName, List columnSchema) - throws HCatException { - try { - Table table = hmsClient.getTable(dbName, tableName); - table.getSd().setCols(HCatSchemaUtils.getFieldSchemas(columnSchema)); - hmsClient.alter_table(dbName, tableName, table); - } - catch (InvalidOperationException e) { - throw new HCatException("InvalidOperationException while updating table schema.", e); - } - catch (MetaException e) { - throw new HCatException("MetaException while updating table schema.", e); - } - catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while updating table schema.", e); - } - catch (TException e) { - throw new ConnectionFailureException( - "TException while updating table schema.", e); - } + catch (InvalidOperationException e) { + throw new HCatException("InvalidOperationException while updating table schema.", e); } - - @Override - public void createTableLike(String dbName, String existingTblName, - String newTableName, boolean ifNotExists, boolean isExternal, - String location) throws HCatException { - - Table hiveTable = getHiveTableLike(checkDB(dbName), existingTblName, - newTableName, ifNotExists, location); - if (hiveTable != null) { - try { - hmsClient.createTable(hiveTable); - } catch (AlreadyExistsException e) { - if (!ifNotExists) { - throw new HCatException( - "A table already exists with the name " - + newTableName, e); - } - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException in create table like command.", - e); - } catch (MetaException e) { - throw new HCatException( - "MetaException in create table like command.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException in create table like command.", - e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException in create table like command.", e); - } - } + catch (MetaException e) { + throw new HCatException("MetaException while updating table schema.", e); } - - @Override - public void dropTable(String dbName, String tableName, boolean ifExists) - throws HCatException { - try { - hmsClient.dropTable(checkDB(dbName), tableName, true, ifExists); - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping table.", e); - } - } catch (MetaException e) { - throw new HCatException("MetaException while dropping table.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while dropping table.", e); - } + catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while updating table schema.", e); } - - @Override - public void renameTable(String dbName, String oldName, String newName) - throws HCatException { - Table tbl; - try { - Table oldtbl = hmsClient.getTable(checkDB(dbName), oldName); - if (oldtbl != null) { - // TODO : Should be moved out. 
- if (oldtbl - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE) != null) { - throw new HCatException( - "Cannot use rename command on a non-native table"); - } - tbl = new Table(oldtbl); - tbl.setTableName(newName); - hmsClient.alter_table(checkDB(dbName), oldName, tbl); - } - } catch (MetaException e) { - throw new HCatException("MetaException while renaming table", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while renaming table", e); - } catch (InvalidOperationException e) { - throw new HCatException( - "InvalidOperationException while renaming table", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while renaming table", e); - } + catch (TException e) { + throw new ConnectionFailureException( + "TException while updating table schema.", e); } - - @Override - public List getPartitions(String dbName, String tblName) - throws HCatException { - List hcatPtns = new ArrayList(); - try { - List hivePtns = hmsClient.listPartitions( - checkDB(dbName), tblName, (short) -1); - for (Partition ptn : hivePtns) { - hcatPtns.add(new HCatPartition(ptn)); - } - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while retrieving partition.", e); - } catch (MetaException e) { - throw new HCatException( - "MetaException while retrieving partition.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while retrieving partition.", e); - } - return hcatPtns; + } + + @Override + public void createTableLike(String dbName, String existingTblName, + String newTableName, boolean ifNotExists, boolean isExternal, + String location) throws HCatException { + + Table hiveTable = getHiveTableLike(checkDB(dbName), existingTblName, + newTableName, ifNotExists, location); + if (hiveTable != null) { + try { + hmsClient.createTable(hiveTable); + } catch (AlreadyExistsException e) { + if (!ifNotExists) { + throw new HCatException( + "A table already exists with the name " + + newTableName, e); + } + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException in create table like command.", + e); + } catch (MetaException e) { + throw new HCatException( + "MetaException in create table like command.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException in create table like command.", + e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException in create table like command.", e); + } } - - @Override - public List getPartitions(String dbName, String tblName, Map partitionSpec) throws HCatException { - return listPartitionsByFilter(dbName, tblName, getFilterString(partitionSpec)); + } + + @Override + public void dropTable(String dbName, String tableName, boolean ifExists) + throws HCatException { + try { + hmsClient.dropTable(checkDB(dbName), tableName, true, ifExists); + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping table.", e); + } + } catch (MetaException e) { + throw new HCatException("MetaException while dropping table.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while dropping table.", e); + } + } + + @Override + public void renameTable(String dbName, String oldName, String newName) + throws HCatException { + Table tbl; + try { + Table oldtbl = 
hmsClient.getTable(checkDB(dbName), oldName); + if (oldtbl != null) { + // TODO : Should be moved out. + if (oldtbl + .getParameters() + .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE) != null) { + throw new HCatException( + "Cannot use rename command on a non-native table"); + } + tbl = new Table(oldtbl); + tbl.setTableName(newName); + hmsClient.alter_table(checkDB(dbName), oldName, tbl); + } + } catch (MetaException e) { + throw new HCatException("MetaException while renaming table", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while renaming table", e); + } catch (InvalidOperationException e) { + throw new HCatException( + "InvalidOperationException while renaming table", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while renaming table", e); } + } + + @Override + public List getPartitions(String dbName, String tblName) + throws HCatException { + List hcatPtns = new ArrayList(); + try { + List hivePtns = hmsClient.listPartitions( + checkDB(dbName), tblName, (short) -1); + for (Partition ptn : hivePtns) { + hcatPtns.add(new HCatPartition(ptn)); + } + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while retrieving partition.", e); + } catch (MetaException e) { + throw new HCatException( + "MetaException while retrieving partition.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while retrieving partition.", e); + } + return hcatPtns; + } - private static String getFilterString(Map partitionSpec) { - final String AND = " AND "; + @Override + public List getPartitions(String dbName, String tblName, Map partitionSpec) throws HCatException { + return listPartitionsByFilter(dbName, tblName, getFilterString(partitionSpec)); + } - StringBuilder filter = new StringBuilder(); - for (Map.Entry entry : partitionSpec.entrySet()) { - filter.append(entry.getKey()).append("=").append("\"").append(entry.getValue()).append("\"").append(AND); - } + private static String getFilterString(Map partitionSpec) { + final String AND = " AND "; - int length = filter.toString().length(); - if (length > 0) - filter.delete(length - AND.length(), length); - - return filter.toString(); - } - - @Override - public HCatPartition getPartition(String dbName, String tableName, - Map partitionSpec) throws HCatException { - HCatPartition partition = null; - try { - List partitionColumns = getTable(checkDB(dbName), tableName).getPartCols(); - if (partitionColumns.size() != partitionSpec.size()) { - throw new HCatException("Partition-spec doesn't have the right number of partition keys."); - } - - ArrayList ptnValues = new ArrayList(); - for (HCatFieldSchema partitionColumn : partitionColumns) { - String partKey = partitionColumn.getName(); - if (partitionSpec.containsKey(partKey)) { - ptnValues.add(partitionSpec.get(partKey)); // Partition-keys added in order. 
- } - else { - throw new HCatException("Invalid partition-key specified: " + partKey); - } - } - Partition hivePartition = hmsClient.getPartition(checkDB(dbName), - tableName, ptnValues); - if (hivePartition != null) { - partition = new HCatPartition(hivePartition); - } - } catch (MetaException e) { - throw new HCatException( - "MetaException while retrieving partition.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while retrieving partition.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while retrieving partition.", e); - } - return partition; - } - - @Override - public void addPartition(HCatAddPartitionDesc partInfo) - throws HCatException { - Table tbl = null; - try { - tbl = hmsClient.getTable(partInfo.getDatabaseName(), - partInfo.getTableName()); - // TODO: Should be moved out. - if (tbl.getPartitionKeysSize() == 0) { - throw new HCatException("The table " + partInfo.getTableName() - + " is not partitioned."); - } - - hmsClient.add_partition(partInfo.toHivePartition(tbl)); - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException while adding partition.", e); - } catch (AlreadyExistsException e) { - throw new HCatException( - "AlreadyExistsException while adding partition.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while adding partition.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException("The table " + partInfo.getTableName() - + " is could not be found.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while adding partition.", e); - } + StringBuilder filter = new StringBuilder(); + for (Map.Entry entry : partitionSpec.entrySet()) { + filter.append(entry.getKey()).append("=").append("\"").append(entry.getValue()).append("\"").append(AND); } - @Override - public void dropPartitions(String dbName, String tableName, - Map partitionSpec, boolean ifExists) - throws HCatException { - try { - dbName = checkDB(dbName); - List partitions = hmsClient.listPartitionsByFilter(dbName, tableName, - getFilterString(partitionSpec), (short)-1); - - for (Partition partition : partitions) { - dropPartition(partition, ifExists); - } - - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping partition. " + - "Either db(" + dbName + ") or table(" + tableName + ") missing.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while dropping partition.", - e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while dropping partition.", e); - } + int length = filter.toString().length(); + if (length > 0) + filter.delete(length - AND.length(), length); + + return filter.toString(); + } + + @Override + public HCatPartition getPartition(String dbName, String tableName, + Map partitionSpec) throws HCatException { + HCatPartition partition = null; + try { + List partitionColumns = getTable(checkDB(dbName), tableName).getPartCols(); + if (partitionColumns.size() != partitionSpec.size()) { + throw new HCatException("Partition-spec doesn't have the right number of partition keys."); + } + + ArrayList ptnValues = new ArrayList(); + for (HCatFieldSchema partitionColumn : partitionColumns) { + String partKey = partitionColumn.getName(); + if (partitionSpec.containsKey(partKey)) { + ptnValues.add(partitionSpec.get(partKey)); // Partition-keys added in order. 
+ } + else { + throw new HCatException("Invalid partition-key specified: " + partKey); + } + } + Partition hivePartition = hmsClient.getPartition(checkDB(dbName), + tableName, ptnValues); + if (hivePartition != null) { + partition = new HCatPartition(hivePartition); + } + } catch (MetaException e) { + throw new HCatException( + "MetaException while retrieving partition.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while retrieving partition.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while retrieving partition.", e); } - - private void dropPartition(Partition partition, boolean ifExists) - throws HCatException, MetaException, TException { - try { - hmsClient.dropPartition(partition.getDbName(), partition.getTableName(), partition.getValues()); - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping partition: " + partition.getValues(), e); - } - } + return partition; + } + + @Override + public void addPartition(HCatAddPartitionDesc partInfo) + throws HCatException { + Table tbl = null; + try { + tbl = hmsClient.getTable(partInfo.getDatabaseName(), + partInfo.getTableName()); + // TODO: Should be moved out. + if (tbl.getPartitionKeysSize() == 0) { + throw new HCatException("The table " + partInfo.getTableName() + + " is not partitioned."); + } + + hmsClient.add_partition(partInfo.toHivePartition(tbl)); + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException while adding partition.", e); + } catch (AlreadyExistsException e) { + throw new HCatException( + "AlreadyExistsException while adding partition.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while adding partition.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException("The table " + partInfo.getTableName() + + " is could not be found.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while adding partition.", e); } - - @Override - public List listPartitionsByFilter(String dbName, - String tblName, String filter) throws HCatException { - List hcatPtns = new ArrayList(); - try { - List hivePtns = hmsClient.listPartitionsByFilter( - checkDB(dbName), tblName, filter, (short) -1); - for (Partition ptn : hivePtns) { - hcatPtns.add(new HCatPartition(ptn)); - } - } catch (MetaException e) { - throw new HCatException("MetaException while fetching partitions.", - e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while fetching partitions.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while fetching partitions.", e); - } - return hcatPtns; - } - - @Override - public void markPartitionForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException { - try { - hmsClient.markPartitionForEvent(checkDB(dbName), tblName, partKVs, - eventType); - } catch (MetaException e) { - throw new HCatException( - "MetaException while marking partition for event.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while marking partition for event.", - e); - } catch (UnknownTableException e) { - throw new HCatException( - "UnknownTableException while marking partition for event.", - e); - } catch (UnknownDBException e) { - throw new HCatException( - "UnknownDBException 
while marking partition for event.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while marking partition for event.", e); - } + } + + @Override + public void dropPartitions(String dbName, String tableName, + Map partitionSpec, boolean ifExists) + throws HCatException { + try { + dbName = checkDB(dbName); + List partitions = hmsClient.listPartitionsByFilter(dbName, tableName, + getFilterString(partitionSpec), (short)-1); + + for (Partition partition : partitions) { + dropPartition(partition, ifExists); + } + + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping partition. " + + "Either db(" + dbName + ") or table(" + tableName + ") missing.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while dropping partition.", + e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while dropping partition.", e); } - - @Override - public boolean isPartitionMarkedForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException { - boolean isMarked = false; - try { - isMarked = hmsClient.isPartitionMarkedForEvent(checkDB(dbName), - tblName, partKVs, eventType); - } catch (MetaException e) { - throw new HCatException( - "MetaException while checking partition for event.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while checking partition for event.", - e); - } catch (UnknownTableException e) { - throw new HCatException( - "UnknownTableException while checking partition for event.", - e); - } catch (UnknownDBException e) { - throw new HCatException( - "UnknownDBException while checking partition for event.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while checking partition for event.", e); - } - return isMarked; - } - - @Override - public String getDelegationToken(String owner, - String renewerKerberosPrincipalName) throws HCatException { - String token = null; - try { - token = hmsClient.getDelegationToken(owner, - renewerKerberosPrincipalName); - } catch (MetaException e) { - throw new HCatException( - "MetaException while getting delegation token.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while getting delegation token.", e); - } - - return token; + } + + private void dropPartition(Partition partition, boolean ifExists) + throws HCatException, MetaException, TException { + try { + hmsClient.dropPartition(partition.getDbName(), partition.getTableName(), partition.getValues()); + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping partition: " + partition.getValues(), e); + } } - - @Override - public long renewDelegationToken(String tokenStrForm) throws HCatException { - long time = 0; - try { - time = hmsClient.renewDelegationToken(tokenStrForm); - } catch (MetaException e) { - throw new HCatException( - "MetaException while renewing delegation token.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while renewing delegation token.", e); - } - - return time; + } + + @Override + public List listPartitionsByFilter(String dbName, + String tblName, String filter) throws HCatException { + List hcatPtns = new ArrayList(); + try { + List hivePtns = hmsClient.listPartitionsByFilter( + checkDB(dbName), tblName, filter, (short) -1); + for (Partition 
ptn : hivePtns) { + hcatPtns.add(new HCatPartition(ptn)); + } + } catch (MetaException e) { + throw new HCatException("MetaException while fetching partitions.", + e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while fetching partitions.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while fetching partitions.", e); } - - @Override - public void cancelDelegationToken(String tokenStrForm) - throws HCatException { - try { - hmsClient.cancelDelegationToken(tokenStrForm); - } catch (MetaException e) { - throw new HCatException( - "MetaException while canceling delegation token.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while canceling delegation token.", e); - } + return hcatPtns; + } + + @Override + public void markPartitionForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException { + try { + hmsClient.markPartitionForEvent(checkDB(dbName), tblName, partKVs, + eventType); + } catch (MetaException e) { + throw new HCatException( + "MetaException while marking partition for event.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while marking partition for event.", + e); + } catch (UnknownTableException e) { + throw new HCatException( + "UnknownTableException while marking partition for event.", + e); + } catch (UnknownDBException e) { + throw new HCatException( + "UnknownDBException while marking partition for event.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while marking partition for event.", e); } - - /* - * @param conf /* @throws HCatException,ConnectionFailureException - * - * @see - * org.apache.hive.hcatalog.api.HCatClient#initialize(org.apache.hadoop.conf. 
- * Configuration) - */ - @Override - void initialize(Configuration conf) throws HCatException { - this.config = conf; - try { - hiveConfig = HCatUtil.getHiveConf(config); - hmsClient = HCatUtil.getHiveClient(hiveConfig); - } catch (MetaException exp) { - throw new HCatException("MetaException while creating HMS client", - exp); - } catch (IOException exp) { - throw new HCatException("IOException while creating HMS client", - exp); - } - + } + + @Override + public boolean isPartitionMarkedForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException { + boolean isMarked = false; + try { + isMarked = hmsClient.isPartitionMarkedForEvent(checkDB(dbName), + tblName, partKVs, eventType); + } catch (MetaException e) { + throw new HCatException( + "MetaException while checking partition for event.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while checking partition for event.", + e); + } catch (UnknownTableException e) { + throw new HCatException( + "UnknownTableException while checking partition for event.", + e); + } catch (UnknownDBException e) { + throw new HCatException( + "UnknownDBException while checking partition for event.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while checking partition for event.", e); } - - private Table getHiveTableLike(String dbName, String existingTblName, - String newTableName, boolean isExternal, String location) - throws HCatException { - Table oldtbl = null; - Table newTable = null; - try { - oldtbl = hmsClient.getTable(checkDB(dbName), existingTblName); - } catch (MetaException e1) { - throw new HCatException( - "MetaException while retrieving existing table.", e1); - } catch (NoSuchObjectException e1) { - throw new ObjectNotFoundException( - "NoSuchObjectException while retrieving existing table.", - e1); - } catch (TException e1) { - throw new ConnectionFailureException( - "TException while retrieving existing table.", e1); - } - if (oldtbl != null) { - newTable = new Table(); - newTable.setTableName(newTableName); - newTable.setDbName(dbName); - StorageDescriptor sd = new StorageDescriptor(oldtbl.getSd()); - newTable.setSd(sd); - newTable.setParameters(oldtbl.getParameters()); - if (location == null) { - newTable.getSd().setLocation(oldtbl.getSd().getLocation()); - } else { - newTable.getSd().setLocation(location); - } - if (isExternal) { - newTable.putToParameters("EXTERNAL", "TRUE"); - newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); - } else { - newTable.getParameters().remove("EXTERNAL"); - } - // set create time - newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); - newTable.setLastAccessTimeIsSet(false); - } - return newTable; + return isMarked; + } + + @Override + public String getDelegationToken(String owner, + String renewerKerberosPrincipalName) throws HCatException { + String token = null; + try { + token = hmsClient.getDelegationToken(owner, + renewerKerberosPrincipalName); + } catch (MetaException e) { + throw new HCatException( + "MetaException while getting delegation token.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while getting delegation token.", e); } - /* - * @throws HCatException - * - * @see org.apache.hive.hcatalog.api.HCatClient#closeClient() - */ - @Override - public void close() throws HCatException { - hmsClient.close(); + return token; + } + + @Override + public long renewDelegationToken(String 
tokenStrForm) throws HCatException { + long time = 0; + try { + time = hmsClient.renewDelegationToken(tokenStrForm); + } catch (MetaException e) { + throw new HCatException( + "MetaException while renewing delegation token.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while renewing delegation token.", e); } - private String checkDB(String name) { - if (StringUtils.isEmpty(name)) { - return MetaStoreUtils.DEFAULT_DATABASE_NAME; - } else { - return name; - } + return time; + } + + @Override + public void cancelDelegationToken(String tokenStrForm) + throws HCatException { + try { + hmsClient.cancelDelegationToken(tokenStrForm); + } catch (MetaException e) { + throw new HCatException( + "MetaException while canceling delegation token.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while canceling delegation token.", e); + } + } + + /* + * @param conf /* @throws HCatException,ConnectionFailureException + * + * @see + * org.apache.hive.hcatalog.api.HCatClient#initialize(org.apache.hadoop.conf. + * Configuration) + */ + @Override + void initialize(Configuration conf) throws HCatException { + this.config = conf; + try { + hiveConfig = HCatUtil.getHiveConf(config); + hmsClient = HCatUtil.getHiveClient(hiveConfig); + } catch (MetaException exp) { + throw new HCatException("MetaException while creating HMS client", + exp); + } catch (IOException exp) { + throw new HCatException("IOException while creating HMS client", + exp); } - /* - * @param partInfoList - * @return The size of the list of partitions. - * @throws HCatException,ConnectionFailureException - * @see org.apache.hive.hcatalog.api.HCatClient#addPartitions(java.util.List) - */ - @Override - public int addPartitions(List partInfoList) - throws HCatException { - int numPartitions = -1; - if ((partInfoList == null) || (partInfoList.size() == 0)) { - throw new HCatException("The partition list is null or empty."); - } + } + + private Table getHiveTableLike(String dbName, String existingTblName, + String newTableName, boolean isExternal, String location) + throws HCatException { + Table oldtbl = null; + Table newTable = null; + try { + oldtbl = hmsClient.getTable(checkDB(dbName), existingTblName); + } catch (MetaException e1) { + throw new HCatException( + "MetaException while retrieving existing table.", e1); + } catch (NoSuchObjectException e1) { + throw new ObjectNotFoundException( + "NoSuchObjectException while retrieving existing table.", + e1); + } catch (TException e1) { + throw new ConnectionFailureException( + "TException while retrieving existing table.", e1); + } + if (oldtbl != null) { + newTable = new Table(); + newTable.setTableName(newTableName); + newTable.setDbName(dbName); + StorageDescriptor sd = new StorageDescriptor(oldtbl.getSd()); + newTable.setSd(sd); + newTable.setParameters(oldtbl.getParameters()); + if (location == null) { + newTable.getSd().setLocation(oldtbl.getSd().getLocation()); + } else { + newTable.getSd().setLocation(location); + } + if (isExternal) { + newTable.putToParameters("EXTERNAL", "TRUE"); + newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else { + newTable.getParameters().remove("EXTERNAL"); + } + // set create time + newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); + newTable.setLastAccessTimeIsSet(false); + } + return newTable; + } + + /* + * @throws HCatException + * + * @see org.apache.hive.hcatalog.api.HCatClient#closeClient() + */ + @Override + public void close() throws 
HCatException { + hmsClient.close(); + } + + private String checkDB(String name) { + if (StringUtils.isEmpty(name)) { + return MetaStoreUtils.DEFAULT_DATABASE_NAME; + } else { + return name; + } + } + + /* + * @param partInfoList + * @return The size of the list of partitions. + * @throws HCatException,ConnectionFailureException + * @see org.apache.hive.hcatalog.api.HCatClient#addPartitions(java.util.List) + */ + @Override + public int addPartitions(List partInfoList) + throws HCatException { + int numPartitions = -1; + if ((partInfoList == null) || (partInfoList.size() == 0)) { + throw new HCatException("The partition list is null or empty."); + } - Table tbl = null; - try { - tbl = hmsClient.getTable(partInfoList.get(0).getDatabaseName(), - partInfoList.get(0).getTableName()); - ArrayList ptnList = new ArrayList(); - for (HCatAddPartitionDesc desc : partInfoList) { - ptnList.add(desc.toHivePartition(tbl)); - } - numPartitions = hmsClient.add_partitions(ptnList); - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException while adding partition.", e); - } catch (AlreadyExistsException e) { - throw new HCatException( - "AlreadyExistsException while adding partition.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while adding partition.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException("The table " - + partInfoList.get(0).getTableName() - + " is could not be found.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while adding partition.", e); - } - return numPartitions; + Table tbl = null; + try { + tbl = hmsClient.getTable(partInfoList.get(0).getDatabaseName(), + partInfoList.get(0).getTableName()); + ArrayList ptnList = new ArrayList(); + for (HCatAddPartitionDesc desc : partInfoList) { + ptnList.add(desc.toHivePartition(tbl)); + } + numPartitions = hmsClient.add_partitions(ptnList); + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException while adding partition.", e); + } catch (AlreadyExistsException e) { + throw new HCatException( + "AlreadyExistsException while adding partition.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while adding partition.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException("The table " + + partInfoList.get(0).getTableName() + + " is could not be found.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while adding partition.", e); } + return numPartitions; + } - @Override - public String getMessageBusTopicName(String dbName, String tableName) throws HCatException { - try { - return hmsClient.getTable(dbName, tableName).getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); - } - catch (MetaException e) { - throw new HCatException("MetaException while retrieving JMS Topic name.", e); - } catch (NoSuchObjectException e) { - throw new HCatException("Could not find DB:" + dbName + " or Table:" + tableName, e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while retrieving JMS Topic name.", e); - } + @Override + public String getMessageBusTopicName(String dbName, String tableName) throws HCatException { + try { + return hmsClient.getTable(dbName, tableName).getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); + } + catch (MetaException e) { + throw new HCatException("MetaException while retrieving JMS Topic name.", e); + } catch (NoSuchObjectException e) { + throw 
new HCatException("Could not find DB:" + dbName + " or Table:" + tableName, e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while retrieving JMS Topic name.", e); } + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateDBDesc.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateDBDesc.java index acda55b..fb05104 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateDBDesc.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateDBDesc.java @@ -28,167 +28,167 @@ */ public class HCatCreateDBDesc { + private String dbName; + private String locationUri; + private String comment; + private Map dbProperties; + private boolean ifNotExits = false; + + /** + * Gets the database properties. + * + * @return the database properties + */ + public Map getDatabaseProperties() { + return this.dbProperties; + } + + /** + * Gets the if not exists. + * + * @return the if not exists + */ + public boolean getIfNotExists() { + return this.ifNotExits; + } + + /** + * Gets the comments. + * + * @return the comments + */ + public String getComments() { + return this.comment; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.locationUri; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + private HCatCreateDBDesc(String dbName) { + this.dbName = dbName; + } + + @Override + public String toString() { + return "HCatCreateDBDesc [" + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (locationUri != null ? "location=" + locationUri + ", " + : "location=null") + + (comment != null ? "comment=" + comment + ", " : "comment=null") + + (dbProperties != null ? "dbProperties=" + dbProperties + ", " + : "dbProperties=null") + "ifNotExits=" + ifNotExits + "]"; + } + + /** + * Creates the builder for defining attributes. + * + * @param dbName the db name + * @return the builder + */ + public static Builder create(String dbName) { + return new Builder(dbName); + } + + Database toHiveDb() { + Database hiveDB = new Database(); + hiveDB.setDescription(this.comment); + hiveDB.setLocationUri(this.locationUri); + hiveDB.setName(this.dbName); + hiveDB.setParameters(this.dbProperties); + return hiveDB; + } + + public static class Builder { + + private String innerLoc; + private String innerComment; + private Map innerDBProps; private String dbName; - private String locationUri; - private String comment; - private Map dbProperties; - private boolean ifNotExits = false; + private boolean ifNotExists = false; - /** - * Gets the database properties. - * - * @return the database properties - */ - public Map getDatabaseProperties() { - return this.dbProperties; + private Builder(String dbName) { + this.dbName = dbName; } /** - * Gets the if not exists. + * Location. * - * @return the if not exists + * @param value the location of the database. + * @return the builder */ - public boolean getIfNotExists() { - return this.ifNotExits; + public Builder location(String value) { + this.innerLoc = value; + return this; } /** - * Gets the comments. + * Comment. * - * @return the comments + * @param value comments. 
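Reviewer note (illustrative only, not part of this re-indentation patch): the delegation-token methods above are normally driven end-to-end roughly as in the sketch below. The owner, renewer principal, and configuration are placeholders; HCatClient.create(Configuration) is the same factory that TestHCatClient uses later in this diff.

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.api.HCatClient;

public class DelegationTokenSketch {
  public static void main(String[] args) throws Exception {
    // The Configuration is assumed to carry the metastore URI (hive.metastore.uris).
    HCatClient client = HCatClient.create(new Configuration());
    try {
      // Owner and renewer principal below are placeholder values.
      String token = client.getDelegationToken("hcatuser", "hive/_HOST@EXAMPLE.COM");
      long newExpiry = client.renewDelegationToken(token); // returns the new expiry time
      System.out.println("Token renewed; new expiry = " + newExpiry);
      client.cancelDelegationToken(token);                 // invalidate once finished
    } finally {
      client.close();
    }
  }
}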
+ * @return the builder */ - public String getComments() { - return this.comment; + public Builder comment(String value) { + this.innerComment = value; + return this; } /** - * Gets the location. - * - * @return the location + * If not exists. + * @param ifNotExists If set to true, hive will not throw exception, if a + * database with the same name already exists. + * @return the builder */ - public String getLocation() { - return this.locationUri; + public Builder ifNotExists(boolean ifNotExists) { + this.ifNotExists = ifNotExists; + return this; } /** - * Gets the database name. + * Database properties. * - * @return the database name + * @param dbProps the database properties + * @return the builder */ - public String getDatabaseName() { - return this.dbName; + public Builder databaseProperties(Map dbProps) { + this.innerDBProps = dbProps; + return this; } - private HCatCreateDBDesc(String dbName) { - this.dbName = dbName; - } - - @Override - public String toString() { - return "HCatCreateDBDesc [" - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (locationUri != null ? "location=" + locationUri + ", " - : "location=null") - + (comment != null ? "comment=" + comment + ", " : "comment=null") - + (dbProperties != null ? "dbProperties=" + dbProperties + ", " - : "dbProperties=null") + "ifNotExits=" + ifNotExits + "]"; - } /** - * Creates the builder for defining attributes. + * Builds the create database descriptor. * - * @param dbName the db name - * @return the builder + * @return An instance of HCatCreateDBDesc + * @throws HCatException */ - public static Builder create(String dbName) { - return new Builder(dbName); - } + public HCatCreateDBDesc build() throws HCatException { + if (this.dbName == null) { + throw new HCatException("Database name cannot be null."); + } + HCatCreateDBDesc desc = new HCatCreateDBDesc(this.dbName); + desc.comment = this.innerComment; + desc.locationUri = this.innerLoc; + desc.dbProperties = this.innerDBProps; + desc.ifNotExits = this.ifNotExists; + return desc; - Database toHiveDb() { - Database hiveDB = new Database(); - hiveDB.setDescription(this.comment); - hiveDB.setLocationUri(this.locationUri); - hiveDB.setName(this.dbName); - hiveDB.setParameters(this.dbProperties); - return hiveDB; } - public static class Builder { - - private String innerLoc; - private String innerComment; - private Map innerDBProps; - private String dbName; - private boolean ifNotExists = false; - - private Builder(String dbName) { - this.dbName = dbName; - } - - /** - * Location. - * - * @param value the location of the database. - * @return the builder - */ - public Builder location(String value) { - this.innerLoc = value; - return this; - } - - /** - * Comment. - * - * @param value comments. - * @return the builder - */ - public Builder comment(String value) { - this.innerComment = value; - return this; - } - - /** - * If not exists. - * @param ifNotExists If set to true, hive will not throw exception, if a - * database with the same name already exists. - * @return the builder - */ - public Builder ifNotExists(boolean ifNotExists) { - this.ifNotExists = ifNotExists; - return this; - } - - /** - * Database properties. - * - * @param dbProps the database properties - * @return the builder - */ - public Builder databaseProperties(Map dbProps) { - this.innerDBProps = dbProps; - return this; - } - - - /** - * Builds the create database descriptor. 
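For context (again, not part of the patch): the HCatCreateDBDesc.Builder being re-indented here is used as sketched below, mirroring the calls TestHCatClient makes at the end of this diff. The database name and comment are placeholders, and "client" stands for an HCatClient obtained via HCatClient.create(conf) in a method that handles HCatException.

// Minimal usage sketch (placeholder names); assumes an existing HCatClient "client".
HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create("reports_db")
    .comment("nightly reporting database")
    .ifNotExists(true)
    .build();
client.createDatabase(dbDesc);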
- * - * @return An instance of HCatCreateDBDesc - * @throws HCatException - */ - public HCatCreateDBDesc build() throws HCatException { - if (this.dbName == null) { - throw new HCatException("Database name cannot be null."); - } - HCatCreateDBDesc desc = new HCatCreateDBDesc(this.dbName); - desc.comment = this.innerComment; - desc.locationUri = this.innerLoc; - desc.dbProperties = this.innerDBProps; - desc.ifNotExits = this.ifNotExists; - return desc; - - } - - } + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java index a28fa08..d706e07 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java @@ -53,468 +53,468 @@ @SuppressWarnings("deprecation") public class HCatCreateTableDesc { - private static final Logger LOG = LoggerFactory.getLogger(HCatCreateTableDesc.class); + private static final Logger LOG = LoggerFactory.getLogger(HCatCreateTableDesc.class); + + private String tableName; + private String dbName; + private boolean isExternal; + private String comment; + private String location; + private List cols; + private List partCols; + private List bucketCols; + private int numBuckets; + private List sortCols; + private Map tblProps; + private boolean ifNotExists; + private String fileFormat; + private String inputformat; + private String outputformat; + private String serde; + private String storageHandler; + + private HCatCreateTableDesc(String dbName, String tableName, List columns) { + this.dbName = dbName; + this.tableName = tableName; + this.cols = columns; + } + + /** + * Creates a builder for defining attributes. 
+ * + * @param dbName the db name + * @param tableName the table name + * @param columns the columns + * @return the builder + */ + public static Builder create(String dbName, String tableName, List columns) { + return new Builder(dbName, tableName, columns); + } + + Table toHiveTable(HiveConf conf) throws HCatException { + + Table newTable = new Table(); + newTable.setDbName(dbName); + newTable.setTableName(tableName); + if (tblProps != null) { + newTable.setParameters(tblProps); + } + + if (isExternal) { + newTable.putToParameters("EXTERNAL", "TRUE"); + newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else { + newTable.setTableType(TableType.MANAGED_TABLE.toString()); + } + + StorageDescriptor sd = new StorageDescriptor(); + sd.setSerdeInfo(new SerDeInfo()); + if (location != null) { + sd.setLocation(location); + } + if (this.comment != null) { + newTable.putToParameters("comment", comment); + } + if (!StringUtils.isEmpty(fileFormat)) { + sd.setInputFormat(inputformat); + sd.setOutputFormat(outputformat); + if (serde != null) { + sd.getSerdeInfo().setSerializationLib(serde); + } else { + LOG.info("Using LazySimpleSerDe for table " + tableName); + sd.getSerdeInfo() + .setSerializationLib( + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class + .getName()); + } + } else { + try { + LOG.info("Creating instance of storage handler to get input/output, serder info."); + HiveStorageHandler sh = HiveUtils.getStorageHandler(conf, + storageHandler); + sd.setInputFormat(sh.getInputFormatClass().getName()); + sd.setOutputFormat(sh.getOutputFormatClass().getName()); + sd.getSerdeInfo().setSerializationLib( + sh.getSerDeClass().getName()); + newTable.putToParameters( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, + storageHandler); + } catch (HiveException e) { + throw new HCatException( + "Exception while creating instance of storage handler", + e); + } + } + newTable.setSd(sd); + if (this.partCols != null) { + ArrayList hivePtnCols = new ArrayList(); + for (HCatFieldSchema fs : this.partCols) { + hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs)); + } + newTable.setPartitionKeys(hivePtnCols); + } + + if (this.cols != null) { + ArrayList hiveTblCols = new ArrayList(); + for (HCatFieldSchema fs : this.cols) { + hiveTblCols.add(HCatSchemaUtils.getFieldSchema(fs)); + } + newTable.getSd().setCols(hiveTblCols); + } + + if (this.bucketCols != null) { + newTable.getSd().setBucketCols(bucketCols); + newTable.getSd().setNumBuckets(numBuckets); + } + + if (this.sortCols != null) { + newTable.getSd().setSortCols(sortCols); + } + + newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); + newTable.setLastAccessTimeIsSet(false); + return newTable; + } + + /** + * Gets the if not exists. + * + * @return the if not exists + */ + public boolean getIfNotExists() { + return this.ifNotExists; + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return this.tableName; + } + + /** + * Gets the cols. + * + * @return the cols + */ + public List getCols() { + return this.cols; + } + + /** + * Gets the partition cols. + * + * @return the partition cols + */ + public List getPartitionCols() { + return this.partCols; + } + + /** + * Gets the bucket cols. + * + * @return the bucket cols + */ + public List getBucketCols() { + return this.bucketCols; + } + + public int getNumBuckets() { + return this.numBuckets; + } + + /** + * Gets the comments. 
+ * + * @return the comments + */ + public String getComments() { + return this.comment; + } + + /** + * Gets the storage handler. + * + * @return the storage handler + */ + public String getStorageHandler() { + return this.storageHandler; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.location; + } + + /** + * Gets the external. + * + * @return the external + */ + public boolean getExternal() { + return this.isExternal; + } + + /** + * Gets the sort cols. + * + * @return the sort cols + */ + public List getSortCols() { + return this.sortCols; + } + + /** + * Gets the tbl props. + * + * @return the tbl props + */ + public Map getTblProps() { + return this.tblProps; + } + + /** + * Gets the file format. + * + * @return the file format + */ + public String getFileFormat() { + return this.fileFormat; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + @Override + public String toString() { + return "HCatCreateTableDesc [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + "isExternal=" + + isExternal + + ", " + + (comment != null ? "comment=" + comment + ", " : "comment=null") + + (location != null ? "location=" + location + ", " : "location=null") + + (cols != null ? "cols=" + cols + ", " : "cols=null") + + (partCols != null ? "partCols=" + partCols + ", " : "partCols=null") + + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") + + "numBuckets=" + + numBuckets + + ", " + + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") + + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") + + "ifNotExists=" + + ifNotExists + + ", " + + (fileFormat != null ? "fileFormat=" + fileFormat + ", " : "fileFormat=null") + + (inputformat != null ? "inputformat=" + inputformat + ", " + : "inputformat=null") + + (outputformat != null ? "outputformat=" + outputformat + ", " + : "outputformat=null") + + (serde != null ? "serde=" + serde + ", " : "serde=null") + + (storageHandler != null ? "storageHandler=" + storageHandler + : "storageHandler=null") + "]"; + } + + public static class Builder { private String tableName; - private String dbName; private boolean isExternal; - private String comment; - private String location; private List cols; private List partCols; private List bucketCols; - private int numBuckets; private List sortCols; - private Map tblProps; - private boolean ifNotExists; + private int numBuckets; + private String comment; private String fileFormat; - private String inputformat; - private String outputformat; - private String serde; + private String location; private String storageHandler; + private Map tblProps; + private boolean ifNotExists; + private String dbName; - private HCatCreateTableDesc(String dbName, String tableName, List columns) { - this.dbName = dbName; - this.tableName = tableName; - this.cols = columns; - } - /** - * Creates a builder for defining attributes. 
- * - * @param dbName the db name - * @param tableName the table name - * @param columns the columns - * @return the builder - */ - public static Builder create(String dbName, String tableName, List columns) { - return new Builder(dbName, tableName, columns); + private Builder(String dbName, String tableName, List columns) { + this.dbName = dbName; + this.tableName = tableName; + this.cols = columns; } - Table toHiveTable(HiveConf conf) throws HCatException { - - Table newTable = new Table(); - newTable.setDbName(dbName); - newTable.setTableName(tableName); - if (tblProps != null) { - newTable.setParameters(tblProps); - } - - if (isExternal) { - newTable.putToParameters("EXTERNAL", "TRUE"); - newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); - } else { - newTable.setTableType(TableType.MANAGED_TABLE.toString()); - } - - StorageDescriptor sd = new StorageDescriptor(); - sd.setSerdeInfo(new SerDeInfo()); - if (location != null) { - sd.setLocation(location); - } - if (this.comment != null) { - newTable.putToParameters("comment", comment); - } - if (!StringUtils.isEmpty(fileFormat)) { - sd.setInputFormat(inputformat); - sd.setOutputFormat(outputformat); - if (serde != null) { - sd.getSerdeInfo().setSerializationLib(serde); - } else { - LOG.info("Using LazySimpleSerDe for table " + tableName); - sd.getSerdeInfo() - .setSerializationLib( - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class - .getName()); - } - } else { - try { - LOG.info("Creating instance of storage handler to get input/output, serder info."); - HiveStorageHandler sh = HiveUtils.getStorageHandler(conf, - storageHandler); - sd.setInputFormat(sh.getInputFormatClass().getName()); - sd.setOutputFormat(sh.getOutputFormatClass().getName()); - sd.getSerdeInfo().setSerializationLib( - sh.getSerDeClass().getName()); - newTable.putToParameters( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, - storageHandler); - } catch (HiveException e) { - throw new HCatException( - "Exception while creating instance of storage handler", - e); - } - } - newTable.setSd(sd); - if (this.partCols != null) { - ArrayList hivePtnCols = new ArrayList(); - for (HCatFieldSchema fs : this.partCols) { - hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs)); - } - newTable.setPartitionKeys(hivePtnCols); - } - - if (this.cols != null) { - ArrayList hiveTblCols = new ArrayList(); - for (HCatFieldSchema fs : this.cols) { - hiveTblCols.add(HCatSchemaUtils.getFieldSchema(fs)); - } - newTable.getSd().setCols(hiveTblCols); - } - - if (this.bucketCols != null) { - newTable.getSd().setBucketCols(bucketCols); - newTable.getSd().setNumBuckets(numBuckets); - } - - if (this.sortCols != null) { - newTable.getSd().setSortCols(sortCols); - } - - newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); - newTable.setLastAccessTimeIsSet(false); - return newTable; - } /** - * Gets the if not exists. + * If not exists. * - * @return the if not exists + * @param ifNotExists If set to true, hive will not throw exception, if a + * table with the same name already exists. + * @return the builder */ - public boolean getIfNotExists() { - return this.ifNotExists; + public Builder ifNotExists(boolean ifNotExists) { + this.ifNotExists = ifNotExists; + return this; } - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return this.tableName; - } /** - * Gets the cols. + * Partition cols. 
* - * @return the cols + * @param partCols the partition cols + * @return the builder */ - public List getCols() { - return this.cols; + public Builder partCols(List partCols) { + this.partCols = partCols; + return this; } - /** - * Gets the partition cols. - * - * @return the partition cols - */ - public List getPartitionCols() { - return this.partCols; - } /** - * Gets the bucket cols. + * Bucket cols. * - * @return the bucket cols + * @param bucketCols the bucket cols + * @return the builder */ - public List getBucketCols() { - return this.bucketCols; - } - - public int getNumBuckets() { - return this.numBuckets; + public Builder bucketCols(List bucketCols, int buckets) { + this.bucketCols = bucketCols; + this.numBuckets = buckets; + return this; } /** - * Gets the comments. + * Storage handler. * - * @return the comments + * @param storageHandler the storage handler + * @return the builder */ - public String getComments() { - return this.comment; + public Builder storageHandler(String storageHandler) { + this.storageHandler = storageHandler; + return this; } /** - * Gets the storage handler. + * Location. * - * @return the storage handler + * @param location the location + * @return the builder */ - public String getStorageHandler() { - return this.storageHandler; + public Builder location(String location) { + this.location = location; + return this; } /** - * Gets the location. + * Comments. * - * @return the location + * @param comment the comment + * @return the builder */ - public String getLocation() { - return this.location; + public Builder comments(String comment) { + this.comment = comment; + return this; } /** - * Gets the external. + * Checks if is table external. * - * @return the external + * @param isExternal the is external + * @return the builder */ - public boolean getExternal() { - return this.isExternal; + public Builder isTableExternal(boolean isExternal) { + this.isExternal = isExternal; + return this; } /** - * Gets the sort cols. + * Sort cols. * - * @return the sort cols + * @param sortCols the sort cols + * @return the builder */ - public List getSortCols() { - return this.sortCols; + public Builder sortCols(ArrayList sortCols) { + this.sortCols = sortCols; + return this; } /** - * Gets the tbl props. + * Tbl props. * - * @return the tbl props + * @param tblProps the tbl props + * @return the builder */ - public Map getTblProps() { - return this.tblProps; + public Builder tblProps(Map tblProps) { + this.tblProps = tblProps; + return this; } /** - * Gets the file format. + * File format. * - * @return the file format + * @param format the format + * @return the builder */ - public String getFileFormat() { - return this.fileFormat; + public Builder fileFormat(String format) { + this.fileFormat = format; + return this; } /** - * Gets the database name. + * Builds the HCatCreateTableDesc. * - * @return the database name + * @return HCatCreateTableDesc + * @throws HCatException */ - public String getDatabaseName() { - return this.dbName; - } - - @Override - public String toString() { - return "HCatCreateTableDesc [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + "isExternal=" - + isExternal - + ", " - + (comment != null ? "comment=" + comment + ", " : "comment=null") - + (location != null ? "location=" + location + ", " : "location=null") - + (cols != null ? "cols=" + cols + ", " : "cols=null") - + (partCols != null ? 
"partCols=" + partCols + ", " : "partCols=null") - + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") - + "numBuckets=" - + numBuckets - + ", " - + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") - + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") - + "ifNotExists=" - + ifNotExists - + ", " - + (fileFormat != null ? "fileFormat=" + fileFormat + ", " : "fileFormat=null") - + (inputformat != null ? "inputformat=" + inputformat + ", " - : "inputformat=null") - + (outputformat != null ? "outputformat=" + outputformat + ", " - : "outputformat=null") - + (serde != null ? "serde=" + serde + ", " : "serde=null") - + (storageHandler != null ? "storageHandler=" + storageHandler - : "storageHandler=null") + "]"; - } - - public static class Builder { - - private String tableName; - private boolean isExternal; - private List cols; - private List partCols; - private List bucketCols; - private List sortCols; - private int numBuckets; - private String comment; - private String fileFormat; - private String location; - private String storageHandler; - private Map tblProps; - private boolean ifNotExists; - private String dbName; - - - private Builder(String dbName, String tableName, List columns) { - this.dbName = dbName; - this.tableName = tableName; - this.cols = columns; - } - - - /** - * If not exists. - * - * @param ifNotExists If set to true, hive will not throw exception, if a - * table with the same name already exists. - * @return the builder - */ - public Builder ifNotExists(boolean ifNotExists) { - this.ifNotExists = ifNotExists; - return this; - } - - - /** - * Partition cols. - * - * @param partCols the partition cols - * @return the builder - */ - public Builder partCols(List partCols) { - this.partCols = partCols; - return this; - } - - - /** - * Bucket cols. - * - * @param bucketCols the bucket cols - * @return the builder - */ - public Builder bucketCols(List bucketCols, int buckets) { - this.bucketCols = bucketCols; - this.numBuckets = buckets; - return this; - } - - /** - * Storage handler. - * - * @param storageHandler the storage handler - * @return the builder - */ - public Builder storageHandler(String storageHandler) { - this.storageHandler = storageHandler; - return this; - } - - /** - * Location. - * - * @param location the location - * @return the builder - */ - public Builder location(String location) { - this.location = location; - return this; - } - - /** - * Comments. - * - * @param comment the comment - * @return the builder - */ - public Builder comments(String comment) { - this.comment = comment; - return this; - } - - /** - * Checks if is table external. - * - * @param isExternal the is external - * @return the builder - */ - public Builder isTableExternal(boolean isExternal) { - this.isExternal = isExternal; - return this; - } - - /** - * Sort cols. - * - * @param sortCols the sort cols - * @return the builder - */ - public Builder sortCols(ArrayList sortCols) { - this.sortCols = sortCols; - return this; - } - - /** - * Tbl props. - * - * @param tblProps the tbl props - * @return the builder - */ - public Builder tblProps(Map tblProps) { - this.tblProps = tblProps; - return this; - } - - /** - * File format. - * - * @param format the format - * @return the builder - */ - public Builder fileFormat(String format) { - this.fileFormat = format; - return this; - } - - /** - * Builds the HCatCreateTableDesc. 
- * - * @return HCatCreateTableDesc - * @throws HCatException - */ - public HCatCreateTableDesc build() throws HCatException { - if (this.dbName == null) { - LOG.info("Database name found null. Setting db to :" - + MetaStoreUtils.DEFAULT_DATABASE_NAME); - this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - } - HCatCreateTableDesc desc = new HCatCreateTableDesc(this.dbName, - this.tableName, this.cols); - desc.ifNotExists = this.ifNotExists; - desc.isExternal = this.isExternal; - desc.comment = this.comment; - desc.partCols = this.partCols; - desc.bucketCols = this.bucketCols; - desc.numBuckets = this.numBuckets; - desc.location = this.location; - desc.tblProps = this.tblProps; - desc.sortCols = this.sortCols; - desc.serde = null; - if (!StringUtils.isEmpty(fileFormat)) { - desc.fileFormat = fileFormat; - if ("SequenceFile".equalsIgnoreCase(fileFormat)) { - desc.inputformat = SequenceFileInputFormat.class.getName(); - desc.outputformat = SequenceFileOutputFormat.class - .getName(); - } else if ("RCFile".equalsIgnoreCase(fileFormat)) { - desc.inputformat = RCFileInputFormat.class.getName(); - desc.outputformat = RCFileOutputFormat.class.getName(); - desc.serde = ColumnarSerDe.class.getName(); - } - desc.storageHandler = StringUtils.EMPTY; - } else if (!StringUtils.isEmpty(storageHandler)) { - desc.storageHandler = storageHandler; - } else { - desc.fileFormat = "TextFile"; - LOG.info("Using text file format for the table."); - desc.inputformat = TextInputFormat.class.getName(); - LOG.info("Table input format:" + desc.inputformat); - desc.outputformat = IgnoreKeyTextOutputFormat.class - .getName(); - LOG.info("Table output format:" + desc.outputformat); - } - return desc; + public HCatCreateTableDesc build() throws HCatException { + if (this.dbName == null) { + LOG.info("Database name found null. 
Setting db to :" + + MetaStoreUtils.DEFAULT_DATABASE_NAME); + this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + } + HCatCreateTableDesc desc = new HCatCreateTableDesc(this.dbName, + this.tableName, this.cols); + desc.ifNotExists = this.ifNotExists; + desc.isExternal = this.isExternal; + desc.comment = this.comment; + desc.partCols = this.partCols; + desc.bucketCols = this.bucketCols; + desc.numBuckets = this.numBuckets; + desc.location = this.location; + desc.tblProps = this.tblProps; + desc.sortCols = this.sortCols; + desc.serde = null; + if (!StringUtils.isEmpty(fileFormat)) { + desc.fileFormat = fileFormat; + if ("SequenceFile".equalsIgnoreCase(fileFormat)) { + desc.inputformat = SequenceFileInputFormat.class.getName(); + desc.outputformat = SequenceFileOutputFormat.class + .getName(); + } else if ("RCFile".equalsIgnoreCase(fileFormat)) { + desc.inputformat = RCFileInputFormat.class.getName(); + desc.outputformat = RCFileOutputFormat.class.getName(); + desc.serde = ColumnarSerDe.class.getName(); } + desc.storageHandler = StringUtils.EMPTY; + } else if (!StringUtils.isEmpty(storageHandler)) { + desc.storageHandler = storageHandler; + } else { + desc.fileFormat = "TextFile"; + LOG.info("Using text file format for the table."); + desc.inputformat = TextInputFormat.class.getName(); + LOG.info("Table input format:" + desc.inputformat); + desc.outputformat = IgnoreKeyTextOutputFormat.class + .getName(); + LOG.info("Table output format:" + desc.outputformat); + } + return desc; } + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatDatabase.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatDatabase.java index 4a0b935..5f1bf05 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatDatabase.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatDatabase.java @@ -28,61 +28,61 @@ */ public class HCatDatabase { - private String dbName; - private String dbLocation; - private String comment; - private Map props; + private String dbName; + private String dbLocation; + private String comment; + private Map props; - HCatDatabase(Database db) { - this.dbName = db.getName(); - this.props = db.getParameters(); - this.dbLocation = db.getLocationUri(); - this.comment = db.getDescription(); - } + HCatDatabase(Database db) { + this.dbName = db.getName(); + this.props = db.getParameters(); + this.dbLocation = db.getLocationUri(); + this.comment = db.getDescription(); + } - /** - * Gets the database name. - * - * @return the database name - */ - public String getName() { - return dbName; - } + /** + * Gets the database name. + * + * @return the database name + */ + public String getName() { + return dbName; + } - /** - * Gets the dB location. - * - * @return the dB location - */ - public String getLocation() { - return dbLocation; - } + /** + * Gets the dB location. + * + * @return the dB location + */ + public String getLocation() { + return dbLocation; + } - /** - * Gets the comment. - * - * @return the comment - */ - public String getComment() { - return comment; - } + /** + * Gets the comment. + * + * @return the comment + */ + public String getComment() { + return comment; + } - /** - * Gets the dB properties. - * - * @return the dB properties - */ - public Map getProperties() { - return props; - } + /** + * Gets the dB properties. 
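Side note for reviewers, not part of the change itself: the HCatCreateTableDesc.Builder completed above is typically exercised as below, patterned on TestHCatClient later in this diff. Column, partition, database, and table names are placeholders; "client" is an existing HCatClient, and imports (HCatFieldSchema, Type, etc.) match those used by the test.

// Sketch only; column and partition definitions are placeholders.
List<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
cols.add(new HCatFieldSchema("id", Type.INT, "id comment"));
cols.add(new HCatFieldSchema("value", Type.STRING, "value comment"));

List<HCatFieldSchema> ptnCols = new ArrayList<HCatFieldSchema>();
ptnCols.add(new HCatFieldSchema("dt", Type.STRING, "date partition column"));

HCatCreateTableDesc tableDesc = HCatCreateTableDesc
    .create("reports_db", "page_view", cols)
    .fileFormat("rcfile")   // build() maps this to the RCFile input/output formats plus ColumnarSerDe
    .partCols(ptnCols)
    .ifNotExists(true)
    .build();
client.createTable(tableDesc);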
+ * + * @return the dB properties + */ + public Map getProperties() { + return props; + } - @Override - public String toString() { - return "HCatDatabase [" - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (dbLocation != null ? "dbLocation=" + dbLocation + ", " : "dbLocation=null") - + (comment != null ? "comment=" + comment + ", " : "comment=null") - + (props != null ? "props=" + props : "props=null") + "]"; - } + @Override + public String toString() { + return "HCatDatabase [" + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (dbLocation != null ? "dbLocation=" + dbLocation + ", " : "dbLocation=null") + + (comment != null ? "comment=" + comment + ", " : "comment=null") + + (props != null ? "props=" + props : "props=null") + "]"; + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java index 99a6d6f..ee1b6bf 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java @@ -35,170 +35,170 @@ */ public class HCatPartition { - private String tableName; - private String dbName; - private List values; - private List tableCols; - private int createTime; - private int lastAccessTime; - private StorageDescriptor sd; - private Map parameters; - - HCatPartition(Partition partition) throws HCatException { - this.tableName = partition.getTableName(); - this.dbName = partition.getDbName(); - this.createTime = partition.getCreateTime(); - this.lastAccessTime = partition.getLastAccessTime(); - this.parameters = partition.getParameters(); - this.values = partition.getValues(); - this.sd = partition.getSd(); - this.tableCols = new ArrayList(); - for (FieldSchema fs : this.sd.getCols()) { - this.tableCols.add(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - } - - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return this.tableName; - } - - /** - * Gets the database name. - * - * @return the database name - */ - public String getDatabaseName() { - return this.dbName; - } - - /** - * Gets the columns of the table. - * - * @return the columns - */ - public List getColumns() { - return this.tableCols; - } - - /** - * Gets the input format. - * - * @return the input format - */ - public String getInputFormat() { - return this.sd.getInputFormat(); - } - - /** - * Gets the output format. - * - * @return the output format - */ - public String getOutputFormat() { - return this.sd.getOutputFormat(); - } - - /** - * Gets the storage handler. - * - * @return the storage handler - */ - public String getStorageHandler() { - return this.sd - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return this.sd.getLocation(); - } - - /** - * Gets the serde. - * - * @return the serde - */ - public String getSerDe() { - return this.sd.getSerdeInfo().getSerializationLib(); - } - - public Map getParameters() { - return this.parameters; - } - - /** - * Gets the last access time. - * - * @return the last access time - */ - public int getLastAccessTime() { - return this.lastAccessTime; - } - - /** - * Gets the creates the time. 
- * - * @return the creates the time - */ - public int getCreateTime() { - return this.createTime; - } - - /** - * Gets the values. - * - * @return the values - */ - public List getValues() { - return this.values; - } - - /** - * Gets the bucket columns. - * - * @return the bucket columns - */ - public List getBucketCols() { - return this.sd.getBucketCols(); - } - - /** - * Gets the number of buckets. - * - * @return the number of buckets - */ - public int getNumBuckets() { - return this.sd.getNumBuckets(); - } - - /** - * Gets the sort columns. - * - * @return the sort columns - */ - public List getSortCols() { - return this.sd.getSortCols(); - } - - @Override - public String toString() { - return "HCatPartition [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (values != null ? "values=" + values + ", " : "values=null") - + "createTime=" + createTime + ", lastAccessTime=" - + lastAccessTime + ", " + (sd != null ? "sd=" + sd + ", " : "sd=null") - + (parameters != null ? "parameters=" + parameters : "parameters=null") + "]"; - } + private String tableName; + private String dbName; + private List values; + private List tableCols; + private int createTime; + private int lastAccessTime; + private StorageDescriptor sd; + private Map parameters; + + HCatPartition(Partition partition) throws HCatException { + this.tableName = partition.getTableName(); + this.dbName = partition.getDbName(); + this.createTime = partition.getCreateTime(); + this.lastAccessTime = partition.getLastAccessTime(); + this.parameters = partition.getParameters(); + this.values = partition.getValues(); + this.sd = partition.getSd(); + this.tableCols = new ArrayList(); + for (FieldSchema fs : this.sd.getCols()) { + this.tableCols.add(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return this.tableName; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + /** + * Gets the columns of the table. + * + * @return the columns + */ + public List getColumns() { + return this.tableCols; + } + + /** + * Gets the input format. + * + * @return the input format + */ + public String getInputFormat() { + return this.sd.getInputFormat(); + } + + /** + * Gets the output format. + * + * @return the output format + */ + public String getOutputFormat() { + return this.sd.getOutputFormat(); + } + + /** + * Gets the storage handler. + * + * @return the storage handler + */ + public String getStorageHandler() { + return this.sd + .getParameters() + .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.sd.getLocation(); + } + + /** + * Gets the serde. + * + * @return the serde + */ + public String getSerDe() { + return this.sd.getSerdeInfo().getSerializationLib(); + } + + public Map getParameters() { + return this.parameters; + } + + /** + * Gets the last access time. + * + * @return the last access time + */ + public int getLastAccessTime() { + return this.lastAccessTime; + } + + /** + * Gets the creates the time. + * + * @return the creates the time + */ + public int getCreateTime() { + return this.createTime; + } + + /** + * Gets the values. 
+ * + * @return the values + */ + public List getValues() { + return this.values; + } + + /** + * Gets the bucket columns. + * + * @return the bucket columns + */ + public List getBucketCols() { + return this.sd.getBucketCols(); + } + + /** + * Gets the number of buckets. + * + * @return the number of buckets + */ + public int getNumBuckets() { + return this.sd.getNumBuckets(); + } + + /** + * Gets the sort columns. + * + * @return the sort columns + */ + public List getSortCols() { + return this.sd.getSortCols(); + } + + @Override + public String toString() { + return "HCatPartition [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (values != null ? "values=" + values + ", " : "values=null") + + "createTime=" + createTime + ", lastAccessTime=" + + lastAccessTime + ", " + (sd != null ? "sd=" + sd + ", " : "sd=null") + + (parameters != null ? "parameters=" + parameters : "parameters=null") + "]"; + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java index e43227b..47c1470 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java @@ -34,194 +34,194 @@ */ public class HCatTable { - private String tableName; - private String tabletype; - private List cols; - private List partCols; - private List bucketCols; - private List sortCols; - private int numBuckets; - private String inputFileFormat; - private String outputFileFormat; - private String storageHandler; - private Map tblProps; - private String dbName; - private String serde; - private String location; - - HCatTable(Table hiveTable) throws HCatException { - this.tableName = hiveTable.getTableName(); - this.dbName = hiveTable.getDbName(); - this.tabletype = hiveTable.getTableType(); - cols = new ArrayList(); - for (FieldSchema colFS : hiveTable.getSd().getCols()) { - cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); - } - partCols = new ArrayList(); - for (FieldSchema colFS : hiveTable.getPartitionKeys()) { - partCols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); - } - bucketCols = hiveTable.getSd().getBucketCols(); - sortCols = hiveTable.getSd().getSortCols(); - numBuckets = hiveTable.getSd().getNumBuckets(); - inputFileFormat = hiveTable.getSd().getInputFormat(); - outputFileFormat = hiveTable.getSd().getOutputFormat(); - storageHandler = hiveTable - .getSd() - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); - tblProps = hiveTable.getParameters(); - serde = hiveTable.getSd().getSerdeInfo().getSerializationLib(); - location = hiveTable.getSd().getLocation(); - } - - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return tableName; - } - - /** - * Gets the db name. - * - * @return the db name - */ - public String getDbName() { - return dbName; - } - - /** - * Gets the columns. - * - * @return the columns - */ - public List getCols() { - return cols; - } - - /** - * Gets the part columns. - * - * @return the part columns - */ - public List getPartCols() { - return partCols; - } - - /** - * Gets the bucket columns. - * - * @return the bucket columns - */ - public List getBucketCols() { - return bucketCols; - } - - /** - * Gets the sort columns. 
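As background on how the HCatPartition accessors above get used (illustrative only): partitions are added through HCatAddPartitionDesc and then fetched back, as sketched below and modelled on the partition test in TestHCatClient at the end of this diff. Database, table, and key values are placeholders; "client" is an existing HCatClient.

// Sketch only (placeholder values), following the partition flow in TestHCatClient.
Map<String, String> ptnSpec = new HashMap<String, String>();
ptnSpec.put("dt", "04/30/2012");
ptnSpec.put("country", "usa");

// Third argument left null, matching TestHCatClient's usage (believed to be an optional partition location).
HCatAddPartitionDesc addPtn =
    HCatAddPartitionDesc.create("reports_db", "page_view", null, ptnSpec).build();
client.addPartition(addPtn);

HCatPartition ptn = client.getPartition("reports_db", "page_view", ptnSpec);
System.out.println(ptn.getLocation() + " -> " + ptn.getValues());

List<HCatPartition> matches =
    client.listPartitionsByFilter("reports_db", "page_view", "country = \"usa\"");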
- * - * @return the sort columns - */ - public List getSortCols() { - return sortCols; - } - - /** - * Gets the number of buckets. - * - * @return the number of buckets - */ - public int getNumBuckets() { - return numBuckets; - } - - /** - * Gets the storage handler. - * - * @return the storage handler - */ - public String getStorageHandler() { - return storageHandler; - } - - /** - * Gets the table props. - * - * @return the table props - */ - public Map getTblProps() { - return tblProps; - } - - /** - * Gets the tabletype. - * - * @return the tabletype - */ - public String getTabletype() { - return tabletype; - } - - /** - * Gets the input file format. - * - * @return the input file format - */ - public String getInputFileFormat() { - return inputFileFormat; - } - - /** - * Gets the output file format. - * - * @return the output file format - */ - public String getOutputFileFormat() { - return outputFileFormat; - } - - /** - * Gets the serde lib. - * - * @return the serde lib - */ - public String getSerdeLib() { - return serde; - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return location; - } - - @Override - public String toString() { - return "HCatTable [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (tabletype != null ? "tabletype=" + tabletype + ", " : "tabletype=null") - + (cols != null ? "cols=" + cols + ", " : "cols=null") - + (partCols != null ? "partCols=" + partCols + ", " : "partCols==null") - + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") - + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") - + "numBuckets=" - + numBuckets - + ", " - + (inputFileFormat != null ? "inputFileFormat=" - + inputFileFormat + ", " : "inputFileFormat=null") - + (outputFileFormat != null ? "outputFileFormat=" - + outputFileFormat + ", " : "outputFileFormat=null") - + (storageHandler != null ? "storageHandler=" + storageHandler - + ", " : "storageHandler=null") - + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") - + (serde != null ? "serde=" + serde + ", " : "serde=") - + (location != null ? 
"location=" + location : "location=") + "]"; - } + private String tableName; + private String tabletype; + private List cols; + private List partCols; + private List bucketCols; + private List sortCols; + private int numBuckets; + private String inputFileFormat; + private String outputFileFormat; + private String storageHandler; + private Map tblProps; + private String dbName; + private String serde; + private String location; + + HCatTable(Table hiveTable) throws HCatException { + this.tableName = hiveTable.getTableName(); + this.dbName = hiveTable.getDbName(); + this.tabletype = hiveTable.getTableType(); + cols = new ArrayList(); + for (FieldSchema colFS : hiveTable.getSd().getCols()) { + cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); + } + partCols = new ArrayList(); + for (FieldSchema colFS : hiveTable.getPartitionKeys()) { + partCols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); + } + bucketCols = hiveTable.getSd().getBucketCols(); + sortCols = hiveTable.getSd().getSortCols(); + numBuckets = hiveTable.getSd().getNumBuckets(); + inputFileFormat = hiveTable.getSd().getInputFormat(); + outputFileFormat = hiveTable.getSd().getOutputFormat(); + storageHandler = hiveTable + .getSd() + .getParameters() + .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); + tblProps = hiveTable.getParameters(); + serde = hiveTable.getSd().getSerdeInfo().getSerializationLib(); + location = hiveTable.getSd().getLocation(); + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return tableName; + } + + /** + * Gets the db name. + * + * @return the db name + */ + public String getDbName() { + return dbName; + } + + /** + * Gets the columns. + * + * @return the columns + */ + public List getCols() { + return cols; + } + + /** + * Gets the part columns. + * + * @return the part columns + */ + public List getPartCols() { + return partCols; + } + + /** + * Gets the bucket columns. + * + * @return the bucket columns + */ + public List getBucketCols() { + return bucketCols; + } + + /** + * Gets the sort columns. + * + * @return the sort columns + */ + public List getSortCols() { + return sortCols; + } + + /** + * Gets the number of buckets. + * + * @return the number of buckets + */ + public int getNumBuckets() { + return numBuckets; + } + + /** + * Gets the storage handler. + * + * @return the storage handler + */ + public String getStorageHandler() { + return storageHandler; + } + + /** + * Gets the table props. + * + * @return the table props + */ + public Map getTblProps() { + return tblProps; + } + + /** + * Gets the tabletype. + * + * @return the tabletype + */ + public String getTabletype() { + return tabletype; + } + + /** + * Gets the input file format. + * + * @return the input file format + */ + public String getInputFileFormat() { + return inputFileFormat; + } + + /** + * Gets the output file format. + * + * @return the output file format + */ + public String getOutputFileFormat() { + return outputFileFormat; + } + + /** + * Gets the serde lib. + * + * @return the serde lib + */ + public String getSerdeLib() { + return serde; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return location; + } + + @Override + public String toString() { + return "HCatTable [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (tabletype != null ? 
"tabletype=" + tabletype + ", " : "tabletype=null") + + (cols != null ? "cols=" + cols + ", " : "cols=null") + + (partCols != null ? "partCols=" + partCols + ", " : "partCols==null") + + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") + + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") + + "numBuckets=" + + numBuckets + + ", " + + (inputFileFormat != null ? "inputFileFormat=" + + inputFileFormat + ", " : "inputFileFormat=null") + + (outputFileFormat != null ? "outputFileFormat=" + + outputFileFormat + ", " : "outputFileFormat=null") + + (storageHandler != null ? "storageHandler=" + storageHandler + + ", " : "storageHandler=null") + + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") + + (serde != null ? "serde=" + serde + ", " : "serde=") + + (location != null ? "location=" + location : "location=") + "]"; + } } diff --git a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ObjectNotFoundException.java b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ObjectNotFoundException.java index af6815a..194fd07 100644 --- a/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ObjectNotFoundException.java +++ b/hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ObjectNotFoundException.java @@ -27,13 +27,13 @@ */ public class ObjectNotFoundException extends HCatException { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - /** - * @param message Exception message. - * @param cause The wrapped Throwable that caused this exception. - */ - public ObjectNotFoundException(String message, Throwable cause) { - super(message, cause); - } + /** + * @param message Exception message. + * @param cause The wrapped Throwable that caused this exception. + */ + public ObjectNotFoundException(String message, Throwable cause) { + super(message, cause); + } } diff --git a/hcatalog/webhcat/java-client/src/test/java/org/apache/hcatalog/api/TestHCatClient.java b/hcatalog/webhcat/java-client/src/test/java/org/apache/hcatalog/api/TestHCatClient.java index 5b4ae16..b33a8c3 100644 --- a/hcatalog/webhcat/java-client/src/test/java/org/apache/hcatalog/api/TestHCatClient.java +++ b/hcatalog/webhcat/java-client/src/test/java/org/apache/hcatalog/api/TestHCatClient.java @@ -57,583 +57,583 @@ * @deprecated Use/modify {@link org.apache.hive.hcatalog.api.TestHCatClient} instead */ public class TestHCatClient { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatClient.class); - private static final String msPort = "20101"; - private static HiveConf hcatConf; - private static SecurityManager securityManager; - - private static class RunMS implements Runnable { - - @Override - public void run() { - try { - HiveMetaStore.main(new String[]{"-v", "-p", msPort}); - } catch (Throwable t) { - LOG.error("Exiting. Got exception from metastore: ", t); - } - } + private static final Logger LOG = LoggerFactory.getLogger(TestHCatClient.class); + private static final String msPort = "20101"; + private static HiveConf hcatConf; + private static SecurityManager securityManager; + + private static class RunMS implements Runnable { + + @Override + public void run() { + try { + HiveMetaStore.main(new String[]{"-v", "-p", msPort}); + } catch (Throwable t) { + LOG.error("Exiting. 
Got exception from metastore: ", t); + } } - - @AfterClass - public static void tearDown() throws Exception { - LOG.info("Shutting down metastore."); - System.setSecurityManager(securityManager); - } - - @BeforeClass - public static void startMetaStoreServer() throws Exception { - - Thread t = new Thread(new RunMS()); - t.start(); - Thread.sleep(40000); - - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - hcatConf = new HiveConf(TestHCatClient.class); - hcatConf.set("hive.metastore.local", "false"); - hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" - + msPort); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, - "false"); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - } - - @Test - public void testBasicDDLCommands() throws Exception { - String db = "testdb"; - String tableOne = "testTable1"; - String tableTwo = "testTable2"; - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - client.dropDatabase(db, true, HCatClient.DropDBMode.CASCADE); - - HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(db).ifNotExists(false) - .build(); - client.createDatabase(dbDesc); - List dbNames = client.listDatabaseNamesByPattern("*"); - assertTrue(dbNames.contains("default")); - assertTrue(dbNames.contains(db)); - - HCatDatabase testDb = client.getDatabase(db); - assertTrue(testDb.getComment() == null); - assertTrue(testDb.getProperties().size() == 0); - String warehouseDir = System - .getProperty(ConfVars.METASTOREWAREHOUSE.varname, "/user/hive/warehouse"); - assertTrue(testDb.getLocation().equals( - "file:" + warehouseDir + "/" + db + ".db")); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id comment")); - cols.add(new HCatFieldSchema("value", Type.STRING, "value comment")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(db, tableOne, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - HCatTable table1 = client.getTable(db, tableOne); - assertTrue(table1.getInputFileFormat().equalsIgnoreCase( - RCFileInputFormat.class.getName())); - assertTrue(table1.getOutputFileFormat().equalsIgnoreCase( - RCFileOutputFormat.class.getName())); - assertTrue(table1.getSerdeLib().equalsIgnoreCase( - ColumnarSerDe.class.getName())); - assertTrue(table1.getCols().equals(cols)); - // Since "ifexists" was not set to true, trying to create the same table - // again - // will result in an exception. 
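// ---- Illustrative sketch (editorial addition, not part of this patch) ----
// The catch-only check that follows passes silently if createTable() never throws.
// A stricter variant -- assuming a static import of org.junit.Assert.fail, which this
// test class does not currently have -- would fail the test if the duplicate create succeeds:
try {
  client.createTable(tableDesc);
  fail("Expected HCatException: table " + tableOne + " already exists."); // assumes org.junit.Assert.fail
} catch (HCatException e) {
  assertTrue(e.getMessage().contains("AlreadyExistsException while creating table."));
}
// ---- End of sketch ----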
- try { - client.createTable(tableDesc); - } catch (HCatException e) { - assertTrue(e.getMessage().contains( - "AlreadyExistsException while creating table.")); - } - - client.dropTable(db, tableOne, true); - HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc.create(db, - tableTwo, cols).build(); - client.createTable(tableDesc2); - HCatTable table2 = client.getTable(db, tableTwo); - assertTrue(table2.getInputFileFormat().equalsIgnoreCase( - TextInputFormat.class.getName())); - assertTrue(table2.getOutputFileFormat().equalsIgnoreCase( - IgnoreKeyTextOutputFormat.class.getName())); - assertTrue(table2.getLocation().equalsIgnoreCase( - "file:" + warehouseDir + "/" + db + ".db/" + tableTwo)); - client.close(); - } - - @Test - public void testPartitionsHCatClientImpl() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "ptnDB"; - String tableName = "pageView"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) - .ifNotExists(true).build(); - client.createDatabase(dbDesc); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("userid", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("viewtime", Type.BIGINT, - "view time columns")); - cols.add(new HCatFieldSchema("pageurl", Type.STRING, "")); - cols.add(new HCatFieldSchema("ip", Type.STRING, - "IP Address of the User")); - - ArrayList ptnCols = new ArrayList(); - ptnCols.add(new HCatFieldSchema("dt", Type.STRING, "date column")); - ptnCols.add(new HCatFieldSchema("country", Type.STRING, - "country column")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(dbName, tableName, cols).fileFormat("sequencefile") - .partCols(ptnCols).build(); - client.createTable(tableDesc); - - Map firstPtn = new HashMap(); - firstPtn.put("dt", "04/30/2012"); - firstPtn.put("country", "usa"); - HCatAddPartitionDesc addPtn = HCatAddPartitionDesc.create(dbName, - tableName, null, firstPtn).build(); - client.addPartition(addPtn); - - Map secondPtn = new HashMap(); - secondPtn.put("dt", "04/12/2012"); - secondPtn.put("country", "brazil"); - HCatAddPartitionDesc addPtn2 = HCatAddPartitionDesc.create(dbName, - tableName, null, secondPtn).build(); - client.addPartition(addPtn2); - - Map thirdPtn = new HashMap(); - thirdPtn.put("dt", "04/13/2012"); - thirdPtn.put("country", "argentina"); - HCatAddPartitionDesc addPtn3 = HCatAddPartitionDesc.create(dbName, - tableName, null, thirdPtn).build(); - client.addPartition(addPtn3); - - List ptnList = client.listPartitionsByFilter(dbName, - tableName, null); - assertTrue(ptnList.size() == 3); - - HCatPartition ptn = client.getPartition(dbName, tableName, firstPtn); - assertTrue(ptn != null); - - client.dropPartitions(dbName, tableName, firstPtn, true); - ptnList = client.listPartitionsByFilter(dbName, - tableName, null); - assertTrue(ptnList.size() == 2); - - List ptnListTwo = client.listPartitionsByFilter(dbName, - tableName, "country = \"argentina\""); - assertTrue(ptnListTwo.size() == 1); - - client.markPartitionForEvent(dbName, tableName, thirdPtn, - PartitionEventType.LOAD_DONE); - boolean isMarked = client.isPartitionMarkedForEvent(dbName, tableName, - thirdPtn, PartitionEventType.LOAD_DONE); - assertTrue(isMarked); - client.close(); + } + + @AfterClass + public static void tearDown() throws Exception { + LOG.info("Shutting down metastore."); + System.setSecurityManager(securityManager); + } + + @BeforeClass + public static void startMetaStoreServer() throws 
Exception { + + Thread t = new Thread(new RunMS()); + t.start(); + Thread.sleep(40000); + + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + hcatConf = new HiveConf(TestHCatClient.class); + hcatConf.set("hive.metastore.local", "false"); + hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + + msPort); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, + "false"); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + @Test + public void testBasicDDLCommands() throws Exception { + String db = "testdb"; + String tableOne = "testTable1"; + String tableTwo = "testTable2"; + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + client.dropDatabase(db, true, HCatClient.DropDBMode.CASCADE); + + HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(db).ifNotExists(false) + .build(); + client.createDatabase(dbDesc); + List dbNames = client.listDatabaseNamesByPattern("*"); + assertTrue(dbNames.contains("default")); + assertTrue(dbNames.contains(db)); + + HCatDatabase testDb = client.getDatabase(db); + assertTrue(testDb.getComment() == null); + assertTrue(testDb.getProperties().size() == 0); + String warehouseDir = System + .getProperty(ConfVars.METASTOREWAREHOUSE.varname, "/user/hive/warehouse"); + assertTrue(testDb.getLocation().equals( + "file:" + warehouseDir + "/" + db + ".db")); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id comment")); + cols.add(new HCatFieldSchema("value", Type.STRING, "value comment")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(db, tableOne, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + HCatTable table1 = client.getTable(db, tableOne); + assertTrue(table1.getInputFileFormat().equalsIgnoreCase( + RCFileInputFormat.class.getName())); + assertTrue(table1.getOutputFileFormat().equalsIgnoreCase( + RCFileOutputFormat.class.getName())); + assertTrue(table1.getSerdeLib().equalsIgnoreCase( + ColumnarSerDe.class.getName())); + assertTrue(table1.getCols().equals(cols)); + // Since "ifexists" was not set to true, trying to create the same table + // again + // will result in an exception. 
+ try { + client.createTable(tableDesc); + } catch (HCatException e) { + assertTrue(e.getMessage().contains( + "AlreadyExistsException while creating table.")); } - @Test - public void testDatabaseLocation() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "locationDB"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) - .ifNotExists(true).location("/tmp/" + dbName).build(); - client.createDatabase(dbDesc); - HCatDatabase newDB = client.getDatabase(dbName); - assertTrue(newDB.getLocation().equalsIgnoreCase("file:/tmp/" + dbName)); - client.close(); + client.dropTable(db, tableOne, true); + HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc.create(db, + tableTwo, cols).build(); + client.createTable(tableDesc2); + HCatTable table2 = client.getTable(db, tableTwo); + assertTrue(table2.getInputFileFormat().equalsIgnoreCase( + TextInputFormat.class.getName())); + assertTrue(table2.getOutputFileFormat().equalsIgnoreCase( + IgnoreKeyTextOutputFormat.class.getName())); + assertTrue(table2.getLocation().equalsIgnoreCase( + "file:" + warehouseDir + "/" + db + ".db/" + tableTwo)); + client.close(); + } + + @Test + public void testPartitionsHCatClientImpl() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "ptnDB"; + String tableName = "pageView"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) + .ifNotExists(true).build(); + client.createDatabase(dbDesc); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("userid", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("viewtime", Type.BIGINT, + "view time columns")); + cols.add(new HCatFieldSchema("pageurl", Type.STRING, "")); + cols.add(new HCatFieldSchema("ip", Type.STRING, + "IP Address of the User")); + + ArrayList ptnCols = new ArrayList(); + ptnCols.add(new HCatFieldSchema("dt", Type.STRING, "date column")); + ptnCols.add(new HCatFieldSchema("country", Type.STRING, + "country column")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(dbName, tableName, cols).fileFormat("sequencefile") + .partCols(ptnCols).build(); + client.createTable(tableDesc); + + Map firstPtn = new HashMap(); + firstPtn.put("dt", "04/30/2012"); + firstPtn.put("country", "usa"); + HCatAddPartitionDesc addPtn = HCatAddPartitionDesc.create(dbName, + tableName, null, firstPtn).build(); + client.addPartition(addPtn); + + Map secondPtn = new HashMap(); + secondPtn.put("dt", "04/12/2012"); + secondPtn.put("country", "brazil"); + HCatAddPartitionDesc addPtn2 = HCatAddPartitionDesc.create(dbName, + tableName, null, secondPtn).build(); + client.addPartition(addPtn2); + + Map thirdPtn = new HashMap(); + thirdPtn.put("dt", "04/13/2012"); + thirdPtn.put("country", "argentina"); + HCatAddPartitionDesc addPtn3 = HCatAddPartitionDesc.create(dbName, + tableName, null, thirdPtn).build(); + client.addPartition(addPtn3); + + List ptnList = client.listPartitionsByFilter(dbName, + tableName, null); + assertTrue(ptnList.size() == 3); + + HCatPartition ptn = client.getPartition(dbName, tableName, firstPtn); + assertTrue(ptn != null); + + client.dropPartitions(dbName, tableName, firstPtn, true); + ptnList = client.listPartitionsByFilter(dbName, + tableName, null); + assertTrue(ptnList.size() == 2); + + List ptnListTwo = client.listPartitionsByFilter(dbName, + tableName, "country = 
\"argentina\""); + assertTrue(ptnListTwo.size() == 1); + + client.markPartitionForEvent(dbName, tableName, thirdPtn, + PartitionEventType.LOAD_DONE); + boolean isMarked = client.isPartitionMarkedForEvent(dbName, tableName, + thirdPtn, PartitionEventType.LOAD_DONE); + assertTrue(isMarked); + client.close(); + } + + @Test + public void testDatabaseLocation() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "locationDB"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) + .ifNotExists(true).location("/tmp/" + dbName).build(); + client.createDatabase(dbDesc); + HCatDatabase newDB = client.getDatabase(dbName); + assertTrue(newDB.getLocation().equalsIgnoreCase("file:/tmp/" + dbName)); + client.close(); + } + + @Test + public void testCreateTableLike() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "tableone"; + String cloneTable = "tabletwo"; + client.dropTable(null, tableName, true); + client.dropTable(null, cloneTable, true); + + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + // create a new table similar to previous one. + client.createTableLike(null, tableName, cloneTable, true, false, null); + List tables = client.listTableNamesByPattern(null, "table*"); + assertTrue(tables.size() == 2); + client.close(); + } + + @Test + public void testRenameTable() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "temptable"; + String newName = "mytable"; + client.dropTable(null, tableName, true); + client.dropTable(null, newName, true); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + client.renameTable(null, tableName, newName); + try { + client.getTable(null, tableName); + } catch (HCatException exp) { + assertTrue("Unexpected exception message: " + exp.getMessage(), + exp.getMessage().contains("NoSuchObjectException while fetching table")); } - - @Test - public void testCreateTableLike() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "tableone"; - String cloneTable = "tabletwo"; - client.dropTable(null, tableName, true); - client.dropTable(null, cloneTable, true); - - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - // create a new table similar to previous one. 
- client.createTableLike(null, tableName, cloneTable, true, false, null); - List tables = client.listTableNamesByPattern(null, "table*"); - assertTrue(tables.size() == 2); - client.close(); + HCatTable newTable = client.getTable(null, newName); + assertTrue(newTable != null); + assertTrue(newTable.getTableName().equals(newName)); + client.close(); + } + + @Test + public void testTransportFailure() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + boolean isExceptionCaught = false; + // Table creation with a long table name causes ConnectionFailureException + final String tableName = "Temptable" + new BigInteger(200, new Random()).toString(2); + + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + try { + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + } catch (Exception exp) { + isExceptionCaught = true; + assertEquals("Unexpected exception type.", HCatException.class, exp.getClass()); + // The connection was closed, so create a new one. + client = HCatClient.create(new Configuration(hcatConf)); + String newName = "goodTable"; + client.dropTable(null, newName, true); + HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc + .create(null, newName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc2); + HCatTable newTable = client.getTable(null, newName); + assertTrue(newTable != null); + assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); + + } finally { + client.close(); + assertTrue("The expected exception was never thrown.", isExceptionCaught); } - - @Test - public void testRenameTable() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "temptable"; - String newName = "mytable"; - client.dropTable(null, tableName, true); - client.dropTable(null, newName, true); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - client.renameTable(null, tableName, newName); - try { - client.getTable(null, tableName); - } catch (HCatException exp) { - assertTrue("Unexpected exception message: " + exp.getMessage(), - exp.getMessage().contains("NoSuchObjectException while fetching table")); - } - HCatTable newTable = client.getTable(null, newName); - assertTrue(newTable != null); - assertTrue(newTable.getTableName().equals(newName)); - client.close(); + } + + @Test + public void testOtherFailure() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "Temptable"; + boolean isExceptionCaught = false; + client.dropTable(null, tableName, true); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + try { + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + // The DB foo is non-existent. 
+ client.getTable("foo", tableName); + } catch (Exception exp) { + isExceptionCaught = true; + assertTrue(exp instanceof HCatException); + String newName = "goodTable"; + client.dropTable(null, newName, true); + HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc + .create(null, newName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc2); + HCatTable newTable = client.getTable(null, newName); + assertTrue(newTable != null); + assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); + } finally { + client.close(); + assertTrue("The expected exception was never thrown.", isExceptionCaught); } - - @Test - public void testTransportFailure() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - boolean isExceptionCaught = false; - // Table creation with a long table name causes ConnectionFailureException - final String tableName = "Temptable" + new BigInteger(200, new Random()).toString(2); - - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - try { - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - } catch (Exception exp) { - isExceptionCaught = true; - assertEquals("Unexpected exception type.", HCatException.class, exp.getClass()); - // The connection was closed, so create a new one. - client = HCatClient.create(new Configuration(hcatConf)); - String newName = "goodTable"; - client.dropTable(null, newName, true); - HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc - .create(null, newName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc2); - HCatTable newTable = client.getTable(null, newName); - assertTrue(newTable != null); - assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); - - } finally { - client.close(); - assertTrue("The expected exception was never thrown.", isExceptionCaught); - } + } + + @Test + public void testDropTableException() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "tableToBeDropped"; + boolean isExceptionCaught = false; + client.dropTable(null, tableName, true); + try { + client.dropTable(null, tableName, false); + } catch (Exception exp) { + isExceptionCaught = true; + assertTrue(exp instanceof HCatException); + LOG.info("Drop Table Exception: " + exp.getCause()); + } finally { + client.close(); + assertTrue("The expected exception was never thrown.", isExceptionCaught); } + } - @Test - public void testOtherFailure() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "Temptable"; - boolean isExceptionCaught = false; - client.dropTable(null, tableName, true); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - try { - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - // The DB foo is non-existent. 
- client.getTable("foo", tableName); - } catch (Exception exp) { - isExceptionCaught = true; - assertTrue(exp instanceof HCatException); - String newName = "goodTable"; - client.dropTable(null, newName, true); - HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc - .create(null, newName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc2); - HCatTable newTable = client.getTable(null, newName); - assertTrue(newTable != null); - assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); - } finally { - client.close(); - assertTrue("The expected exception was never thrown.", isExceptionCaught); - } - } + @Test + public void testUpdateTableSchema() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "testUpdateTableSchema_DBName"; + final String tableName = "testUpdateTableSchema_TableName"; - @Test - public void testDropTableException() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "tableToBeDropped"; - boolean isExceptionCaught = false; - client.dropTable(null, tableName, true); - try { - client.dropTable(null, tableName, false); - } catch (Exception exp) { - isExceptionCaught = true; - assertTrue(exp instanceof HCatException); - LOG.info("Drop Table Exception: " + exp.getCause()); - } finally { - client.close(); - assertTrue("The expected exception was never thrown.", isExceptionCaught); - } - } + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - @Test - public void testUpdateTableSchema() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "testUpdateTableSchema_DBName"; - final String tableName = "testUpdateTableSchema_TableName"; + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List oldSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, oldSchema).build()); - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + List newSchema = Arrays.asList(new HCatFieldSchema("completely", Type.DOUBLE, ""), + new HCatFieldSchema("new", Type.FLOAT, ""), + new HCatFieldSchema("fields", Type.STRING, "")); - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List oldSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, oldSchema).build()); + client.updateTableSchema(dbName, tableName, newSchema); - List newSchema = Arrays.asList(new HCatFieldSchema("completely", Type.DOUBLE, ""), - new HCatFieldSchema("new", Type.FLOAT, ""), - new HCatFieldSchema("fields", Type.STRING, "")); + assertArrayEquals(newSchema.toArray(), client.getTable(dbName, tableName).getCols().toArray()); - client.updateTableSchema(dbName, tableName, newSchema); - - assertArrayEquals(newSchema.toArray(), client.getTable(dbName, tableName).getCols().toArray()); - - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception exception) { - LOG.error("Unexpected exception.", exception); - assertTrue("Unexpected exception: " + exception.getMessage(), false); - } + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); } - - @Test - public void testObjectNotFoundException() throws Exception { - try { - - HCatClient client = HCatClient.create(new 
Configuration(hcatConf)); - String dbName = "testObjectNotFoundException_DBName"; - String tableName = "testObjectNotFoundException_TableName"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - try { // Test that fetching a non-existent db-name yields ObjectNotFound. - client.getDatabase(dbName); - assertTrue("Expected ObjectNotFoundException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), - exception instanceof ObjectNotFoundException); - } - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - - try { // Test that fetching a non-existent table-name yields ObjectNotFound. - client.getTable(dbName, tableName); - assertTrue("Expected ObjectNotFoundException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), - exception instanceof ObjectNotFoundException); - } - - String partitionColumn = "part"; - - List columns = Arrays.asList(new HCatFieldSchema("col", Type.STRING, "")); - ArrayList partitionColumns = new ArrayList( - Arrays.asList(new HCatFieldSchema(partitionColumn, Type.STRING, ""))); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columns) - .partCols(partitionColumns) - .build()); - - Map partitionSpec = new HashMap(); - partitionSpec.put(partitionColumn, "foobar"); - try { // Test that fetching a non-existent partition yields ObjectNotFound. - client.getPartition(dbName, tableName, partitionSpec); - assertTrue("Expected ObjectNotFoundException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), - exception instanceof ObjectNotFoundException); - } - - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - - // Test that listPartitionsByFilter() returns an empty-set, if the filter selects no partitions. - assertEquals("Expected empty set of partitions.", - 0, client.listPartitionsByFilter(dbName, tableName, partitionColumn + " < 'foobar'").size()); - - try { // Test that listPartitionsByFilter() throws HCatException if the partition-key is incorrect. - partitionSpec.put("NonExistentKey", "foobar"); - client.getPartition(dbName, tableName, partitionSpec); - assertTrue("Expected HCatException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected HCatException. Got:" + exception.getClass(), - exception instanceof HCatException); - assertFalse("Did not expect ObjectNotFoundException.", exception instanceof ObjectNotFoundException); - } - - } - catch (Throwable t) { - LOG.error("Unexpected exception!", t); - assertTrue("Unexpected exception! " + t.getMessage(), false); - } + catch (Exception exception) { + LOG.error("Unexpected exception.", exception); + assertTrue("Unexpected exception: " + exception.getMessage(), false); } + } + + @Test + public void testObjectNotFoundException() throws Exception { + try { + + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "testObjectNotFoundException_DBName"; + String tableName = "testObjectNotFoundException_TableName"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + try { // Test that fetching a non-existent db-name yields ObjectNotFound. 
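// ---- Illustrative sketch (editorial addition, not part of this patch) ----
// The expect-ObjectNotFoundException pattern below is repeated for databases, tables and
// partitions. A class-level helper could factor it out; this is only a sketch, with a
// hypothetical name, assuming java.util.concurrent.Callable and a static import of
// org.junit.Assert.fail (on pre-Java-8 compilers the captured locals would need to be final):
//
//   private static void assertObjectNotFound(Callable<?> action) throws Exception {
//     try {
//       action.call();
//       fail("Expected ObjectNotFoundException.");
//     } catch (ObjectNotFoundException expected) {
//       LOG.info("Got expected exception: ", expected);
//     }
//   }
//
// Usage, e.g.: assertObjectNotFound(new Callable<Object>() {
//   public Object call() throws Exception { return client.getDatabase(dbName); } });
// ---- End of sketch ----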
+ client.getDatabase(dbName); + assertTrue("Expected ObjectNotFoundException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), + exception instanceof ObjectNotFoundException); + } + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + + try { // Test that fetching a non-existent table-name yields ObjectNotFound. + client.getTable(dbName, tableName); + assertTrue("Expected ObjectNotFoundException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), + exception instanceof ObjectNotFoundException); + } + + String partitionColumn = "part"; + + List columns = Arrays.asList(new HCatFieldSchema("col", Type.STRING, "")); + ArrayList partitionColumns = new ArrayList( + Arrays.asList(new HCatFieldSchema(partitionColumn, Type.STRING, ""))); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columns) + .partCols(partitionColumns) + .build()); + + Map partitionSpec = new HashMap(); + partitionSpec.put(partitionColumn, "foobar"); + try { // Test that fetching a non-existent partition yields ObjectNotFound. + client.getPartition(dbName, tableName, partitionSpec); + assertTrue("Expected ObjectNotFoundException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), + exception instanceof ObjectNotFoundException); + } + + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + + // Test that listPartitionsByFilter() returns an empty-set, if the filter selects no partitions. + assertEquals("Expected empty set of partitions.", + 0, client.listPartitionsByFilter(dbName, tableName, partitionColumn + " < 'foobar'").size()); + + try { // Test that listPartitionsByFilter() throws HCatException if the partition-key is incorrect. + partitionSpec.put("NonExistentKey", "foobar"); + client.getPartition(dbName, tableName, partitionSpec); + assertTrue("Expected HCatException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected HCatException. 
Got:" + exception.getClass(), + exception instanceof HCatException); + assertFalse("Did not expect ObjectNotFoundException.", exception instanceof ObjectNotFoundException); + } - @Test - public void testGetMessageBusTopicName() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "testGetMessageBusTopicName_DBName"; - String tableName = "testGetMessageBusTopicName_TableName"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - String messageBusTopicName = "MY.topic.name"; - Map tableProperties = new HashMap(1); - tableProperties.put(HCatConstants.HCAT_MSGBUS_TOPIC_NAME, messageBusTopicName); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, Arrays.asList(new HCatFieldSchema("foo", Type.STRING, ""))).tblProps(tableProperties).build()); - - assertEquals("MessageBus topic-name doesn't match!", messageBusTopicName, client.getMessageBusTopicName(dbName, tableName)); - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - client.close(); - } - catch (Exception exception) { - LOG.error("Unexpected exception.", exception); - assertTrue("Unexpected exception:" + exception.getMessage(), false); - } } + catch (Throwable t) { + LOG.error("Unexpected exception!", t); + assertTrue("Unexpected exception! " + t.getMessage(), false); + } + } + + @Test + public void testGetMessageBusTopicName() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "testGetMessageBusTopicName_DBName"; + String tableName = "testGetMessageBusTopicName_TableName"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + String messageBusTopicName = "MY.topic.name"; + Map tableProperties = new HashMap(1); + tableProperties.put(HCatConstants.HCAT_MSGBUS_TOPIC_NAME, messageBusTopicName); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, Arrays.asList(new HCatFieldSchema("foo", Type.STRING, ""))).tblProps(tableProperties).build()); + + assertEquals("MessageBus topic-name doesn't match!", messageBusTopicName, client.getMessageBusTopicName(dbName, tableName)); + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + client.close(); + } + catch (Exception exception) { + LOG.error("Unexpected exception.", exception); + assertTrue("Unexpected exception:" + exception.getMessage(), false); + } + } - @Test - public void testPartitionSchema() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "myDb"; - final String tableName = "myTable"; + @Test + public void testPartitionSchema() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "myDb"; + final String tableName = "myTable"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); - List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, 
""), - new HCatFieldSchema("grid", Type.STRING, "")); + List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), + new HCatFieldSchema("grid", Type.STRING, "")); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(partitionSchema).build()); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(partitionSchema).build()); - HCatTable table = client.getTable(dbName, tableName); - List partitionColumns = table.getPartCols(); + HCatTable table = client.getTable(dbName, tableName); + List partitionColumns = table.getPartCols(); - assertArrayEquals("Didn't get expected partition-schema back from the HCatTable.", - partitionSchema.toArray(), partitionColumns.toArray()); - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception unexpected) { - LOG.error("Unexpected exception!", unexpected); - assertTrue("Unexpected exception! " + unexpected.getMessage(), false); - } + assertArrayEquals("Didn't get expected partition-schema back from the HCatTable.", + partitionSchema.toArray(), partitionColumns.toArray()); + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); } - - @Test - public void testGetPartitionsWithPartialSpec() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "myDb"; - final String tableName = "myTable"; - - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - - List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), - new HCatFieldSchema("grid", Type.STRING, "")); - - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); - - Map partitionSpec = new HashMap(); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2011_12_31"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2012_01_01"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "OB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "XB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - - Map partialPartitionSpec = new HashMap(); - partialPartitionSpec.put("dt", "2012_01_01"); - - List partitions = client.getPartitions(dbName, tableName, partialPartitionSpec); - assertEquals("Unexpected number of partitions.", 3, partitions.size()); - assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "AB"}, partitions.get(0).getValues().toArray()); - assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "OB"}, partitions.get(1).getValues().toArray()); - assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "XB"}, partitions.get(2).getValues().toArray()); - - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception unexpected) { - LOG.error("Unexpected exception!", unexpected); - assertTrue("Unexpected exception! 
" + unexpected.getMessage(), false); - } + catch (Exception unexpected) { + LOG.error("Unexpected exception!", unexpected); + assertTrue("Unexpected exception! " + unexpected.getMessage(), false); } - - @Test - public void testDropPartitionsWithPartialSpec() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "myDb"; - final String tableName = "myTable"; - - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - - List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), - new HCatFieldSchema("grid", Type.STRING, "")); - - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); - - Map partitionSpec = new HashMap(); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2011_12_31"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2012_01_01"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "OB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "XB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - - Map partialPartitionSpec = new HashMap(); - partialPartitionSpec.put("dt", "2012_01_01"); - - client.dropPartitions(dbName, tableName, partialPartitionSpec, true); - - List partitions = client.getPartitions(dbName, tableName); - assertEquals("Unexpected number of partitions.", 1, partitions.size()); - assertArrayEquals("Mismatched partition.", new String[]{"2011_12_31", "AB"}, partitions.get(0).getValues().toArray()); - - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception unexpected) { - LOG.error("Unexpected exception!", unexpected); - assertTrue("Unexpected exception! 
" + unexpected.getMessage(), false); - } + } + + @Test + public void testGetPartitionsWithPartialSpec() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "myDb"; + final String tableName = "myTable"; + + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + + List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), + new HCatFieldSchema("grid", Type.STRING, "")); + + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); + + Map partitionSpec = new HashMap(); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2011_12_31"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2012_01_01"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "OB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "XB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + + Map partialPartitionSpec = new HashMap(); + partialPartitionSpec.put("dt", "2012_01_01"); + + List partitions = client.getPartitions(dbName, tableName, partialPartitionSpec); + assertEquals("Unexpected number of partitions.", 3, partitions.size()); + assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "AB"}, partitions.get(0).getValues().toArray()); + assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "OB"}, partitions.get(1).getValues().toArray()); + assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "XB"}, partitions.get(2).getValues().toArray()); + + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); + } + catch (Exception unexpected) { + LOG.error("Unexpected exception!", unexpected); + assertTrue("Unexpected exception! 
" + unexpected.getMessage(), false); + } + } + + @Test + public void testDropPartitionsWithPartialSpec() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "myDb"; + final String tableName = "myTable"; + + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + + List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), + new HCatFieldSchema("grid", Type.STRING, "")); + + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); + + Map partitionSpec = new HashMap(); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2011_12_31"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2012_01_01"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "OB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "XB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + + Map partialPartitionSpec = new HashMap(); + partialPartitionSpec.put("dt", "2012_01_01"); + + client.dropPartitions(dbName, tableName, partialPartitionSpec, true); + + List partitions = client.getPartitions(dbName, tableName); + assertEquals("Unexpected number of partitions.", 1, partitions.size()); + assertArrayEquals("Mismatched partition.", new String[]{"2011_12_31", "AB"}, partitions.get(0).getValues().toArray()); + + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); + } + catch (Exception unexpected) { + LOG.error("Unexpected exception!", unexpected); + assertTrue("Unexpected exception! " + unexpected.getMessage(), false); } + } } diff --git a/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java b/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java index 357856c..b18f8c8 100644 --- a/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java +++ b/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java @@ -54,587 +54,587 @@ import static org.junit.Assert.assertArrayEquals; public class TestHCatClient { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatClient.class); - private static final String msPort = "20101"; - private static HiveConf hcatConf; - private static SecurityManager securityManager; - - private static class RunMS implements Runnable { - - @Override - public void run() { - try { - HiveMetaStore.main(new String[]{"-v", "-p", msPort}); - } catch (Throwable t) { - LOG.error("Exiting. 
Got exception from metastore: ", t); - } - } + private static final Logger LOG = LoggerFactory.getLogger(TestHCatClient.class); + private static final String msPort = "20101"; + private static HiveConf hcatConf; + private static SecurityManager securityManager; + + private static class RunMS implements Runnable { + + @Override + public void run() { + try { + HiveMetaStore.main(new String[]{"-v", "-p", msPort}); + } catch (Throwable t) { + LOG.error("Exiting. Got exception from metastore: ", t); + } } - - @AfterClass - public static void tearDown() throws Exception { - LOG.info("Shutting down metastore."); - System.setSecurityManager(securityManager); - } - - @BeforeClass - public static void startMetaStoreServer() throws Exception { - - Thread t = new Thread(new RunMS()); - t.start(); - Thread.sleep(40000); - - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - hcatConf = new HiveConf(TestHCatClient.class); - hcatConf.set("hive.metastore.local", "false"); - hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" - + msPort); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, - "false"); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + @AfterClass + public static void tearDown() throws Exception { + LOG.info("Shutting down metastore."); + System.setSecurityManager(securityManager); + } + + @BeforeClass + public static void startMetaStoreServer() throws Exception { + + Thread t = new Thread(new RunMS()); + t.start(); + Thread.sleep(40000); + + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + hcatConf = new HiveConf(TestHCatClient.class); + hcatConf.set("hive.metastore.local", "false"); + hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + + msPort); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, + "false"); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + @Test + public void testBasicDDLCommands() throws Exception { + String db = "testdb"; + String tableOne = "testTable1"; + String tableTwo = "testTable2"; + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + client.dropDatabase(db, true, HCatClient.DropDBMode.CASCADE); + + HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(db).ifNotExists(false) + .build(); + client.createDatabase(dbDesc); + List dbNames = client.listDatabaseNamesByPattern("*"); + assertTrue(dbNames.contains("default")); + assertTrue(dbNames.contains(db)); + + HCatDatabase testDb = client.getDatabase(db); + assertTrue(testDb.getComment() == null); + assertTrue(testDb.getProperties().size() == 0); + String warehouseDir = System + .getProperty(ConfVars.METASTOREWAREHOUSE.varname, 
"/user/hive/warehouse"); + String expectedDir = warehouseDir.replaceAll("\\\\", "/"); + if (!expectedDir.startsWith("/")) { + expectedDir = "/" + expectedDir; } - - @Test - public void testBasicDDLCommands() throws Exception { - String db = "testdb"; - String tableOne = "testTable1"; - String tableTwo = "testTable2"; - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - client.dropDatabase(db, true, HCatClient.DropDBMode.CASCADE); - - HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(db).ifNotExists(false) - .build(); - client.createDatabase(dbDesc); - List dbNames = client.listDatabaseNamesByPattern("*"); - assertTrue(dbNames.contains("default")); - assertTrue(dbNames.contains(db)); - - HCatDatabase testDb = client.getDatabase(db); - assertTrue(testDb.getComment() == null); - assertTrue(testDb.getProperties().size() == 0); - String warehouseDir = System - .getProperty(ConfVars.METASTOREWAREHOUSE.varname, "/user/hive/warehouse"); - String expectedDir = warehouseDir.replaceAll("\\\\", "/"); - if (!expectedDir.startsWith("/")) { - expectedDir = "/" + expectedDir; - } - assertTrue(testDb.getLocation().equals( - "file:" + expectedDir + "/" + db + ".db")); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id comment")); - cols.add(new HCatFieldSchema("value", Type.STRING, "value comment")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(db, tableOne, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - HCatTable table1 = client.getTable(db, tableOne); - assertTrue(table1.getInputFileFormat().equalsIgnoreCase( - RCFileInputFormat.class.getName())); - assertTrue(table1.getOutputFileFormat().equalsIgnoreCase( - RCFileOutputFormat.class.getName())); - assertTrue(table1.getSerdeLib().equalsIgnoreCase( - ColumnarSerDe.class.getName())); - assertTrue(table1.getCols().equals(cols)); - // Since "ifexists" was not set to true, trying to create the same table - // again - // will result in an exception. 
- try { - client.createTable(tableDesc); - } catch (HCatException e) { - assertTrue(e.getMessage().contains( - "AlreadyExistsException while creating table.")); - } - - client.dropTable(db, tableOne, true); - HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc.create(db, - tableTwo, cols).build(); - client.createTable(tableDesc2); - HCatTable table2 = client.getTable(db, tableTwo); - assertTrue(table2.getInputFileFormat().equalsIgnoreCase( - TextInputFormat.class.getName())); - assertTrue(table2.getOutputFileFormat().equalsIgnoreCase( - IgnoreKeyTextOutputFormat.class.getName())); - assertTrue(table2.getLocation().equalsIgnoreCase( - "file:" + expectedDir + "/" + db + ".db/" + tableTwo)); - client.close(); - } - - @Test - public void testPartitionsHCatClientImpl() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "ptnDB"; - String tableName = "pageView"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) - .ifNotExists(true).build(); - client.createDatabase(dbDesc); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("userid", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("viewtime", Type.BIGINT, - "view time columns")); - cols.add(new HCatFieldSchema("pageurl", Type.STRING, "")); - cols.add(new HCatFieldSchema("ip", Type.STRING, - "IP Address of the User")); - - ArrayList ptnCols = new ArrayList(); - ptnCols.add(new HCatFieldSchema("dt", Type.STRING, "date column")); - ptnCols.add(new HCatFieldSchema("country", Type.STRING, - "country column")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(dbName, tableName, cols).fileFormat("sequencefile") - .partCols(ptnCols).build(); - client.createTable(tableDesc); - - Map firstPtn = new HashMap(); - firstPtn.put("dt", "04/30/2012"); - firstPtn.put("country", "usa"); - HCatAddPartitionDesc addPtn = HCatAddPartitionDesc.create(dbName, - tableName, null, firstPtn).build(); - client.addPartition(addPtn); - - Map secondPtn = new HashMap(); - secondPtn.put("dt", "04/12/2012"); - secondPtn.put("country", "brazil"); - HCatAddPartitionDesc addPtn2 = HCatAddPartitionDesc.create(dbName, - tableName, null, secondPtn).build(); - client.addPartition(addPtn2); - - Map thirdPtn = new HashMap(); - thirdPtn.put("dt", "04/13/2012"); - thirdPtn.put("country", "argentina"); - HCatAddPartitionDesc addPtn3 = HCatAddPartitionDesc.create(dbName, - tableName, null, thirdPtn).build(); - client.addPartition(addPtn3); - - List ptnList = client.listPartitionsByFilter(dbName, - tableName, null); - assertTrue(ptnList.size() == 3); - - HCatPartition ptn = client.getPartition(dbName, tableName, firstPtn); - assertTrue(ptn != null); - - client.dropPartitions(dbName, tableName, firstPtn, true); - ptnList = client.listPartitionsByFilter(dbName, - tableName, null); - assertTrue(ptnList.size() == 2); - - List ptnListTwo = client.listPartitionsByFilter(dbName, - tableName, "country = \"argentina\""); - assertTrue(ptnListTwo.size() == 1); - - client.markPartitionForEvent(dbName, tableName, thirdPtn, - PartitionEventType.LOAD_DONE); - boolean isMarked = client.isPartitionMarkedForEvent(dbName, tableName, - thirdPtn, PartitionEventType.LOAD_DONE); - assertTrue(isMarked); - client.close(); + assertTrue(testDb.getLocation().equals( + "file:" + expectedDir + "/" + db + ".db")); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id comment")); + cols.add(new HCatFieldSchema("value", 
Type.STRING, "value comment")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(db, tableOne, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + HCatTable table1 = client.getTable(db, tableOne); + assertTrue(table1.getInputFileFormat().equalsIgnoreCase( + RCFileInputFormat.class.getName())); + assertTrue(table1.getOutputFileFormat().equalsIgnoreCase( + RCFileOutputFormat.class.getName())); + assertTrue(table1.getSerdeLib().equalsIgnoreCase( + ColumnarSerDe.class.getName())); + assertTrue(table1.getCols().equals(cols)); + // Since "ifexists" was not set to true, trying to create the same table + // again + // will result in an exception. + try { + client.createTable(tableDesc); + } catch (HCatException e) { + assertTrue(e.getMessage().contains( + "AlreadyExistsException while creating table.")); } - @Test - public void testDatabaseLocation() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "locationDB"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) - .ifNotExists(true).location("/tmp/" + dbName).build(); - client.createDatabase(dbDesc); - HCatDatabase newDB = client.getDatabase(dbName); - assertTrue(newDB.getLocation().equalsIgnoreCase("file:/tmp/" + dbName)); - client.close(); + client.dropTable(db, tableOne, true); + HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc.create(db, + tableTwo, cols).build(); + client.createTable(tableDesc2); + HCatTable table2 = client.getTable(db, tableTwo); + assertTrue(table2.getInputFileFormat().equalsIgnoreCase( + TextInputFormat.class.getName())); + assertTrue(table2.getOutputFileFormat().equalsIgnoreCase( + IgnoreKeyTextOutputFormat.class.getName())); + assertTrue(table2.getLocation().equalsIgnoreCase( + "file:" + expectedDir + "/" + db + ".db/" + tableTwo)); + client.close(); + } + + @Test + public void testPartitionsHCatClientImpl() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "ptnDB"; + String tableName = "pageView"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) + .ifNotExists(true).build(); + client.createDatabase(dbDesc); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("userid", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("viewtime", Type.BIGINT, + "view time columns")); + cols.add(new HCatFieldSchema("pageurl", Type.STRING, "")); + cols.add(new HCatFieldSchema("ip", Type.STRING, + "IP Address of the User")); + + ArrayList ptnCols = new ArrayList(); + ptnCols.add(new HCatFieldSchema("dt", Type.STRING, "date column")); + ptnCols.add(new HCatFieldSchema("country", Type.STRING, + "country column")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(dbName, tableName, cols).fileFormat("sequencefile") + .partCols(ptnCols).build(); + client.createTable(tableDesc); + + Map firstPtn = new HashMap(); + firstPtn.put("dt", "04/30/2012"); + firstPtn.put("country", "usa"); + HCatAddPartitionDesc addPtn = HCatAddPartitionDesc.create(dbName, + tableName, null, firstPtn).build(); + client.addPartition(addPtn); + + Map secondPtn = new HashMap(); + secondPtn.put("dt", "04/12/2012"); + secondPtn.put("country", "brazil"); + HCatAddPartitionDesc addPtn2 = HCatAddPartitionDesc.create(dbName, + tableName, null, secondPtn).build(); + client.addPartition(addPtn2); + + Map thirdPtn = new HashMap(); + 
thirdPtn.put("dt", "04/13/2012"); + thirdPtn.put("country", "argentina"); + HCatAddPartitionDesc addPtn3 = HCatAddPartitionDesc.create(dbName, + tableName, null, thirdPtn).build(); + client.addPartition(addPtn3); + + List ptnList = client.listPartitionsByFilter(dbName, + tableName, null); + assertTrue(ptnList.size() == 3); + + HCatPartition ptn = client.getPartition(dbName, tableName, firstPtn); + assertTrue(ptn != null); + + client.dropPartitions(dbName, tableName, firstPtn, true); + ptnList = client.listPartitionsByFilter(dbName, + tableName, null); + assertTrue(ptnList.size() == 2); + + List ptnListTwo = client.listPartitionsByFilter(dbName, + tableName, "country = \"argentina\""); + assertTrue(ptnListTwo.size() == 1); + + client.markPartitionForEvent(dbName, tableName, thirdPtn, + PartitionEventType.LOAD_DONE); + boolean isMarked = client.isPartitionMarkedForEvent(dbName, tableName, + thirdPtn, PartitionEventType.LOAD_DONE); + assertTrue(isMarked); + client.close(); + } + + @Test + public void testDatabaseLocation() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "locationDB"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) + .ifNotExists(true).location("/tmp/" + dbName).build(); + client.createDatabase(dbDesc); + HCatDatabase newDB = client.getDatabase(dbName); + assertTrue(newDB.getLocation().equalsIgnoreCase("file:/tmp/" + dbName)); + client.close(); + } + + @Test + public void testCreateTableLike() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "tableone"; + String cloneTable = "tabletwo"; + client.dropTable(null, tableName, true); + client.dropTable(null, cloneTable, true); + + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + // create a new table similar to previous one. 
+ client.createTableLike(null, tableName, cloneTable, true, false, null); + List tables = client.listTableNamesByPattern(null, "table*"); + assertTrue(tables.size() == 2); + client.close(); + } + + @Test + public void testRenameTable() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "temptable"; + String newName = "mytable"; + client.dropTable(null, tableName, true); + client.dropTable(null, newName, true); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + client.renameTable(null, tableName, newName); + try { + client.getTable(null, tableName); + } catch (HCatException exp) { + assertTrue("Unexpected exception message: " + exp.getMessage(), + exp.getMessage().contains("NoSuchObjectException while fetching table")); } - - @Test - public void testCreateTableLike() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "tableone"; - String cloneTable = "tabletwo"; - client.dropTable(null, tableName, true); - client.dropTable(null, cloneTable, true); - - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - // create a new table similar to previous one. - client.createTableLike(null, tableName, cloneTable, true, false, null); - List tables = client.listTableNamesByPattern(null, "table*"); - assertTrue(tables.size() == 2); - client.close(); - } - - @Test - public void testRenameTable() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "temptable"; - String newName = "mytable"; - client.dropTable(null, tableName, true); - client.dropTable(null, newName, true); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - client.renameTable(null, tableName, newName); - try { - client.getTable(null, tableName); - } catch (HCatException exp) { - assertTrue("Unexpected exception message: " + exp.getMessage(), - exp.getMessage().contains("NoSuchObjectException while fetching table")); - } - HCatTable newTable = client.getTable(null, newName); - assertTrue(newTable != null); - assertTrue(newTable.getTableName().equals(newName)); - client.close(); - } - - @Test - public void testTransportFailure() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - boolean isExceptionCaught = false; - // Table creation with a long table name causes ConnectionFailureException - final String tableName = "Temptable" + new BigInteger(200, new Random()).toString(2); - - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - try { - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, 
cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - } catch (Exception exp) { - isExceptionCaught = true; - assertEquals("Unexpected exception type.", HCatException.class, exp.getClass()); - // The connection was closed, so create a new one. - client = HCatClient.create(new Configuration(hcatConf)); - String newName = "goodTable"; - client.dropTable(null, newName, true); - HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc - .create(null, newName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc2); - HCatTable newTable = client.getTable(null, newName); - assertTrue(newTable != null); - assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); - - } finally { - client.close(); - assertTrue("The expected exception was never thrown.", isExceptionCaught); - } + HCatTable newTable = client.getTable(null, newName); + assertTrue(newTable != null); + assertTrue(newTable.getTableName().equals(newName)); + client.close(); + } + + @Test + public void testTransportFailure() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + boolean isExceptionCaught = false; + // Table creation with a long table name causes ConnectionFailureException + final String tableName = "Temptable" + new BigInteger(200, new Random()).toString(2); + + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + try { + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + } catch (Exception exp) { + isExceptionCaught = true; + assertEquals("Unexpected exception type.", HCatException.class, exp.getClass()); + // The connection was closed, so create a new one. + client = HCatClient.create(new Configuration(hcatConf)); + String newName = "goodTable"; + client.dropTable(null, newName, true); + HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc + .create(null, newName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc2); + HCatTable newTable = client.getTable(null, newName); + assertTrue(newTable != null); + assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); + + } finally { + client.close(); + assertTrue("The expected exception was never thrown.", isExceptionCaught); } - - @Test - public void testOtherFailure() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "Temptable"; - boolean isExceptionCaught = false; - client.dropTable(null, tableName, true); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - try { - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - // The DB foo is non-existent. 
- client.getTable("foo", tableName); - } catch (Exception exp) { - isExceptionCaught = true; - assertTrue(exp instanceof HCatException); - String newName = "goodTable"; - client.dropTable(null, newName, true); - HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc - .create(null, newName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc2); - HCatTable newTable = client.getTable(null, newName); - assertTrue(newTable != null); - assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); - } finally { - client.close(); - assertTrue("The expected exception was never thrown.", isExceptionCaught); - } + } + + @Test + public void testOtherFailure() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "Temptable"; + boolean isExceptionCaught = false; + client.dropTable(null, tableName, true); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + try { + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + // The DB foo is non-existent. + client.getTable("foo", tableName); + } catch (Exception exp) { + isExceptionCaught = true; + assertTrue(exp instanceof HCatException); + String newName = "goodTable"; + client.dropTable(null, newName, true); + HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc + .create(null, newName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc2); + HCatTable newTable = client.getTable(null, newName); + assertTrue(newTable != null); + assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); + } finally { + client.close(); + assertTrue("The expected exception was never thrown.", isExceptionCaught); } - - @Test - public void testDropTableException() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "tableToBeDropped"; - boolean isExceptionCaught = false; - client.dropTable(null, tableName, true); - try { - client.dropTable(null, tableName, false); - } catch (Exception exp) { - isExceptionCaught = true; - assertTrue(exp instanceof HCatException); - LOG.info("Drop Table Exception: " + exp.getCause()); - } finally { - client.close(); - assertTrue("The expected exception was never thrown.", isExceptionCaught); - } + } + + @Test + public void testDropTableException() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "tableToBeDropped"; + boolean isExceptionCaught = false; + client.dropTable(null, tableName, true); + try { + client.dropTable(null, tableName, false); + } catch (Exception exp) { + isExceptionCaught = true; + assertTrue(exp instanceof HCatException); + LOG.info("Drop Table Exception: " + exp.getCause()); + } finally { + client.close(); + assertTrue("The expected exception was never thrown.", isExceptionCaught); } + } - @Test - public void testUpdateTableSchema() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "testUpdateTableSchema_DBName"; - final String tableName = "testUpdateTableSchema_TableName"; + @Test + public void testUpdateTableSchema() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "testUpdateTableSchema_DBName"; + final String tableName = "testUpdateTableSchema_TableName"; - 
client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List oldSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, oldSchema).build()); + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List oldSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, oldSchema).build()); - List newSchema = Arrays.asList(new HCatFieldSchema("completely", Type.DOUBLE, ""), - new HCatFieldSchema("new", Type.FLOAT, ""), - new HCatFieldSchema("fields", Type.STRING, "")); + List newSchema = Arrays.asList(new HCatFieldSchema("completely", Type.DOUBLE, ""), + new HCatFieldSchema("new", Type.FLOAT, ""), + new HCatFieldSchema("fields", Type.STRING, "")); - client.updateTableSchema(dbName, tableName, newSchema); + client.updateTableSchema(dbName, tableName, newSchema); - assertArrayEquals(newSchema.toArray(), client.getTable(dbName, tableName).getCols().toArray()); + assertArrayEquals(newSchema.toArray(), client.getTable(dbName, tableName).getCols().toArray()); - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception exception) { - LOG.error("Unexpected exception.", exception); - assertTrue("Unexpected exception: " + exception.getMessage(), false); - } + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); } - - @Test - public void testObjectNotFoundException() throws Exception { - try { - - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "testObjectNotFoundException_DBName"; - String tableName = "testObjectNotFoundException_TableName"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - try { // Test that fetching a non-existent db-name yields ObjectNotFound. - client.getDatabase(dbName); - assertTrue("Expected ObjectNotFoundException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), - exception instanceof ObjectNotFoundException); - } - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - - try { // Test that fetching a non-existent table-name yields ObjectNotFound. - client.getTable(dbName, tableName); - assertTrue("Expected ObjectNotFoundException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), - exception instanceof ObjectNotFoundException); - } - - String partitionColumn = "part"; - - List columns = Arrays.asList(new HCatFieldSchema("col", Type.STRING, "")); - ArrayList partitionColumns = new ArrayList( - Arrays.asList(new HCatFieldSchema(partitionColumn, Type.STRING, ""))); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columns) - .partCols(partitionColumns) - .build()); - - Map partitionSpec = new HashMap(); - partitionSpec.put(partitionColumn, "foobar"); - try { // Test that fetching a non-existent partition yields ObjectNotFound. 
- client.getPartition(dbName, tableName, partitionSpec); - assertTrue("Expected ObjectNotFoundException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), - exception instanceof ObjectNotFoundException); - } - - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - - // Test that listPartitionsByFilter() returns an empty-set, if the filter selects no partitions. - assertEquals("Expected empty set of partitions.", - 0, client.listPartitionsByFilter(dbName, tableName, partitionColumn + " < 'foobar'").size()); - - try { // Test that listPartitionsByFilter() throws HCatException if the partition-key is incorrect. - partitionSpec.put("NonExistentKey", "foobar"); - client.getPartition(dbName, tableName, partitionSpec); - assertTrue("Expected HCatException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected HCatException. Got:" + exception.getClass(), - exception instanceof HCatException); - assertFalse("Did not expect ObjectNotFoundException.", exception instanceof ObjectNotFoundException); - } - - } - catch (Throwable t) { - LOG.error("Unexpected exception!", t); - assertTrue("Unexpected exception! " + t.getMessage(), false); - } + catch (Exception exception) { + LOG.error("Unexpected exception.", exception); + assertTrue("Unexpected exception: " + exception.getMessage(), false); } + } + + @Test + public void testObjectNotFoundException() throws Exception { + try { + + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "testObjectNotFoundException_DBName"; + String tableName = "testObjectNotFoundException_TableName"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + try { // Test that fetching a non-existent db-name yields ObjectNotFound. + client.getDatabase(dbName); + assertTrue("Expected ObjectNotFoundException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), + exception instanceof ObjectNotFoundException); + } + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + + try { // Test that fetching a non-existent table-name yields ObjectNotFound. + client.getTable(dbName, tableName); + assertTrue("Expected ObjectNotFoundException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), + exception instanceof ObjectNotFoundException); + } + + String partitionColumn = "part"; + + List columns = Arrays.asList(new HCatFieldSchema("col", Type.STRING, "")); + ArrayList partitionColumns = new ArrayList( + Arrays.asList(new HCatFieldSchema(partitionColumn, Type.STRING, ""))); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columns) + .partCols(partitionColumns) + .build()); + + Map partitionSpec = new HashMap(); + partitionSpec.put(partitionColumn, "foobar"); + try { // Test that fetching a non-existent partition yields ObjectNotFound. + client.getPartition(dbName, tableName, partitionSpec); + assertTrue("Expected ObjectNotFoundException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected ObjectNotFoundException. 
Got:" + exception.getClass(), + exception instanceof ObjectNotFoundException); + } + + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + + // Test that listPartitionsByFilter() returns an empty-set, if the filter selects no partitions. + assertEquals("Expected empty set of partitions.", + 0, client.listPartitionsByFilter(dbName, tableName, partitionColumn + " < 'foobar'").size()); + + try { // Test that listPartitionsByFilter() throws HCatException if the partition-key is incorrect. + partitionSpec.put("NonExistentKey", "foobar"); + client.getPartition(dbName, tableName, partitionSpec); + assertTrue("Expected HCatException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected HCatException. Got:" + exception.getClass(), + exception instanceof HCatException); + assertFalse("Did not expect ObjectNotFoundException.", exception instanceof ObjectNotFoundException); + } - @Test - public void testGetMessageBusTopicName() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "testGetMessageBusTopicName_DBName"; - String tableName = "testGetMessageBusTopicName_TableName"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - String messageBusTopicName = "MY.topic.name"; - Map tableProperties = new HashMap(1); - tableProperties.put(HCatConstants.HCAT_MSGBUS_TOPIC_NAME, messageBusTopicName); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, Arrays.asList(new HCatFieldSchema("foo", Type.STRING, ""))).tblProps(tableProperties).build()); - - assertEquals("MessageBus topic-name doesn't match!", messageBusTopicName, client.getMessageBusTopicName(dbName, tableName)); - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - client.close(); - } - catch (Exception exception) { - LOG.error("Unexpected exception.", exception); - assertTrue("Unexpected exception:" + exception.getMessage(), false); - } } + catch (Throwable t) { + LOG.error("Unexpected exception!", t); + assertTrue("Unexpected exception! 
" + t.getMessage(), false); + } + } + + @Test + public void testGetMessageBusTopicName() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "testGetMessageBusTopicName_DBName"; + String tableName = "testGetMessageBusTopicName_TableName"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + String messageBusTopicName = "MY.topic.name"; + Map tableProperties = new HashMap(1); + tableProperties.put(HCatConstants.HCAT_MSGBUS_TOPIC_NAME, messageBusTopicName); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, Arrays.asList(new HCatFieldSchema("foo", Type.STRING, ""))).tblProps(tableProperties).build()); + + assertEquals("MessageBus topic-name doesn't match!", messageBusTopicName, client.getMessageBusTopicName(dbName, tableName)); + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + client.close(); + } + catch (Exception exception) { + LOG.error("Unexpected exception.", exception); + assertTrue("Unexpected exception:" + exception.getMessage(), false); + } + } - @Test - public void testPartitionSchema() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "myDb"; - final String tableName = "myTable"; + @Test + public void testPartitionSchema() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "myDb"; + final String tableName = "myTable"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); - List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), - new HCatFieldSchema("grid", Type.STRING, "")); + List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), + new HCatFieldSchema("grid", Type.STRING, "")); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(partitionSchema).build()); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(partitionSchema).build()); - HCatTable table = client.getTable(dbName, tableName); - List partitionColumns = table.getPartCols(); + HCatTable table = client.getTable(dbName, tableName); + List partitionColumns = table.getPartCols(); - assertArrayEquals("Didn't get expected partition-schema back from the HCatTable.", - partitionSchema.toArray(), partitionColumns.toArray()); - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception unexpected) { - LOG.error("Unexpected exception!", unexpected); - assertTrue("Unexpected exception! 
" + unexpected.getMessage(), false); - } + assertArrayEquals("Didn't get expected partition-schema back from the HCatTable.", + partitionSchema.toArray(), partitionColumns.toArray()); + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); } - - @Test - public void testGetPartitionsWithPartialSpec() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "myDb"; - final String tableName = "myTable"; - - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - - List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), - new HCatFieldSchema("grid", Type.STRING, "")); - - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); - - Map partitionSpec = new HashMap(); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2011_12_31"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2012_01_01"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "OB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "XB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - - Map partialPartitionSpec = new HashMap(); - partialPartitionSpec.put("dt", "2012_01_01"); - - List partitions = client.getPartitions(dbName, tableName, partialPartitionSpec); - assertEquals("Unexpected number of partitions.", 3, partitions.size()); - assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "AB"}, partitions.get(0).getValues().toArray()); - assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "OB"}, partitions.get(1).getValues().toArray()); - assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "XB"}, partitions.get(2).getValues().toArray()); - - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception unexpected) { - LOG.error("Unexpected exception!", unexpected); - assertTrue("Unexpected exception! " + unexpected.getMessage(), false); - } + catch (Exception unexpected) { + LOG.error("Unexpected exception!", unexpected); + assertTrue("Unexpected exception! 
" + unexpected.getMessage(), false); } - - @Test - public void testDropPartitionsWithPartialSpec() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "myDb"; - final String tableName = "myTable"; - - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - - List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), - new HCatFieldSchema("grid", Type.STRING, "")); - - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); - - Map partitionSpec = new HashMap(); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2011_12_31"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2012_01_01"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "OB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "XB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - - Map partialPartitionSpec = new HashMap(); - partialPartitionSpec.put("dt", "2012_01_01"); - - client.dropPartitions(dbName, tableName, partialPartitionSpec, true); - - List partitions = client.getPartitions(dbName, tableName); - assertEquals("Unexpected number of partitions.", 1, partitions.size()); - assertArrayEquals("Mismatched partition.", new String[]{"2011_12_31", "AB"}, partitions.get(0).getValues().toArray()); - - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception unexpected) { - LOG.error("Unexpected exception!", unexpected); - assertTrue("Unexpected exception! 
" + unexpected.getMessage(), false); - } + } + + @Test + public void testGetPartitionsWithPartialSpec() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "myDb"; + final String tableName = "myTable"; + + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + + List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), + new HCatFieldSchema("grid", Type.STRING, "")); + + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); + + Map partitionSpec = new HashMap(); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2011_12_31"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2012_01_01"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "OB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "XB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + + Map partialPartitionSpec = new HashMap(); + partialPartitionSpec.put("dt", "2012_01_01"); + + List partitions = client.getPartitions(dbName, tableName, partialPartitionSpec); + assertEquals("Unexpected number of partitions.", 3, partitions.size()); + assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "AB"}, partitions.get(0).getValues().toArray()); + assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "OB"}, partitions.get(1).getValues().toArray()); + assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "XB"}, partitions.get(2).getValues().toArray()); + + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); + } + catch (Exception unexpected) { + LOG.error("Unexpected exception!", unexpected); + assertTrue("Unexpected exception! 
" + unexpected.getMessage(), false); + } + } + + @Test + public void testDropPartitionsWithPartialSpec() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "myDb"; + final String tableName = "myTable"; + + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + + List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), + new HCatFieldSchema("grid", Type.STRING, "")); + + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); + + Map partitionSpec = new HashMap(); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2011_12_31"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2012_01_01"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "OB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "XB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + + Map partialPartitionSpec = new HashMap(); + partialPartitionSpec.put("dt", "2012_01_01"); + + client.dropPartitions(dbName, tableName, partialPartitionSpec, true); + + List partitions = client.getPartitions(dbName, tableName); + assertEquals("Unexpected number of partitions.", 1, partitions.size()); + assertArrayEquals("Mismatched partition.", new String[]{"2011_12_31", "AB"}, partitions.get(0).getValues().toArray()); + + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); + } + catch (Exception unexpected) { + LOG.error("Unexpected exception!", unexpected); + assertTrue("Unexpected exception! 
" + unexpected.getMessage(), false); } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java index 3d82f69..5f5ee54 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java @@ -62,162 +62,162 @@ * */ public class AppConfig extends Configuration { - public static final String[] HADOOP_CONF_FILENAMES = { - "core-default.xml", "core-site.xml", "mapred-default.xml", "mapred-site.xml", "hdfs-site.xml" - }; - - public static final String[] HADOOP_PREFIX_VARS = { - "HADOOP_PREFIX", "HADOOP_HOME" - }; - - public static final String TEMPLETON_HOME_VAR = "TEMPLETON_HOME"; - - public static final String[] TEMPLETON_CONF_FILENAMES = { - "webhcat-default.xml", - "webhcat-site.xml" - }; - - public static final String PORT = "templeton.port"; - public static final String EXEC_ENCODING_NAME = "templeton.exec.encoding"; - public static final String EXEC_ENVS_NAME = "templeton.exec.envs"; - public static final String EXEC_MAX_BYTES_NAME = "templeton.exec.max-output-bytes"; - public static final String EXEC_MAX_PROCS_NAME = "templeton.exec.max-procs"; - public static final String EXEC_TIMEOUT_NAME = "templeton.exec.timeout"; - public static final String HADOOP_QUEUE_NAME = "templeton.hadoop.queue.name"; - public static final String HADOOP_NAME = "templeton.hadoop"; - public static final String HADOOP_CONF_DIR = "templeton.hadoop.conf.dir"; - public static final String HCAT_NAME = "templeton.hcat"; - public static final String HIVE_ARCHIVE_NAME = "templeton.hive.archive"; - public static final String HIVE_PATH_NAME = "templeton.hive.path"; - public static final String HIVE_PROPS_NAME = "templeton.hive.properties"; - public static final String LIB_JARS_NAME = "templeton.libjars"; - public static final String PIG_ARCHIVE_NAME = "templeton.pig.archive"; - public static final String PIG_PATH_NAME = "templeton.pig.path"; - public static final String STREAMING_JAR_NAME = "templeton.streaming.jar"; - public static final String TEMPLETON_JAR_NAME = "templeton.jar"; - public static final String OVERRIDE_JARS_NAME = "templeton.override.jars"; - public static final String OVERRIDE_JARS_ENABLED = "templeton.override.enabled"; - public static final String TEMPLETON_CONTROLLER_MR_CHILD_OPTS - = "templeton.controller.mr.child.opts"; - - public static final String KERBEROS_SECRET = "templeton.kerberos.secret"; - public static final String KERBEROS_PRINCIPAL = "templeton.kerberos.principal"; - public static final String KERBEROS_KEYTAB = "templeton.kerberos.keytab"; - - public static final String CALLBACK_INTERVAL_NAME - = "templeton.callback.retry.interval"; - public static final String CALLBACK_RETRY_NAME - = "templeton.callback.retry.attempts"; - - //Hadoop property names (set by templeton logic) - public static final String HADOOP_END_INTERVAL_NAME = "job.end.retry.interval"; - public static final String HADOOP_END_RETRY_NAME = "job.end.retry.attempts"; - public static final String HADOOP_END_URL_NAME = "job.end.notification.url"; - public static final String HADOOP_SPECULATIVE_NAME - = "mapred.map.tasks.speculative.execution"; - public static final String HADOOP_CHILD_JAVA_OPTS = "mapred.child.java.opts"; - public static final String UNIT_TEST_MODE = "templeton.unit.test.mode"; - - - private static final Log LOG = LogFactory.getLog(AppConfig.class); - - 
public AppConfig() { - init(); - LOG.info("Using Hadoop version " + VersionInfo.getVersion()); + public static final String[] HADOOP_CONF_FILENAMES = { + "core-default.xml", "core-site.xml", "mapred-default.xml", "mapred-site.xml", "hdfs-site.xml" + }; + + public static final String[] HADOOP_PREFIX_VARS = { + "HADOOP_PREFIX", "HADOOP_HOME" + }; + + public static final String TEMPLETON_HOME_VAR = "TEMPLETON_HOME"; + + public static final String[] TEMPLETON_CONF_FILENAMES = { + "webhcat-default.xml", + "webhcat-site.xml" + }; + + public static final String PORT = "templeton.port"; + public static final String EXEC_ENCODING_NAME = "templeton.exec.encoding"; + public static final String EXEC_ENVS_NAME = "templeton.exec.envs"; + public static final String EXEC_MAX_BYTES_NAME = "templeton.exec.max-output-bytes"; + public static final String EXEC_MAX_PROCS_NAME = "templeton.exec.max-procs"; + public static final String EXEC_TIMEOUT_NAME = "templeton.exec.timeout"; + public static final String HADOOP_QUEUE_NAME = "templeton.hadoop.queue.name"; + public static final String HADOOP_NAME = "templeton.hadoop"; + public static final String HADOOP_CONF_DIR = "templeton.hadoop.conf.dir"; + public static final String HCAT_NAME = "templeton.hcat"; + public static final String HIVE_ARCHIVE_NAME = "templeton.hive.archive"; + public static final String HIVE_PATH_NAME = "templeton.hive.path"; + public static final String HIVE_PROPS_NAME = "templeton.hive.properties"; + public static final String LIB_JARS_NAME = "templeton.libjars"; + public static final String PIG_ARCHIVE_NAME = "templeton.pig.archive"; + public static final String PIG_PATH_NAME = "templeton.pig.path"; + public static final String STREAMING_JAR_NAME = "templeton.streaming.jar"; + public static final String TEMPLETON_JAR_NAME = "templeton.jar"; + public static final String OVERRIDE_JARS_NAME = "templeton.override.jars"; + public static final String OVERRIDE_JARS_ENABLED = "templeton.override.enabled"; + public static final String TEMPLETON_CONTROLLER_MR_CHILD_OPTS + = "templeton.controller.mr.child.opts"; + + public static final String KERBEROS_SECRET = "templeton.kerberos.secret"; + public static final String KERBEROS_PRINCIPAL = "templeton.kerberos.principal"; + public static final String KERBEROS_KEYTAB = "templeton.kerberos.keytab"; + + public static final String CALLBACK_INTERVAL_NAME + = "templeton.callback.retry.interval"; + public static final String CALLBACK_RETRY_NAME + = "templeton.callback.retry.attempts"; + + //Hadoop property names (set by templeton logic) + public static final String HADOOP_END_INTERVAL_NAME = "job.end.retry.interval"; + public static final String HADOOP_END_RETRY_NAME = "job.end.retry.attempts"; + public static final String HADOOP_END_URL_NAME = "job.end.notification.url"; + public static final String HADOOP_SPECULATIVE_NAME + = "mapred.map.tasks.speculative.execution"; + public static final String HADOOP_CHILD_JAVA_OPTS = "mapred.child.java.opts"; + public static final String UNIT_TEST_MODE = "templeton.unit.test.mode"; + + + private static final Log LOG = LogFactory.getLog(AppConfig.class); + + public AppConfig() { + init(); + LOG.info("Using Hadoop version " + VersionInfo.getVersion()); + } + + private void init() { + for (Map.Entry e : System.getenv().entrySet()) + set("env." + e.getKey(), e.getValue()); + + String templetonDir = getTempletonDir(); + for (String fname : TEMPLETON_CONF_FILENAMES) + if (! 
loadOneClasspathConfig(fname)) + loadOneFileConfig(templetonDir, fname); + + String hadoopConfDir = getHadoopConfDir(); + for (String fname : HADOOP_CONF_FILENAMES) + loadOneFileConfig(hadoopConfDir, fname); + ProxyUserSupport.processProxyuserConfig(this); + } + + public void startCleanup() { + JobState.getStorageInstance(this).startCleanup(this); + } + + public String getHadoopConfDir() { + return get(HADOOP_CONF_DIR); + } + + public static String getTempletonDir() { + return System.getenv(TEMPLETON_HOME_VAR); + } + + private boolean loadOneFileConfig(String dir, String fname) { + if (dir != null) { + File f = new File(dir, fname); + if (f.exists()) { + addResource(new Path(f.getAbsolutePath())); + LOG.debug("loaded config file " + f.getAbsolutePath()); + return true; + } } - - private void init() { - for (Map.Entry e : System.getenv().entrySet()) - set("env." + e.getKey(), e.getValue()); - - String templetonDir = getTempletonDir(); - for (String fname : TEMPLETON_CONF_FILENAMES) - if (! loadOneClasspathConfig(fname)) - loadOneFileConfig(templetonDir, fname); - - String hadoopConfDir = getHadoopConfDir(); - for (String fname : HADOOP_CONF_FILENAMES) - loadOneFileConfig(hadoopConfDir, fname); - ProxyUserSupport.processProxyuserConfig(this); - } - - public void startCleanup() { - JobState.getStorageInstance(this).startCleanup(this); - } - - public String getHadoopConfDir() { - return get(HADOOP_CONF_DIR); - } - - public static String getTempletonDir() { - return System.getenv(TEMPLETON_HOME_VAR); - } - - private boolean loadOneFileConfig(String dir, String fname) { - if (dir != null) { - File f = new File(dir, fname); - if (f.exists()) { - addResource(new Path(f.getAbsolutePath())); - LOG.debug("loaded config file " + f.getAbsolutePath()); - return true; - } - } - return false; - } - - private boolean loadOneClasspathConfig(String fname) { - URL x = getResource(fname); - if (x != null) { - addResource(x); - LOG.debug("loaded config from classpath " + x); - return true; - } - - return false; - } - - public String templetonJar() { return get(TEMPLETON_JAR_NAME); } - public String libJars() { return get(LIB_JARS_NAME); } - public String hadoopQueueName() { return get(HADOOP_QUEUE_NAME); } - public String clusterHadoop() { return get(HADOOP_NAME); } - public String clusterHcat() { return get(HCAT_NAME); } - public String pigPath() { return get(PIG_PATH_NAME); } - public String pigArchive() { return get(PIG_ARCHIVE_NAME); } - public String hivePath() { return get(HIVE_PATH_NAME); } - public String hiveArchive() { return get(HIVE_ARCHIVE_NAME); } - public String streamingJar() { return get(STREAMING_JAR_NAME); } - public String kerberosSecret() { return get(KERBEROS_SECRET); } - public String kerberosPrincipal(){ return get(KERBEROS_PRINCIPAL); } - public String kerberosKeytab() { return get(KERBEROS_KEYTAB); } - public String controllerMRChildOpts() { - return get(TEMPLETON_CONTROLLER_MR_CHILD_OPTS); - } - - - - public String[] overrideJars() { - if (getBoolean(OVERRIDE_JARS_ENABLED, true)) - return getStrings(OVERRIDE_JARS_NAME); - else - return null; - } - public String overrideJarsString() { - if (getBoolean(OVERRIDE_JARS_ENABLED, true)) - return get(OVERRIDE_JARS_NAME); - else - return null; - } - - public long zkCleanupInterval() { - return getLong(ZooKeeperCleanup.ZK_CLEANUP_INTERVAL, - (1000L * 60L * 60L * 12L)); - } - - public long zkMaxAge() { - return getLong(ZooKeeperCleanup.ZK_CLEANUP_MAX_AGE, - (1000L * 60L * 60L * 24L * 7L)); + return false; + } + + private boolean 
loadOneClasspathConfig(String fname) { + URL x = getResource(fname); + if (x != null) { + addResource(x); + LOG.debug("loaded config from classpath " + x); + return true; } - public String zkHosts() { return get(ZooKeeperStorage.ZK_HOSTS); } - public int zkSessionTimeout() { return getInt(ZooKeeperStorage.ZK_SESSION_TIMEOUT, 30000); } + return false; + } + + public String templetonJar() { return get(TEMPLETON_JAR_NAME); } + public String libJars() { return get(LIB_JARS_NAME); } + public String hadoopQueueName() { return get(HADOOP_QUEUE_NAME); } + public String clusterHadoop() { return get(HADOOP_NAME); } + public String clusterHcat() { return get(HCAT_NAME); } + public String pigPath() { return get(PIG_PATH_NAME); } + public String pigArchive() { return get(PIG_ARCHIVE_NAME); } + public String hivePath() { return get(HIVE_PATH_NAME); } + public String hiveArchive() { return get(HIVE_ARCHIVE_NAME); } + public String streamingJar() { return get(STREAMING_JAR_NAME); } + public String kerberosSecret() { return get(KERBEROS_SECRET); } + public String kerberosPrincipal(){ return get(KERBEROS_PRINCIPAL); } + public String kerberosKeytab() { return get(KERBEROS_KEYTAB); } + public String controllerMRChildOpts() { + return get(TEMPLETON_CONTROLLER_MR_CHILD_OPTS); + } + + + + public String[] overrideJars() { + if (getBoolean(OVERRIDE_JARS_ENABLED, true)) + return getStrings(OVERRIDE_JARS_NAME); + else + return null; + } + public String overrideJarsString() { + if (getBoolean(OVERRIDE_JARS_ENABLED, true)) + return get(OVERRIDE_JARS_NAME); + else + return null; + } + + public long zkCleanupInterval() { + return getLong(ZooKeeperCleanup.ZK_CLEANUP_INTERVAL, + (1000L * 60L * 60L * 12L)); + } + + public long zkMaxAge() { + return getLong(ZooKeeperCleanup.ZK_CLEANUP_MAX_AGE, + (1000L * 60L * 60L * 24L * 7L)); + } + + public String zkHosts() { return get(ZooKeeperStorage.ZK_HOSTS); } + public int zkSessionTimeout() { return getInt(ZooKeeperStorage.ZK_SESSION_TIMEOUT, 30000); } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BadParam.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BadParam.java index d94356d..9c9db30 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BadParam.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BadParam.java @@ -24,7 +24,7 @@ * Missing required or badly configured paramater. */ public class BadParam extends SimpleWebException { - public BadParam(String msg) { - super(HttpStatus.BAD_REQUEST_400, msg); - } + public BadParam(String msg) { + super(HttpStatus.BAD_REQUEST_400, msg); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BusyException.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BusyException.java index 548c783..64261fa 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BusyException.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BusyException.java @@ -24,7 +24,7 @@ * Simple "we are busy, try again" exception. 
*/ public class BusyException extends SimpleWebException { - public BusyException() { - super(HttpStatus.SERVICE_UNAVAILABLE_503, "Busy, please retry"); - } + public BusyException() { + super(HttpStatus.SERVICE_UNAVAILABLE_503, "Busy, please retry"); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CallbackFailedException.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CallbackFailedException.java index e73314f..05f035e 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CallbackFailedException.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CallbackFailedException.java @@ -24,7 +24,7 @@ * The callback failed when it tried to reach the callback URL. */ public class CallbackFailedException extends SimpleWebException { - public CallbackFailedException(String msg) { - super(HttpStatus.BAD_REQUEST_400, msg); - } + public CallbackFailedException(String msg) { + super(HttpStatus.BAD_REQUEST_400, msg); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CatchallExceptionMapper.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CatchallExceptionMapper.java index 5201621..4288f5d 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CatchallExceptionMapper.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CatchallExceptionMapper.java @@ -34,14 +34,14 @@ */ @Provider public class CatchallExceptionMapper - implements ExceptionMapper { - private static final Log LOG = LogFactory.getLog(CatchallExceptionMapper.class); + implements ExceptionMapper { + private static final Log LOG = LogFactory.getLog(CatchallExceptionMapper.class); - public Response toResponse(Exception e) { - LOG.error(e.getMessage(), e); - if (e instanceof NotFoundException) { - return SimpleWebException.buildMessage(HttpStatus.NOT_FOUND_404, null, e.getMessage()); - } - return SimpleWebException.buildMessage(HttpStatus.INTERNAL_SERVER_ERROR_500, null, e.getMessage()); + public Response toResponse(Exception e) { + LOG.error(e.getMessage(), e); + if (e instanceof NotFoundException) { + return SimpleWebException.buildMessage(HttpStatus.NOT_FOUND_404, null, e.getMessage()); } + return SimpleWebException.buildMessage(HttpStatus.INTERNAL_SERVER_ERROR_500, null, e.getMessage()); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ColumnDesc.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ColumnDesc.java index 99718ca..f1b4975 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ColumnDesc.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ColumnDesc.java @@ -25,36 +25,36 @@ */ @XmlRootElement public class ColumnDesc extends GroupPermissionsDesc { - public String name; - public String type; - public String comment; + public String name; + public String type; + public String comment; - public ColumnDesc() {} + public ColumnDesc() {} - /** - * Create a new ColumnDesc - */ - public ColumnDesc(String name, String type, String comment) { - this.name = name; - this.type = type; - this.comment = comment; - } + /** + * Create a new ColumnDesc + */ + public ColumnDesc(String name, String type, String comment) { + this.name = name; + this.type = type; + this.comment = comment; + } - public String toString() { - return String.format("ColumnDesc(name=%s, type=%s, comment=%s)", - 
name, type, comment); - } + public String toString() { + return String.format("ColumnDesc(name=%s, type=%s, comment=%s)", + name, type, comment); + } - public boolean equals(Object o) { - if (this == o) - return true; - if (! (o instanceof ColumnDesc)) - return false; - ColumnDesc that = (ColumnDesc) o; - return xequals(this.name, that.name) - && xequals(this.type, that.type) - && xequals(this.comment, that.comment) - && super.equals(that) - ; - } + public boolean equals(Object o) { + if (this == o) + return true; + if (! (o instanceof ColumnDesc)) + return false; + ColumnDesc that = (ColumnDesc) o; + return xequals(this.name, that.name) + && xequals(this.type, that.type) + && xequals(this.comment, that.comment) + && super.equals(that) + ; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteBean.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteBean.java index 3404529..c4da29d 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteBean.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteBean.java @@ -22,16 +22,16 @@ * CompleteBean - The results of an CompleteDelegator run. */ public class CompleteBean { - public String status; + public String status; - public CompleteBean() {} + public CompleteBean() {} - /** - * Create a new CompleteBean - * - * @param status run status - */ - public CompleteBean(String status) { - this.status = status; - } + /** + * Create a new CompleteBean + * + * @param status run status + */ + public CompleteBean(String status) { + this.status = status; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java index 2ef57a4..4cee5bc 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java @@ -45,69 +45,69 @@ * See LauncherDelegator for the HADOOP_END_RETRY* vars that are set. 
*/ public class CompleteDelegator extends TempletonDelegator { - private static final Log LOG = LogFactory.getLog(CompleteDelegator.class); + private static final Log LOG = LogFactory.getLog(CompleteDelegator.class); - public CompleteDelegator(AppConfig appConf) { - super(appConf); - } + public CompleteDelegator(AppConfig appConf) { + super(appConf); + } - public CompleteBean run(String id) - throws CallbackFailedException, IOException { - if (id == null) - acceptWithError("No jobid given"); + public CompleteBean run(String id) + throws CallbackFailedException, IOException { + if (id == null) + acceptWithError("No jobid given"); - JobState state = null; - try { - state = new JobState(id, Main.getAppConfigInstance()); - if (state.getCompleteStatus() == null) - failed("Job not yet complete", null); + JobState state = null; + try { + state = new JobState(id, Main.getAppConfigInstance()); + if (state.getCompleteStatus() == null) + failed("Job not yet complete", null); - Long notified = state.getNotifiedTime(); - if (notified != null) - return acceptWithError("Callback already run on " - + new Date(notified.longValue())); + Long notified = state.getNotifiedTime(); + if (notified != null) + return acceptWithError("Callback already run on " + + new Date(notified.longValue())); - String callback = state.getCallback(); - if (callback == null) - return new CompleteBean("No callback registered"); + String callback = state.getCallback(); + if (callback == null) + return new CompleteBean("No callback registered"); - try { - doCallback(state.getId(), callback); - } catch (Exception e) { - failed("Callback failed " + callback + " for " + id, e); - } + try { + doCallback(state.getId(), callback); + } catch (Exception e) { + failed("Callback failed " + callback + " for " + id, e); + } - state.setNotifiedTime(System.currentTimeMillis()); - return new CompleteBean("Callback sent"); - } finally { - if (state != null) - state.close(); - } + state.setNotifiedTime(System.currentTimeMillis()); + return new CompleteBean("Callback sent"); + } finally { + if (state != null) + state.close(); } + } - /** - * Call the callback url with the jobid to let them know it's - * finished. If the url has the string $jobId in it, it will be - * replaced with the completed jobid. - */ - public static void doCallback(String jobid, String url) - throws MalformedURLException, IOException { - if (url.contains("$jobId")) - url = url.replace("$jobId", jobid); - TempletonUtils.fetchUrl(new URL(url)); - } + /** + * Call the callback url with the jobid to let them know it's + * finished. If the url has the string $jobId in it, it will be + * replaced with the completed jobid. 
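    // Aside (illustration only, not part of this patch): the substitution described in
    // the javadoc above is a plain String.replace, after which the real code fetches the
    // resulting URL via TempletonUtils.fetchUrl(new URL(url)). A minimal, self-contained
    // sketch with a hypothetical callback URL and job id:

    class CallbackUrlSketch {
      static String resolve(String url, String jobid) {
        if (url.contains("$jobId"))
          url = url.replace("$jobId", jobid);   // same check-and-replace as doCallback()
        return url;
      }

      public static void main(String[] args) {
        // Hypothetical values, purely for illustration:
        String registered = "http://example.com/notify?id=$jobId";
        String completed  = "job_201304221200_0001";
        System.out.println(resolve(registered, completed));
        // prints http://example.com/notify?id=job_201304221200_0001
      }
    }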
+ */ + public static void doCallback(String jobid, String url) + throws MalformedURLException, IOException { + if (url.contains("$jobId")) + url = url.replace("$jobId", jobid); + TempletonUtils.fetchUrl(new URL(url)); + } - private void failed(String msg, Exception e) - throws CallbackFailedException { - if (e != null) - LOG.error(msg, e); - else - LOG.error(msg); - throw new CallbackFailedException(msg); - } + private void failed(String msg, Exception e) + throws CallbackFailedException { + if (e != null) + LOG.error(msg, e); + else + LOG.error(msg); + throw new CallbackFailedException(msg); + } - private CompleteBean acceptWithError(String msg) { - LOG.error(msg); - return new CompleteBean(msg); - } + private CompleteBean acceptWithError(String msg) { + LOG.error(msg); + return new CompleteBean(msg); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DatabaseDesc.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DatabaseDesc.java index 0e262a8..5a317d5 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DatabaseDesc.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DatabaseDesc.java @@ -27,16 +27,16 @@ */ @XmlRootElement public class DatabaseDesc extends GroupPermissionsDesc { - public boolean ifNotExists; - public String database; - public String comment; - public String location; - public Map properties; + public boolean ifNotExists; + public String database; + public String comment; + public String location; + public Map properties; - public DatabaseDesc() {} + public DatabaseDesc() {} - public String toString() { - return String.format("DatabaseDesc(database=%s, comment=%s, location=%s, " + - "properties=%s)", database, comment, location, properties); - } + public String toString() { + return String.format("DatabaseDesc(database=%s, comment=%s, location=%s, " + + "properties=%s)", database, comment, location, properties); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DeleteDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DeleteDelegator.java index 3593004..7b9fad1 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DeleteDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DeleteDelegator.java @@ -30,34 +30,34 @@ * Delete a job */ public class DeleteDelegator extends TempletonDelegator { - public DeleteDelegator(AppConfig appConf) { - super(appConf); - } + public DeleteDelegator(AppConfig appConf) { + super(appConf); + } - public QueueStatusBean run(String user, String id) - throws NotAuthorizedException, BadParam, IOException, InterruptedException - { - UserGroupInformation ugi = UgiFactory.getUgi(user); - WebHCatJTShim tracker = null; - JobState state = null; - try { - tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); - JobID jobid = StatusDelegator.StringToJobID(id); - if (jobid == null) - throw new BadParam("Invalid jobid: " + id); - tracker.killJob(jobid); - state = new JobState(id, Main.getAppConfigInstance()); - String childid = state.getChildId(); - if (childid != null) - tracker.killJob(StatusDelegator.StringToJobID(childid)); - return StatusDelegator.makeStatus(tracker, jobid, state); - } catch (IllegalStateException e) { - throw new BadParam(e.getMessage()); - } finally { - if (tracker != null) - tracker.close(); - if (state != null) - state.close(); - } + public QueueStatusBean 
run(String user, String id) + throws NotAuthorizedException, BadParam, IOException, InterruptedException + { + UserGroupInformation ugi = UgiFactory.getUgi(user); + WebHCatJTShim tracker = null; + JobState state = null; + try { + tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); + JobID jobid = StatusDelegator.StringToJobID(id); + if (jobid == null) + throw new BadParam("Invalid jobid: " + id); + tracker.killJob(jobid); + state = new JobState(id, Main.getAppConfigInstance()); + String childid = state.getChildId(); + if (childid != null) + tracker.killJob(StatusDelegator.StringToJobID(childid)); + return StatusDelegator.makeStatus(tracker, jobid, state); + } catch (IllegalStateException e) { + throw new BadParam(e.getMessage()); + } finally { + if (tracker != null) + tracker.close(); + if (state != null) + state.close(); } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/EnqueueBean.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/EnqueueBean.java index f08964e..ce38d89 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/EnqueueBean.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/EnqueueBean.java @@ -22,16 +22,16 @@ * EnqueueBean - The results of a call that enqueues a Hadoop job. */ public class EnqueueBean { - public String id; + public String id; - public EnqueueBean() {} + public EnqueueBean() {} - /** - * Create a new EnqueueBean. - * - * @param id job id - */ - public EnqueueBean(String id) { - this.id = id; - } + /** + * Create a new EnqueueBean. + * + * @param id job id + */ + public EnqueueBean(String id) { + this.id = id; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecBean.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecBean.java index c6c7b6e..01eee6d 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecBean.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecBean.java @@ -22,27 +22,27 @@ * ExecBean - The results of an exec call. */ public class ExecBean { - public String stdout; - public String stderr; - public int exitcode; + public String stdout; + public String stderr; + public int exitcode; - public ExecBean() {} + public ExecBean() {} - /** - * Create a new ExecBean. - * - * @param stdout standard output of the the program. - * @param stderr error output of the the program. - * @param exitcode exit code of the program. - */ - public ExecBean(String stdout, String stderr, int exitcode) { - this.stdout = stdout; - this.stderr = stderr; - this.exitcode = exitcode; - } + /** + * Create a new ExecBean. + * + * @param stdout standard output of the the program. + * @param stderr error output of the the program. + * @param exitcode exit code of the program. 
+ */ + public ExecBean(String stdout, String stderr, int exitcode) { + this.stdout = stdout; + this.stderr = stderr; + this.exitcode = exitcode; + } - public String toString() { - return String.format("ExecBean(stdout=%s, stderr=%s, exitcode=%s)", - stdout, stderr, exitcode); - } + public String toString() { + return String.format("ExecBean(stdout=%s, stderr=%s, exitcode=%s)", + stdout, stderr, exitcode); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecService.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecService.java index 5e8db93..6f42b7b 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecService.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecService.java @@ -25,11 +25,11 @@ import org.apache.commons.exec.ExecuteException; public interface ExecService { - public ExecBean run(String program, List args, - Map env) - throws NotAuthorizedException, BusyException, ExecuteException, IOException; + public ExecBean run(String program, List args, + Map env) + throws NotAuthorizedException, BusyException, ExecuteException, IOException; - public ExecBean runUnlimited(String program, List args, - Map env) - throws NotAuthorizedException, ExecuteException, IOException; + public ExecBean runUnlimited(String program, List args, + Map env) + throws NotAuthorizedException, ExecuteException, IOException; } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecServiceImpl.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecServiceImpl.java index 23577d0..77ee6af 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecServiceImpl.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecServiceImpl.java @@ -40,155 +40,155 @@ * ExecService.run and ExecService.runUnlimited for details. */ public class ExecServiceImpl implements ExecService { - private static final Log LOG = LogFactory.getLog(ExecServiceImpl.class); - private static AppConfig appConf = Main.getAppConfigInstance(); - - private static volatile ExecServiceImpl theSingleton; - - /** - * Retrieve the singleton. - */ - public static synchronized ExecServiceImpl getInstance() { - if (theSingleton == null) { - theSingleton = new ExecServiceImpl(); - } - return theSingleton; - } - - private Semaphore avail; + private static final Log LOG = LogFactory.getLog(ExecServiceImpl.class); + private static AppConfig appConf = Main.getAppConfigInstance(); - private ExecServiceImpl() { - avail = new Semaphore(appConf.getInt(AppConfig.EXEC_MAX_PROCS_NAME, 16)); - } + private static volatile ExecServiceImpl theSingleton; - /** - * Run the program synchronously as the given user. We rate limit - * the number of processes that can simultaneously created for - * this instance. - * - * @param program The program to run - * @param args Arguments to pass to the program - * @param env Any extra environment variables to set - * @return The result of the run. - */ - public ExecBean run(String program, List args, - Map env) - throws NotAuthorizedException, BusyException, ExecuteException, IOException { - boolean aquired = false; - try { - aquired = avail.tryAcquire(); - if (aquired) { - return runUnlimited(program, args, env); - } else { - throw new BusyException(); - } - } finally { - if (aquired) { - avail.release(); - } - } + /** + * Retrieve the singleton. 
+ */ + public static synchronized ExecServiceImpl getInstance() { + if (theSingleton == null) { + theSingleton = new ExecServiceImpl(); } - - /** - * Run the program synchronously as the given user. Warning: - * CommandLine will trim the argument strings. - * - * @param program The program to run. - * @param args Arguments to pass to the program - * @param env Any extra environment variables to set - * @return The result of the run. - */ - public ExecBean runUnlimited(String program, List args, - Map env) - throws NotAuthorizedException, ExecuteException, IOException { - try { - return auxRun(program, args, env); - } catch (IOException e) { - File cwd = new java.io.File("."); - if (cwd.canRead() && cwd.canWrite()) - throw e; - else - throw new IOException("Invalid permissions on Templeton directory: " - + cwd.getCanonicalPath()); - } + return theSingleton; + } + + private Semaphore avail; + + private ExecServiceImpl() { + avail = new Semaphore(appConf.getInt(AppConfig.EXEC_MAX_PROCS_NAME, 16)); + } + + /** + * Run the program synchronously as the given user. We rate limit + * the number of processes that can simultaneously created for + * this instance. + * + * @param program The program to run + * @param args Arguments to pass to the program + * @param env Any extra environment variables to set + * @return The result of the run. + */ + public ExecBean run(String program, List args, + Map env) + throws NotAuthorizedException, BusyException, ExecuteException, IOException { + boolean aquired = false; + try { + aquired = avail.tryAcquire(); + if (aquired) { + return runUnlimited(program, args, env); + } else { + throw new BusyException(); + } + } finally { + if (aquired) { + avail.release(); + } } - - private ExecBean auxRun(String program, List args, Map env) - throws NotAuthorizedException, ExecuteException, IOException { - DefaultExecutor executor = new DefaultExecutor(); - executor.setExitValues(null); - - // Setup stdout and stderr - int nbytes = appConf.getInt(AppConfig.EXEC_MAX_BYTES_NAME, -1); - ByteArrayOutputStream outStream = new MaxByteArrayOutputStream(nbytes); - ByteArrayOutputStream errStream = new MaxByteArrayOutputStream(nbytes); - executor.setStreamHandler(new PumpStreamHandler(outStream, errStream)); - - // Only run for N milliseconds - int timeout = appConf.getInt(AppConfig.EXEC_TIMEOUT_NAME, 0); - ExecuteWatchdog watchdog = new ExecuteWatchdog(timeout); - executor.setWatchdog(watchdog); - - CommandLine cmd = makeCommandLine(program, args); - - LOG.info("Running: " + cmd); - ExecBean res = new ExecBean(); - res.exitcode = executor.execute(cmd, execEnv(env)); - String enc = appConf.get(AppConfig.EXEC_ENCODING_NAME); - res.stdout = outStream.toString(enc); - res.stderr = errStream.toString(enc); - - return res; + } + + /** + * Run the program synchronously as the given user. Warning: + * CommandLine will trim the argument strings. + * + * @param program The program to run. + * @param args Arguments to pass to the program + * @param env Any extra environment variables to set + * @return The result of the run. 
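ExecServiceImpl.run above rate-limits concurrent hcat executions with a counting semaphore: tryAcquire rejects callers immediately with BusyException when all permits are taken, and the permit is released in a finally block. A minimal sketch of the same pattern, assuming the in-code fallback of 16 permits; doWork stands in for runUnlimited:

import java.util.concurrent.Semaphore;

// Sketch of the rate-limiting pattern used by ExecServiceImpl.run above: a
// counting semaphore bounds how many processes run at once, and a caller that
// cannot take a permit is rejected right away instead of queueing.
public class BoundedRunner {
  private final Semaphore avail = new Semaphore(16);   // fallback limit used above

  public String run() throws Exception {
    boolean acquired = avail.tryAcquire();
    if (!acquired)
      throw new IllegalStateException("busy");         // ExecServiceImpl throws BusyException
    try {
      return doWork();                                 // stands in for runUnlimited(...)
    } finally {
      avail.release();
    }
  }

  private String doWork() { return "ok"; }

  public static void main(String[] args) throws Exception {
    System.out.println(new BoundedRunner().run());
  }
}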
+ */ + public ExecBean runUnlimited(String program, List args, + Map env) + throws NotAuthorizedException, ExecuteException, IOException { + try { + return auxRun(program, args, env); + } catch (IOException e) { + File cwd = new java.io.File("."); + if (cwd.canRead() && cwd.canWrite()) + throw e; + else + throw new IOException("Invalid permissions on Templeton directory: " + + cwd.getCanonicalPath()); } - - private CommandLine makeCommandLine(String program, - List args) - throws NotAuthorizedException, IOException { - String path = validateProgram(program); - CommandLine cmd = new CommandLine(path); - if (args != null) - for (String arg : args) - cmd.addArgument(arg, false); - - return cmd; + } + + private ExecBean auxRun(String program, List args, Map env) + throws NotAuthorizedException, ExecuteException, IOException { + DefaultExecutor executor = new DefaultExecutor(); + executor.setExitValues(null); + + // Setup stdout and stderr + int nbytes = appConf.getInt(AppConfig.EXEC_MAX_BYTES_NAME, -1); + ByteArrayOutputStream outStream = new MaxByteArrayOutputStream(nbytes); + ByteArrayOutputStream errStream = new MaxByteArrayOutputStream(nbytes); + executor.setStreamHandler(new PumpStreamHandler(outStream, errStream)); + + // Only run for N milliseconds + int timeout = appConf.getInt(AppConfig.EXEC_TIMEOUT_NAME, 0); + ExecuteWatchdog watchdog = new ExecuteWatchdog(timeout); + executor.setWatchdog(watchdog); + + CommandLine cmd = makeCommandLine(program, args); + + LOG.info("Running: " + cmd); + ExecBean res = new ExecBean(); + res.exitcode = executor.execute(cmd, execEnv(env)); + String enc = appConf.get(AppConfig.EXEC_ENCODING_NAME); + res.stdout = outStream.toString(enc); + res.stderr = errStream.toString(enc); + + return res; + } + + private CommandLine makeCommandLine(String program, + List args) + throws NotAuthorizedException, IOException { + String path = validateProgram(program); + CommandLine cmd = new CommandLine(path); + if (args != null) + for (String arg : args) + cmd.addArgument(arg, false); + + return cmd; + } + + /** + * Build the environment used for all exec calls. + * + * @return The environment variables. + */ + public Map execEnv(Map env) { + HashMap res = new HashMap(); + + for (String key : appConf.getStrings(AppConfig.EXEC_ENVS_NAME)) { + String val = System.getenv(key); + if (val != null) { + res.put(key, val); + } } - - /** - * Build the environment used for all exec calls. - * - * @return The environment variables. - */ - public Map execEnv(Map env) { - HashMap res = new HashMap(); - - for (String key : appConf.getStrings(AppConfig.EXEC_ENVS_NAME)) { - String val = System.getenv(key); - if (val != null) { - res.put(key, val); - } - } - if (env != null) - res.putAll(env); - for (Map.Entry envs : res.entrySet()) { - LOG.info("Env " + envs.getKey() + "=" + envs.getValue()); - } - return res; + if (env != null) + res.putAll(env); + for (Map.Entry envs : res.entrySet()) { + LOG.info("Env " + envs.getKey() + "=" + envs.getValue()); } - - /** - * Given a program name, lookup the fully qualified path. Throws - * an exception if the program is missing or not authorized. - * - * @param path The path of the program. - * @return The path of the validated program. 
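auxRun above drives the external hcat process through Apache Commons Exec: exit-value checking is disabled, stdout and stderr are pumped into byte buffers, and an ExecuteWatchdog enforces a timeout. A self-contained sketch of the same wiring against the Commons Exec 1.x API; /bin/echo, the 10 second timeout, and UTF-8 are illustrative values, not Templeton's configured defaults:

import java.io.ByteArrayOutputStream;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteWatchdog;
import org.apache.commons.exec.PumpStreamHandler;

// Standalone sketch of the Commons Exec wiring used by auxRun above.
public class ExecSketch {
  public static void main(String[] args) throws Exception {
    DefaultExecutor executor = new DefaultExecutor();
    executor.setExitValues(null);                         // accept any exit code

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    ByteArrayOutputStream err = new ByteArrayOutputStream();
    executor.setStreamHandler(new PumpStreamHandler(out, err));

    executor.setWatchdog(new ExecuteWatchdog(10 * 1000)); // kill the child after 10s

    CommandLine cmd = new CommandLine("/bin/echo");
    cmd.addArgument("hello", false);                      // false: do not re-quote the argument

    int exit = executor.execute(cmd);
    System.out.println("exit=" + exit + " stdout=" + out.toString("UTF-8"));
  }
}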
- */ - public String validateProgram(String path) - throws NotAuthorizedException, IOException { - File f = new File(path); - if (f.canExecute()) { - return f.getCanonicalPath(); - } else { - throw new NotAuthorizedException("Unable to access program: " + path); - } + return res; + } + + /** + * Given a program name, lookup the fully qualified path. Throws + * an exception if the program is missing or not authorized. + * + * @param path The path of the program. + * @return The path of the validated program. + */ + public String validateProgram(String path) + throws NotAuthorizedException, IOException { + File f = new File(path); + if (f.canExecute()) { + return f.getCanonicalPath(); + } else { + throw new NotAuthorizedException("Unable to access program: " + path); } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/GroupPermissionsDesc.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/GroupPermissionsDesc.java index 6e9e521..e91fcab 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/GroupPermissionsDesc.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/GroupPermissionsDesc.java @@ -22,34 +22,34 @@ * The base create permissions for ddl objects. */ public abstract class GroupPermissionsDesc { - public String group; - public String permissions; + public String group; + public String permissions; - public GroupPermissionsDesc() {} + public GroupPermissionsDesc() {} - protected static boolean xequals(Object a, Object b) { - if (a == null) { - if (b == null) - return true; - else - return false; - } - - return a.equals(b); + protected static boolean xequals(Object a, Object b) { + if (a == null) { + if (b == null) + return true; + else + return false; } - protected static boolean xequals(boolean a, boolean b) { return a == b; } - protected static boolean xequals(int a, int b) { return a == b; } - protected static boolean xequals(char a, char b) { return a == b; } + return a.equals(b); + } - public boolean equals(Object o) { - if (this == o) - return true; - if (! (o instanceof GroupPermissionsDesc)) - return false; - GroupPermissionsDesc that = (GroupPermissionsDesc) o; - return xequals(this.group, that.group) - && xequals(this.permissions, that.permissions) - ; - } + protected static boolean xequals(boolean a, boolean b) { return a == b; } + protected static boolean xequals(int a, int b) { return a == b; } + protected static boolean xequals(char a, char b) { return a == b; } + + public boolean equals(Object o) { + if (this == o) + return true; + if (! (o instanceof GroupPermissionsDesc)) + return false; + GroupPermissionsDesc that = (GroupPermissionsDesc) o; + return xequals(this.group, that.group) + && xequals(this.permissions, that.permissions) + ; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatDelegator.java index c260787..387cce8 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatDelegator.java @@ -25,7 +25,7 @@ import java.util.List; import java.util.Map; import javax.ws.rs.core.Response; - + import org.apache.commons.exec.ExecuteException; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; @@ -43,811 +43,811 @@ * the backend of the ddl web service. 
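The xequals(Object, Object) helper in GroupPermissionsDesc above is a null-safe equality check; on Java 7 and later, java.util.Objects.equals gives the same behaviour. Shown only for comparison, not as a proposed change to the patch:

import java.util.Objects;

// Null-safe equality, equivalent to the xequals(Object, Object) helper above.
class NullSafeEqualsDemo {
  public static void main(String[] args) {
    System.out.println(Objects.equals(null, null));   // true
    System.out.println(Objects.equals("rw", null));   // false
    System.out.println(Objects.equals("rw", "rw"));   // true
  }
}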
*/ public class HcatDelegator extends LauncherDelegator { - private static final Log LOG = LogFactory.getLog(HcatDelegator.class); - private ExecService execService; - - public HcatDelegator(AppConfig appConf, ExecService execService) { - super(appConf); - this.execService = execService; - } - - /** - * Run the local hcat executable. - */ - public ExecBean run(String user, String exec, boolean format, - String group, String permissions) - throws NotAuthorizedException, BusyException, ExecuteException, IOException { - SecureProxySupport proxy = new SecureProxySupport(); - try { - List args = makeArgs(exec, format, group, permissions); - proxy.open(user, appConf); - - // Setup the hadoop vars to specify the user. - String cp = makeOverrideClasspath(appConf); - Map env = TempletonUtils.hadoopUserEnv(user, cp); - proxy.addEnv(env); - proxy.addArgs(args); - return execService.run(appConf.clusterHcat(), args, env); - } catch (InterruptedException e) { - throw new IOException(e); - } finally { - if (proxy != null) - proxy.close(); - } - } - - private List makeArgs(String exec, boolean format, - String group, String permissions) { - ArrayList args = new ArrayList(); - args.add("-e"); - args.add(exec); - if (TempletonUtils.isset(group)) { - args.add("-g"); - args.add(group); - } - if (TempletonUtils.isset(permissions)) { - args.add("-p"); - args.add(permissions); - } - if (format) { - args.add("-D"); - args.add("hive.ddl.output.format=json"); - // Use both args to ease development. Delete this one on - // May 1. - args.add("-D"); - args.add("hive.format=json"); - } - LOG.info("Main.getAppConfigInstance().get(AppConfig.UNIT_TEST_MODE)=" + - Main.getAppConfigInstance().get(AppConfig.UNIT_TEST_MODE)); - if(System.getProperty("hive.metastore.warehouse.dir") != null) { - /*when running in unit test mode, pass this property to HCat, - which will in turn pass it to Hive to make sure that Hive - tries to write to a directory that exists.*/ - args.add("-D"); - args.add("hive.metastore.warehouse.dir=" + System.getProperty("hive.metastore.warehouse.dir")); - } - return args; - } - - /** - * Return a json description of the database. - */ - public Response descDatabase(String user, String db, boolean extended) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "desc database " + db + "; "; - if (extended) - exec = "desc database extended " + db + "; "; - - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res).build(); - } catch (HcatException e) { - throw new HcatException("unable to describe database: " + db, - e.execBean, exec); - } - } - - /** - * Return a json "show databases like". This will return a list of - * databases. 
- */ - public Response listDatabases(String user, String dbPattern) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("show databases like '%s';", dbPattern); - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to show databases for: " + dbPattern, - e.execBean, exec); - } - } - - /** - * Create a database with the given name - */ - public Response createDatabase(String user, DatabaseDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "create database"; - if (desc.ifNotExists) - exec += " if not exists"; - exec += " " + desc.database; - if (TempletonUtils.isset(desc.comment)) - exec += String.format(" comment '%s'", desc.comment); - if (TempletonUtils.isset(desc.location)) - exec += String.format(" location '%s'", desc.location); - if (TempletonUtils.isset(desc.properties)) - exec += String.format(" with dbproperties (%s)", - makePropertiesStatement(desc.properties)); - exec += ";"; - - String res = jsonRun(user, exec, desc.group, desc.permissions); - return JsonBuilder.create(res) - .put("database", desc.database) - .build(); - } - - /** - * Drop the given database - */ - public Response dropDatabase(String user, String db, - boolean ifExists, String option, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "drop database"; - if (ifExists) - exec += " if exists"; - exec += " " + db; - if (TempletonUtils.isset(option)) - exec += " " + option; - exec += ";"; - - String res = jsonRun(user, exec, group, permissions); - return JsonBuilder.create(res) - .put("database", db) - .build(); - } - - /** - * Create a table. - */ - public Response createTable(String user, String db, TableDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = makeCreateTable(db, desc); - - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - - return JsonBuilder.create(res) - .put("database", db) - .put("table", desc.table) - .build(); - } catch (final HcatException e) { - throw new HcatException("unable to create table: " + desc.table, - e.execBean, exec); - } - } - - /** - * Create a table like another. - */ - public Response createTableLike(String user, String db, TableLikeDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; create", db); - - if (desc.external) - exec += " external"; - exec += String.format(" table %s like %s", desc.newTable, desc.existingTable); - if (TempletonUtils.isset(desc.location)) - exec += String.format(" location '%s'", desc.location); - exec += ";"; - - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - - return JsonBuilder.create(res) - .put("database", db) - .put("table", desc.newTable) - .build(); - } catch (final HcatException e) { - throw new HcatException("unable to create table: " + desc.newTable, - e.execBean, exec); - } - } - - /** - * Return a json description of the table. 
- */ - public Response descTable(String user, String db, String table, boolean extended) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "use " + db + "; "; - if (extended) - exec += "desc extended " + table + "; "; - else - exec += "desc " + table + "; "; - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to describe database: " + db, - e.execBean, exec); - } - } - - /** - * Return a json "show table like". This will return a list of - * tables. - */ - public Response listTables(String user, String db, String tablePattern) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; show tables like '%s';", - db, tablePattern); - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res) - .put("database", db) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to show tables for: " + tablePattern, - e.execBean, exec); - } - } - - /** - * Return a json "show table extended like". This will return - * only the first single table. - */ - public Response descExtendedTable(String user, String db, String table) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; show table extended like %s;", - db, table); + private static final Log LOG = LogFactory.getLog(HcatDelegator.class); + private ExecService execService; + + public HcatDelegator(AppConfig appConf, ExecService execService) { + super(appConf); + this.execService = execService; + } + + /** + * Run the local hcat executable. + */ + public ExecBean run(String user, String exec, boolean format, + String group, String permissions) + throws NotAuthorizedException, BusyException, ExecuteException, IOException { + SecureProxySupport proxy = new SecureProxySupport(); + try { + List args = makeArgs(exec, format, group, permissions); + proxy.open(user, appConf); + + // Setup the hadoop vars to specify the user. + String cp = makeOverrideClasspath(appConf); + Map env = TempletonUtils.hadoopUserEnv(user, cp); + proxy.addEnv(env); + proxy.addArgs(args); + return execService.run(appConf.clusterHcat(), args, env); + } catch (InterruptedException e) { + throw new IOException(e); + } finally { + if (proxy != null) + proxy.close(); + } + } + + private List makeArgs(String exec, boolean format, + String group, String permissions) { + ArrayList args = new ArrayList(); + args.add("-e"); + args.add(exec); + if (TempletonUtils.isset(group)) { + args.add("-g"); + args.add(group); + } + if (TempletonUtils.isset(permissions)) { + args.add("-p"); + args.add(permissions); + } + if (format) { + args.add("-D"); + args.add("hive.ddl.output.format=json"); + // Use both args to ease development. Delete this one on + // May 1. 
+ args.add("-D"); + args.add("hive.format=json"); + } + LOG.info("Main.getAppConfigInstance().get(AppConfig.UNIT_TEST_MODE)=" + + Main.getAppConfigInstance().get(AppConfig.UNIT_TEST_MODE)); + if(System.getProperty("hive.metastore.warehouse.dir") != null) { + /*when running in unit test mode, pass this property to HCat, + which will in turn pass it to Hive to make sure that Hive + tries to write to a directory that exists.*/ + args.add("-D"); + args.add("hive.metastore.warehouse.dir=" + System.getProperty("hive.metastore.warehouse.dir")); + } + return args; + } + + /** + * Return a json description of the database. + */ + public Response descDatabase(String user, String db, boolean extended) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "desc database " + db + "; "; + if (extended) + exec = "desc database extended " + db + "; "; + + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res).build(); + } catch (HcatException e) { + throw new HcatException("unable to describe database: " + db, + e.execBean, exec); + } + } + + /** + * Return a json "show databases like". This will return a list of + * databases. + */ + public Response listDatabases(String user, String dbPattern) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("show databases like '%s';", dbPattern); + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to show databases for: " + dbPattern, + e.execBean, exec); + } + } + + /** + * Create a database with the given name + */ + public Response createDatabase(String user, DatabaseDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "create database"; + if (desc.ifNotExists) + exec += " if not exists"; + exec += " " + desc.database; + if (TempletonUtils.isset(desc.comment)) + exec += String.format(" comment '%s'", desc.comment); + if (TempletonUtils.isset(desc.location)) + exec += String.format(" location '%s'", desc.location); + if (TempletonUtils.isset(desc.properties)) + exec += String.format(" with dbproperties (%s)", + makePropertiesStatement(desc.properties)); + exec += ";"; + + String res = jsonRun(user, exec, desc.group, desc.permissions); + return JsonBuilder.create(res) + .put("database", desc.database) + .build(); + } + + /** + * Drop the given database + */ + public Response dropDatabase(String user, String db, + boolean ifExists, String option, + String group, String permissions) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "drop database"; + if (ifExists) + exec += " if exists"; + exec += " " + db; + if (TempletonUtils.isset(option)) + exec += " " + option; + exec += ";"; + + String res = jsonRun(user, exec, group, permissions); + return JsonBuilder.create(res) + .put("database", db) + .build(); + } + + /** + * Create a table. 
+ */ + public Response createTable(String user, String db, TableDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = makeCreateTable(db, desc); + + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + + return JsonBuilder.create(res) + .put("database", db) + .put("table", desc.table) + .build(); + } catch (final HcatException e) { + throw new HcatException("unable to create table: " + desc.table, + e.execBean, exec); + } + } + + /** + * Create a table like another. + */ + public Response createTableLike(String user, String db, TableLikeDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; create", db); + + if (desc.external) + exec += " external"; + exec += String.format(" table %s like %s", desc.newTable, desc.existingTable); + if (TempletonUtils.isset(desc.location)) + exec += String.format(" location '%s'", desc.location); + exec += ";"; + + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + + return JsonBuilder.create(res) + .put("database", db) + .put("table", desc.newTable) + .build(); + } catch (final HcatException e) { + throw new HcatException("unable to create table: " + desc.newTable, + e.execBean, exec); + } + } + + /** + * Return a json description of the table. + */ + public Response descTable(String user, String db, String table, boolean extended) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "use " + db + "; "; + if (extended) + exec += "desc extended " + table + "; "; + else + exec += "desc " + table + "; "; + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to describe database: " + db, + e.execBean, exec); + } + } + + /** + * Return a json "show table like". This will return a list of + * tables. + */ + public Response listTables(String user, String db, String tablePattern) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; show tables like '%s';", + db, tablePattern); + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res) + .put("database", db) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to show tables for: " + tablePattern, + e.execBean, exec); + } + } + + /** + * Return a json "show table extended like". This will return + * only the first single table. 
+ */ + public Response descExtendedTable(String user, String db, String table) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; show table extended like %s;", + db, table); + try { + String res = jsonRun(user, exec); + JsonBuilder jb = JsonBuilder.create(singleTable(res, table)) + .remove("tableName") + .put("database", db) + .put("table", table); + + // If we can get them from HDFS, add group and permission + String loc = (String) jb.getMap().get("location"); + if (loc != null && loc.startsWith("hdfs://")) { try { - String res = jsonRun(user, exec); - JsonBuilder jb = JsonBuilder.create(singleTable(res, table)) - .remove("tableName") - .put("database", db) - .put("table", table); - - // If we can get them from HDFS, add group and permission - String loc = (String) jb.getMap().get("location"); - if (loc != null && loc.startsWith("hdfs://")) { - try { - FileSystem fs = FileSystem.get(appConf); - FileStatus status = fs.getFileStatus(new Path(new URI(loc))); - jb.put("group", status.getGroup()); - jb.put("permission", status.getPermission().toString()); - } catch (Exception e) { - LOG.warn(e.getMessage() + " Couldn't get permissions for " + loc); - } - } - return jb.build(); - } catch (HcatException e) { - throw new HcatException("unable to show table: " + table, e.execBean, exec); + FileSystem fs = FileSystem.get(appConf); + FileStatus status = fs.getFileStatus(new Path(new URI(loc))); + jb.put("group", status.getGroup()); + jb.put("permission", status.getPermission().toString()); + } catch (Exception e) { + LOG.warn(e.getMessage() + " Couldn't get permissions for " + loc); } - } - - // Format a list of Columns for a create statement - private String makeCols(List cols) { - ArrayList res = new ArrayList(); - for (ColumnDesc col : cols) - res.add(makeOneCol(col)); - return StringUtils.join(res, ", "); - } - - // Format a Column for a create statement - private String makeOneCol(ColumnDesc col) { - String res = String.format("%s %s", col.name, col.type); - if (TempletonUtils.isset(col.comment)) - res += String.format(" comment '%s'", col.comment); - return res; - } - - // Make a create table statement - private String makeCreateTable(String db, TableDesc desc) { - String exec = String.format("use %s; create", db); - - if (desc.external) - exec += " external"; - exec += " table"; - if (desc.ifNotExists) - exec += " if not exists"; - exec += " " + desc.table; - - if (TempletonUtils.isset(desc.columns)) - exec += String.format("(%s)", makeCols(desc.columns)); - if (TempletonUtils.isset(desc.comment)) - exec += String.format(" comment '%s'", desc.comment); - if (TempletonUtils.isset(desc.partitionedBy)) - exec += String.format(" partitioned by (%s)", makeCols(desc.partitionedBy)); - if (desc.clusteredBy != null) - exec += String.format(" clustered by %s", makeClusteredBy(desc.clusteredBy)); - if (desc.format != null) - exec += " " + makeStorageFormat(desc.format); - if (TempletonUtils.isset(desc.location)) - exec += String.format(" location '%s'", desc.location); - if (TempletonUtils.isset(desc.tableProperties)) - exec += String.format(" tblproperties (%s)", - makePropertiesStatement(desc.tableProperties)); - exec += ";"; - - return exec; - } - - // Format a clustered by statement - private String makeClusteredBy(TableDesc.ClusteredByDesc desc) { - String res = String.format("(%s)", StringUtils.join(desc.columnNames, ", ")); - if (TempletonUtils.isset(desc.sortedBy)) - res += String.format(" sorted by (%s)", 
makeClusterSortList(desc.sortedBy)); - res += String.format(" into %s buckets", desc.numberOfBuckets); - - return res; - } - - // Format a sorted by statement - private String makeClusterSortList(List descs) { - ArrayList res = new ArrayList(); - for (TableDesc.ClusterSortOrderDesc desc : descs) - res.add(makeOneClusterSort(desc)); - return StringUtils.join(res, ", "); - } - - // Format a single cluster sort statement - private String makeOneClusterSort(TableDesc.ClusterSortOrderDesc desc) { - return String.format("%s %s", desc.columnName, desc.order.toString()); - } - - // Format the storage format statements - private String makeStorageFormat(TableDesc.StorageFormatDesc desc) { - String res = ""; - - if (desc.rowFormat != null) - res += makeRowFormat(desc.rowFormat); - if (TempletonUtils.isset(desc.storedAs)) - res += String.format(" stored as %s", desc.storedAs); - if (desc.storedBy != null) - res += " " + makeStoredBy(desc.storedBy); - - return res; - } - - // Format the row format statement - private String makeRowFormat(TableDesc.RowFormatDesc desc) { - String res = - makeTermBy(desc.fieldsTerminatedBy, "fields") - + makeTermBy(desc.collectionItemsTerminatedBy, "collection items") - + makeTermBy(desc.mapKeysTerminatedBy, "map keys") - + makeTermBy(desc.linesTerminatedBy, "lines"); - - if (TempletonUtils.isset(res)) - return "row format delimited" + res; - else if (desc.serde != null) - return makeSerdeFormat(desc.serde); - else - return ""; - } - - // A row format terminated by clause - private String makeTermBy(String sep, String fieldName) { - - if (TempletonUtils.isset(sep)) - return String.format(" %s terminated by '%s'", fieldName, sep); - else - return ""; - } - - // Format the serde statement - private String makeSerdeFormat(TableDesc.SerdeDesc desc) { - String res = "row format serde " + desc.name; - if (TempletonUtils.isset(desc.properties)) - res += String.format(" with serdeproperties (%s)", - makePropertiesStatement(desc.properties)); - return res; - } - - // Format the properties statement - private String makePropertiesStatement(Map properties) { - ArrayList res = new ArrayList(); - for (Map.Entry e : properties.entrySet()) - res.add(String.format("'%s'='%s'", e.getKey(), e.getValue())); - return StringUtils.join(res, ", "); - } - - // Format the stored by statement - private String makeStoredBy(TableDesc.StoredByDesc desc) { - String res = String.format("stored by '%s'", desc.className); - if (TempletonUtils.isset(desc.properties)) - res += String.format(" with serdeproperties (%s)", - makePropertiesStatement(desc.properties)); - return res; - } - - // Pull out the first table from the "show extended" json. - private String singleTable(String json, String table) - throws IOException { - Map obj = JsonBuilder.jsonToMap(json); - if (JsonBuilder.isError(obj)) - return json; - - List tables = (List) obj.get("tables"); - if (TempletonUtils.isset(tables)) - return JsonBuilder.mapToJson(tables.get(0)); - else { - return JsonBuilder - .createError(ErrorMsg.INVALID_TABLE.format(table), - ErrorMsg.INVALID_TABLE.getErrorCode()). - buildJson(); - } - } - - /** - * Drop a table. 
- */ - public Response dropTable(String user, String db, - String table, boolean ifExists, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; drop table", db); - if (ifExists) - exec += " if exists"; - exec += String.format(" %s;", table); - - try { - String res = jsonRun(user, exec, group, permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to drop table: " + table, e.execBean, exec); - } - } - - /** - * Rename a table. - */ - public Response renameTable(String user, String db, - String oldTable, String newTable, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; alter table %s rename to %s;", - db, oldTable, newTable); - try { - String res = jsonRun(user, exec, group, permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", newTable) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to rename table: " + oldTable, - e.execBean, exec); - } - } - - /** - * Describe one table property. - */ - public Response descTableProperty(String user, String db, - String table, String property) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - Response res = descTable(user, db, table, true); - if (res.getStatus() != HttpStatus.OK_200) - return res; - Map props = tableProperties(res.getEntity()); - Map found = null; - if (props != null) { - String value = (String) props.get(property); - if (value != null) { - found = new HashMap(); - found.put(property, value); - } - } - - return JsonBuilder.create() - .put("database", db) - .put("table", table) - .put("property", found) - .build(); - } - - /** - * List the table properties. 
- */ - public Response listTableProperties(String user, String db, String table) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - Response res = descTable(user, db, table, true); - if (res.getStatus() != HttpStatus.OK_200) - return res; - Map props = tableProperties(res.getEntity()); + } + return jb.build(); + } catch (HcatException e) { + throw new HcatException("unable to show table: " + table, e.execBean, exec); + } + } + + // Format a list of Columns for a create statement + private String makeCols(List cols) { + ArrayList res = new ArrayList(); + for (ColumnDesc col : cols) + res.add(makeOneCol(col)); + return StringUtils.join(res, ", "); + } + + // Format a Column for a create statement + private String makeOneCol(ColumnDesc col) { + String res = String.format("%s %s", col.name, col.type); + if (TempletonUtils.isset(col.comment)) + res += String.format(" comment '%s'", col.comment); + return res; + } + + // Make a create table statement + private String makeCreateTable(String db, TableDesc desc) { + String exec = String.format("use %s; create", db); + + if (desc.external) + exec += " external"; + exec += " table"; + if (desc.ifNotExists) + exec += " if not exists"; + exec += " " + desc.table; + + if (TempletonUtils.isset(desc.columns)) + exec += String.format("(%s)", makeCols(desc.columns)); + if (TempletonUtils.isset(desc.comment)) + exec += String.format(" comment '%s'", desc.comment); + if (TempletonUtils.isset(desc.partitionedBy)) + exec += String.format(" partitioned by (%s)", makeCols(desc.partitionedBy)); + if (desc.clusteredBy != null) + exec += String.format(" clustered by %s", makeClusteredBy(desc.clusteredBy)); + if (desc.format != null) + exec += " " + makeStorageFormat(desc.format); + if (TempletonUtils.isset(desc.location)) + exec += String.format(" location '%s'", desc.location); + if (TempletonUtils.isset(desc.tableProperties)) + exec += String.format(" tblproperties (%s)", + makePropertiesStatement(desc.tableProperties)); + exec += ";"; + + return exec; + } + + // Format a clustered by statement + private String makeClusteredBy(TableDesc.ClusteredByDesc desc) { + String res = String.format("(%s)", StringUtils.join(desc.columnNames, ", ")); + if (TempletonUtils.isset(desc.sortedBy)) + res += String.format(" sorted by (%s)", makeClusterSortList(desc.sortedBy)); + res += String.format(" into %s buckets", desc.numberOfBuckets); + + return res; + } + + // Format a sorted by statement + private String makeClusterSortList(List descs) { + ArrayList res = new ArrayList(); + for (TableDesc.ClusterSortOrderDesc desc : descs) + res.add(makeOneClusterSort(desc)); + return StringUtils.join(res, ", "); + } + + // Format a single cluster sort statement + private String makeOneClusterSort(TableDesc.ClusterSortOrderDesc desc) { + return String.format("%s %s", desc.columnName, desc.order.toString()); + } + + // Format the storage format statements + private String makeStorageFormat(TableDesc.StorageFormatDesc desc) { + String res = ""; + + if (desc.rowFormat != null) + res += makeRowFormat(desc.rowFormat); + if (TempletonUtils.isset(desc.storedAs)) + res += String.format(" stored as %s", desc.storedAs); + if (desc.storedBy != null) + res += " " + makeStoredBy(desc.storedBy); + + return res; + } + + // Format the row format statement + private String makeRowFormat(TableDesc.RowFormatDesc desc) { + String res = + makeTermBy(desc.fieldsTerminatedBy, "fields") + + makeTermBy(desc.collectionItemsTerminatedBy, "collection items") + + 
makeTermBy(desc.mapKeysTerminatedBy, "map keys") + + makeTermBy(desc.linesTerminatedBy, "lines"); + + if (TempletonUtils.isset(res)) + return "row format delimited" + res; + else if (desc.serde != null) + return makeSerdeFormat(desc.serde); + else + return ""; + } + + // A row format terminated by clause + private String makeTermBy(String sep, String fieldName) { + + if (TempletonUtils.isset(sep)) + return String.format(" %s terminated by '%s'", fieldName, sep); + else + return ""; + } + + // Format the serde statement + private String makeSerdeFormat(TableDesc.SerdeDesc desc) { + String res = "row format serde " + desc.name; + if (TempletonUtils.isset(desc.properties)) + res += String.format(" with serdeproperties (%s)", + makePropertiesStatement(desc.properties)); + return res; + } + + // Format the properties statement + private String makePropertiesStatement(Map properties) { + ArrayList res = new ArrayList(); + for (Map.Entry e : properties.entrySet()) + res.add(String.format("'%s'='%s'", e.getKey(), e.getValue())); + return StringUtils.join(res, ", "); + } + + // Format the stored by statement + private String makeStoredBy(TableDesc.StoredByDesc desc) { + String res = String.format("stored by '%s'", desc.className); + if (TempletonUtils.isset(desc.properties)) + res += String.format(" with serdeproperties (%s)", + makePropertiesStatement(desc.properties)); + return res; + } + + // Pull out the first table from the "show extended" json. + private String singleTable(String json, String table) + throws IOException { + Map obj = JsonBuilder.jsonToMap(json); + if (JsonBuilder.isError(obj)) + return json; + + List tables = (List) obj.get("tables"); + if (TempletonUtils.isset(tables)) + return JsonBuilder.mapToJson(tables.get(0)); + else { + return JsonBuilder + .createError(ErrorMsg.INVALID_TABLE.format(table), + ErrorMsg.INVALID_TABLE.getErrorCode()). + buildJson(); + } + } + + /** + * Drop a table. + */ + public Response dropTable(String user, String db, + String table, boolean ifExists, + String group, String permissions) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; drop table", db); + if (ifExists) + exec += " if exists"; + exec += String.format(" %s;", table); + + try { + String res = jsonRun(user, exec, group, permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to drop table: " + table, e.execBean, exec); + } + } + + /** + * Rename a table. + */ + public Response renameTable(String user, String db, + String oldTable, String newTable, + String group, String permissions) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; alter table %s rename to %s;", + db, oldTable, newTable); + try { + String res = jsonRun(user, exec, group, permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", newTable) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to rename table: " + oldTable, + e.execBean, exec); + } + } + + /** + * Describe one table property. 
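makeRowFormat and makeTermBy above emit a "row format delimited" clause only when at least one separator is set, falling back to a serde clause or the empty string otherwise. A sketch of that composition with sample separators; TempletonUtils.isset is replaced here by a plain null/empty check:

// Sketch of how makeTermBy/makeRowFormat above compose a row-format clause.
// The separators are sample values, not Templeton defaults.
public class RowFormatExample {
  static String termBy(String sep, String fieldName) {
    return (sep == null || sep.isEmpty())
        ? ""
        : String.format(" %s terminated by '%s'", fieldName, sep);
  }

  public static void main(String[] args) {
    String clause = termBy(",", "fields")
        + termBy(null, "collection items")
        + termBy(null, "map keys")
        + termBy("\\n", "lines");
    if (!clause.isEmpty())
      clause = "row format delimited" + clause;
    System.out.println(clause);
    // row format delimited fields terminated by ',' lines terminated by '\n'
  }
}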
+ */ + public Response descTableProperty(String user, String db, + String table, String property) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + Response res = descTable(user, db, table, true); + if (res.getStatus() != HttpStatus.OK_200) + return res; + Map props = tableProperties(res.getEntity()); + Map found = null; + if (props != null) { + String value = (String) props.get(property); + if (value != null) { + found = new HashMap(); + found.put(property, value); + } + } + + return JsonBuilder.create() + .put("database", db) + .put("table", table) + .put("property", found) + .build(); + } + + /** + * List the table properties. + */ + public Response listTableProperties(String user, String db, String table) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + Response res = descTable(user, db, table, true); + if (res.getStatus() != HttpStatus.OK_200) + return res; + Map props = tableProperties(res.getEntity()); + return JsonBuilder.create() + .put("database", db) + .put("table", table) + .put("properties", props) + .build(); + } + + /** + * Add one table property. + */ + public Response addOneTableProperty(String user, String db, String table, + TablePropertyDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec + = String.format("use %s; alter table %s set tblproperties ('%s'='%s');", + db, table, desc.name, desc.value); + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .put("property", desc.name) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to add table property: " + table, + e.execBean, exec); + } + } + + private Map tableProperties(Object extendedTable) { + if (!(extendedTable instanceof Map)) + return null; + Map m = (Map) extendedTable; + Map tableInfo = (Map) m.get("tableInfo"); + if (tableInfo == null) + return null; + + return (Map) tableInfo.get("parameters"); + } + + /** + * Return a json description of the partitions. + */ + public Response listPartitions(String user, String db, String table) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "use " + db + "; "; + exec += "show partitions " + table + "; "; + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to show partitions for table: " + table, + e.execBean, exec); + } + } + + /** + * Return a json description of one partition. + */ + public Response descOnePartition(String user, String db, String table, + String partition) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "use " + db + "; "; + exec += "show table extended like " + table + + " partition (" + partition + "); "; + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(singleTable(res, table)) + .remove("tableName") + .put("database", db) + .put("table", table) + .put("partition", partition) + .build(); + } catch (HcatException e) { + if (e.execBean.stderr.contains("SemanticException") && + e.execBean.stderr.contains("Partition not found")) { + String emsg = "Partition " + partition + " for table " + + table + " does not exist" + db + "." 
+ table + " does not exist"; return JsonBuilder.create() - .put("database", db) - .put("table", table) - .put("properties", props) - .build(); - } - - /** - * Add one table property. - */ - public Response addOneTableProperty(String user, String db, String table, - TablePropertyDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec - = String.format("use %s; alter table %s set tblproperties ('%s'='%s');", - db, table, desc.name, desc.value); - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .put("property", desc.name) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to add table property: " + table, - e.execBean, exec); - } - } - - private Map tableProperties(Object extendedTable) { - if (!(extendedTable instanceof Map)) - return null; - Map m = (Map) extendedTable; - Map tableInfo = (Map) m.get("tableInfo"); - if (tableInfo == null) - return null; - - return (Map) tableInfo.get("parameters"); - } - - /** - * Return a json description of the partitions. - */ - public Response listPartitions(String user, String db, String table) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "use " + db + "; "; - exec += "show partitions " + table + "; "; - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to show partitions for table: " + table, - e.execBean, exec); + .put("error", emsg) + //this error should really be produced by Hive (DDLTask) + .put("errorCode", ErrorMsg.INVALID_PARTITION.getErrorCode()) + .put("database", db) + .put("table", table) + .put("partition", partition) + .build(); + } + + throw new HcatException("unable to show partition: " + + table + " " + partition, + e.execBean, + exec); + } + } + + /** + * Add one partition. + */ + public Response addOnePartition(String user, String db, String table, + PartitionDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; alter table %s add", db, table); + if (desc.ifNotExists) + exec += " if not exists"; + exec += String.format(" partition (%s)", desc.partition); + if (TempletonUtils.isset(desc.location)) + exec += String.format(" location '%s'", desc.location); + exec += ";"; + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + if (res.indexOf("AlreadyExistsException") > -1) { + return JsonBuilder.create(). + put("error", "Partition already exists") + //This error code should really be produced by Hive + .put("errorCode", ErrorMsg.PARTITION_EXISTS.getErrorCode()) + .put("database", db) + .put("table", table) + .put("partition", desc.partition).build(); + } + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .put("partition", desc.partition) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to add partition: " + desc, + e.execBean, exec); + } + } + + /** + * Drop a partition. 
+ */ + public Response dropPartition(String user, String db, + String table, String partition, boolean ifExists, + String group, String permissions) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; alter table %s drop", db, table); + if (ifExists) + exec += " if exists"; + exec += String.format(" partition (%s);", partition); + + try { + String res = jsonRun(user, exec, group, permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .put("partition", partition) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to drop partition: " + partition, + e.execBean, exec); + } + } + + /** + * Return a json description of the columns. Same as + * describeTable. + */ + public Response listColumns(String user, String db, String table) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + try { + return descTable(user, db, table, false); + } catch (HcatException e) { + throw new HcatException("unable to show columns for table: " + table, + e.execBean, e.statement); + } + } + + /** + * Return a json description of one column. + */ + public Response descOneColumn(String user, String db, String table, String column) + throws SimpleWebException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + Response res = listColumns(user, db, table); + if (res.getStatus() != HttpStatus.OK_200) + return res; + + Object o = res.getEntity(); + final Map fields = (o != null && (o instanceof Map)) ? (Map) o : null; + if (fields == null) + throw new SimpleWebException(HttpStatus.INTERNAL_SERVER_ERROR_500, "Internal error, unable to find column " + + column); + + + List cols = (List) fields.get("columns"); + Map found = null; + if (cols != null) { + for (Map col : cols) { + if (column.equals(col.get("name"))) { + found = col; + break; } - } - - /** - * Return a json description of one partition. - */ - public Response descOnePartition(String user, String db, String table, - String partition) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "use " + db + "; "; - exec += "show table extended like " + table - + " partition (" + partition + "); "; - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(singleTable(res, table)) - .remove("tableName") - .put("database", db) - .put("table", table) - .put("partition", partition) - .build(); - } catch (HcatException e) { - if (e.execBean.stderr.contains("SemanticException") && - e.execBean.stderr.contains("Partition not found")) { - String emsg = "Partition " + partition + " for table " - + table + " does not exist" + db + "." + table + " does not exist"; - return JsonBuilder.create() - .put("error", emsg) - //this error should really be produced by Hive (DDLTask) - .put("errorCode", ErrorMsg.INVALID_PARTITION.getErrorCode()) - .put("database", db) - .put("table", table) - .put("partition", partition) - .build(); - } - - throw new HcatException("unable to show partition: " - + table + " " + partition, - e.execBean, - exec); - } - } - - /** - * Add one partition. 
- */ - public Response addOnePartition(String user, String db, String table, - PartitionDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; alter table %s add", db, table); - if (desc.ifNotExists) - exec += " if not exists"; - exec += String.format(" partition (%s)", desc.partition); - if (TempletonUtils.isset(desc.location)) - exec += String.format(" location '%s'", desc.location); - exec += ";"; - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - if (res.indexOf("AlreadyExistsException") > -1) { - return JsonBuilder.create(). - put("error", "Partition already exists") - //This error code should really be produced by Hive - .put("errorCode", ErrorMsg.PARTITION_EXISTS.getErrorCode()) - .put("database", db) - .put("table", table) - .put("partition", desc.partition).build(); - } - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .put("partition", desc.partition) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to add partition: " + desc, - e.execBean, exec); - } - } - - /** - * Drop a partition. - */ - public Response dropPartition(String user, String db, - String table, String partition, boolean ifExists, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; alter table %s drop", db, table); - if (ifExists) - exec += " if exists"; - exec += String.format(" partition (%s);", partition); - - try { - String res = jsonRun(user, exec, group, permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .put("partition", partition) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to drop partition: " + partition, - e.execBean, exec); - } - } - - /** - * Return a json description of the columns. Same as - * describeTable. - */ - public Response listColumns(String user, String db, String table) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - try { - return descTable(user, db, table, false); - } catch (HcatException e) { - throw new HcatException("unable to show columns for table: " + table, - e.execBean, e.statement); - } - } - - /** - * Return a json description of one column. - */ - public Response descOneColumn(String user, String db, String table, String column) - throws SimpleWebException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - Response res = listColumns(user, db, table); - if (res.getStatus() != HttpStatus.OK_200) - return res; - - Object o = res.getEntity(); - final Map fields = (o != null && (o instanceof Map)) ? (Map) o : null; - if (fields == null) - throw new SimpleWebException(HttpStatus.INTERNAL_SERVER_ERROR_500, "Internal error, unable to find column " - + column); - - - List cols = (List) fields.get("columns"); - Map found = null; - if (cols != null) { - for (Map col : cols) { - if (column.equals(col.get("name"))) { - found = col; - break; - } - } - } - if (found == null) - throw new SimpleWebException(HttpStatus.INTERNAL_SERVER_ERROR_500, "unable to find column " + column, - new HashMap() { - { - put("description", fields); - } - }); - fields.remove("columns"); - fields.put("column", found); - return Response.fromResponse(res).entity(fields).build(); - } - - /** - * Add one column. 
- */ - public Response addOneColumn(String user, String db, String table, - ColumnDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; alter table %s add columns (%s %s", - db, table, desc.name, desc.type); - if (TempletonUtils.isset(desc.comment)) - exec += String.format(" comment '%s'", desc.comment); - exec += ");"; - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .put("column", desc.name) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to add column: " + desc, - e.execBean, exec); - } - } - - // Check that the hcat result is valid and or has a valid json - // error - private boolean isValid(ExecBean eb, boolean requireEmptyOutput) { - if (eb == null) - return false; - - try { - Map m = JsonBuilder.jsonToMap(eb.stdout); - if (m.containsKey("error")) // This is a valid error message. - return true; - } catch (IOException e) { - return false; - } - - if (eb.exitcode != 0) - return false; - - if (requireEmptyOutput) - if (TempletonUtils.isset(eb.stdout)) - return false; - + } + } + if (found == null) + throw new SimpleWebException(HttpStatus.INTERNAL_SERVER_ERROR_500, "unable to find column " + column, + new HashMap() { + { + put("description", fields); + } + }); + fields.remove("columns"); + fields.put("column", found); + return Response.fromResponse(res).entity(fields).build(); + } + + /** + * Add one column. + */ + public Response addOneColumn(String user, String db, String table, + ColumnDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; alter table %s add columns (%s %s", + db, table, desc.name, desc.type); + if (TempletonUtils.isset(desc.comment)) + exec += String.format(" comment '%s'", desc.comment); + exec += ");"; + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .put("column", desc.name) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to add column: " + desc, + e.execBean, exec); + } + } + + // Check that the hcat result is valid and or has a valid json + // error + private boolean isValid(ExecBean eb, boolean requireEmptyOutput) { + if (eb == null) + return false; + + try { + Map m = JsonBuilder.jsonToMap(eb.stdout); + if (m.containsKey("error")) // This is a valid error message. return true; - } - - // Run an hcat expression and return just the json outout. - private String jsonRun(String user, String exec, - String group, String permissions, - boolean requireEmptyOutput) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - ExecBean res = run(user, exec, true, group, permissions); - - if (!isValid(res, requireEmptyOutput)) - throw new HcatException("Failure calling hcat: " + exec, res, exec); - - return res.stdout; - } - - // Run an hcat expression and return just the json outout. No - // permissions set. - private String jsonRun(String user, String exec) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - return jsonRun(user, exec, null, null); - } - - // Run an hcat expression and return just the json outout. 
- private String jsonRun(String user, String exec, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - return jsonRun(user, exec, group, permissions, false); - } + } catch (IOException e) { + return false; + } + + if (eb.exitcode != 0) + return false; + + if (requireEmptyOutput) + if (TempletonUtils.isset(eb.stdout)) + return false; + + return true; + } + + // Run an hcat expression and return just the json outout. + private String jsonRun(String user, String exec, + String group, String permissions, + boolean requireEmptyOutput) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + ExecBean res = run(user, exec, true, group, permissions); + + if (!isValid(res, requireEmptyOutput)) + throw new HcatException("Failure calling hcat: " + exec, res, exec); + + return res.stdout; + } + + // Run an hcat expression and return just the json outout. No + // permissions set. + private String jsonRun(String user, String exec) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + return jsonRun(user, exec, null, null); + } + + // Run an hcat expression and return just the json outout. + private String jsonRun(String user, String exec, + String group, String permissions) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + return jsonRun(user, exec, group, permissions, false); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatException.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatException.java index 68ad1ad..ec76e7a 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatException.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatException.java @@ -26,17 +26,17 @@ * Unable to run hcat on the job. 
*/ public class HcatException extends SimpleWebException { - public ExecBean execBean; - public String statement; + public ExecBean execBean; + public String statement; - public HcatException(String msg, final ExecBean bean, final String statement) { - super(HttpStatus.INTERNAL_SERVER_ERROR_500, msg, new HashMap() { - { - put("exec", bean); - put("statement", statement); - } - }); - execBean = bean; - this.statement = statement; - } + public HcatException(String msg, final ExecBean bean, final String statement) { + super(HttpStatus.INTERNAL_SERVER_ERROR_500, msg, new HashMap() { + { + put("exec", bean); + put("statement", statement); + } + }); + execBean = bean; + this.statement = statement; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java index 1e679b1..f472c47 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java @@ -35,82 +35,82 @@ */ public class HiveDelegator extends LauncherDelegator { - public HiveDelegator(AppConfig appConf) { - super(appConf); - } + public HiveDelegator(AppConfig appConf) { + super(appConf); + } - public EnqueueBean run(String user, - String execute, String srcFile, List defines, - String statusdir, String callback, String completedUrl) - throws NotAuthorizedException, BadParam, BusyException, QueueException, - ExecuteException, IOException, InterruptedException - { - runAs = user; - List args = makeArgs(execute, srcFile, defines, statusdir, - completedUrl); - - return enqueueController(user, callback, args); - } + public EnqueueBean run(String user, + String execute, String srcFile, List defines, + String statusdir, String callback, String completedUrl) + throws NotAuthorizedException, BadParam, BusyException, QueueException, + ExecuteException, IOException, InterruptedException + { + runAs = user; + List args = makeArgs(execute, srcFile, defines, statusdir, + completedUrl); + + return enqueueController(user, callback, args); + } + + private List makeArgs(String execute, String srcFile, + List defines, String statusdir, String completedUrl) + throws BadParam, IOException, InterruptedException + { + ArrayList args = new ArrayList(); + try { + args.addAll(makeBasicArgs(execute, srcFile, statusdir, completedUrl)); + args.add("--"); + args.add(appConf.hivePath()); - private List makeArgs(String execute, String srcFile, - List defines, String statusdir, String completedUrl) - throws BadParam, IOException, InterruptedException - { - ArrayList args = new ArrayList(); - try { - args.addAll(makeBasicArgs(execute, srcFile, statusdir, completedUrl)); - args.add("--"); - args.add(appConf.hivePath()); - - args.add("--service"); - args.add("cli"); - - //the token file location as initial hiveconf arg - args.add("--hiveconf"); - args.add(TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); - - for (String prop : appConf.getStrings(AppConfig.HIVE_PROPS_NAME)) { - args.add("--hiveconf"); - args.add(prop); - } - for (String prop : defines) { - args.add("--hiveconf"); - args.add(prop); - } - if (TempletonUtils.isset(execute)) { - args.add("-e"); - args.add(execute); - } else if (TempletonUtils.isset(srcFile)) { - args.add("-f"); - args.add(TempletonUtils.hadoopFsPath(srcFile, appConf, runAs) - .getName()); - } - } catch (FileNotFoundException e) { - throw new BadParam(e.getMessage()); - } catch 
(URISyntaxException e) { - throw new BadParam(e.getMessage()); - } - - return args; + args.add("--service"); + args.add("cli"); + + //the token file location as initial hiveconf arg + args.add("--hiveconf"); + args.add(TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); + + for (String prop : appConf.getStrings(AppConfig.HIVE_PROPS_NAME)) { + args.add("--hiveconf"); + args.add(prop); + } + for (String prop : defines) { + args.add("--hiveconf"); + args.add(prop); + } + if (TempletonUtils.isset(execute)) { + args.add("-e"); + args.add(execute); + } else if (TempletonUtils.isset(srcFile)) { + args.add("-f"); + args.add(TempletonUtils.hadoopFsPath(srcFile, appConf, runAs) + .getName()); + } + } catch (FileNotFoundException e) { + throw new BadParam(e.getMessage()); + } catch (URISyntaxException e) { + throw new BadParam(e.getMessage()); } - private List makeBasicArgs(String execute, String srcFile, - String statusdir, String completedUrl) - throws URISyntaxException, FileNotFoundException, IOException, - InterruptedException - { - ArrayList args = new ArrayList(); + return args; + } - ArrayList allFiles = new ArrayList(); - if (TempletonUtils.isset(srcFile)) - allFiles.add(TempletonUtils.hadoopFsFilename(srcFile, appConf, - runAs)); + private List makeBasicArgs(String execute, String srcFile, + String statusdir, String completedUrl) + throws URISyntaxException, FileNotFoundException, IOException, + InterruptedException + { + ArrayList args = new ArrayList(); - args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles)); + ArrayList allFiles = new ArrayList(); + if (TempletonUtils.isset(srcFile)) + allFiles.add(TempletonUtils.hadoopFsFilename(srcFile, appConf, + runAs)); - args.add("-archives"); - args.add(appConf.hiveArchive()); + args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles)); - return args; - } + args.add("-archives"); + args.add(appConf.hiveArchive()); + + return args; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java index 05d74cb..663da94 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java @@ -34,65 +34,65 @@ * This is the backend of the mapreduce/jar web service. 
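 * (Illustrative note, not part of this patch: for a request that supplies a jar
 * and a main class, makeArgs() below appends, after the common launcher
 * arguments, roughly
 *   -- <clusterHadoop> jar wordcount.jar WordCount -D<token placeholder> <user args...>
 * where "wordcount.jar" and "WordCount" are made-up example values and
 * <clusterHadoop> is whatever appConf.clusterHadoop() returns.)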
*/ public class JarDelegator extends LauncherDelegator { - public JarDelegator(AppConfig appConf) { - super(appConf); - } + public JarDelegator(AppConfig appConf) { + super(appConf); + } - public EnqueueBean run(String user, String jar, String mainClass, - String libjars, String files, - List jarArgs, List defines, - String statusdir, String callback, String completedUrl) - throws NotAuthorizedException, BadParam, BusyException, QueueException, - ExecuteException, IOException, InterruptedException { - runAs = user; - List args = makeArgs(jar, mainClass, - libjars, files, jarArgs, defines, - statusdir, completedUrl); + public EnqueueBean run(String user, String jar, String mainClass, + String libjars, String files, + List jarArgs, List defines, + String statusdir, String callback, String completedUrl) + throws NotAuthorizedException, BadParam, BusyException, QueueException, + ExecuteException, IOException, InterruptedException { + runAs = user; + List args = makeArgs(jar, mainClass, + libjars, files, jarArgs, defines, + statusdir, completedUrl); - return enqueueController(user, callback, args); - } + return enqueueController(user, callback, args); + } - private List makeArgs(String jar, String mainClass, - String libjars, String files, - List jarArgs, List defines, - String statusdir, String completedUrl) - throws BadParam, IOException, InterruptedException { - ArrayList args = new ArrayList(); - try { - ArrayList allFiles = new ArrayList(); - allFiles.add(TempletonUtils.hadoopFsFilename(jar, appConf, runAs)); + private List makeArgs(String jar, String mainClass, + String libjars, String files, + List jarArgs, List defines, + String statusdir, String completedUrl) + throws BadParam, IOException, InterruptedException { + ArrayList args = new ArrayList(); + try { + ArrayList allFiles = new ArrayList(); + allFiles.add(TempletonUtils.hadoopFsFilename(jar, appConf, runAs)); - args.addAll(makeLauncherArgs(appConf, statusdir, - completedUrl, allFiles)); - args.add("--"); - args.add(appConf.clusterHadoop()); - args.add("jar"); - args.add(TempletonUtils.hadoopFsPath(jar, appConf, runAs).getName()); - if (TempletonUtils.isset(mainClass)) - args.add(mainClass); - if (TempletonUtils.isset(libjars)) { - args.add("-libjars"); - args.add(TempletonUtils.hadoopFsListAsString(libjars, appConf, - runAs)); - } - if (TempletonUtils.isset(files)) { - args.add("-files"); - args.add(TempletonUtils.hadoopFsListAsString(files, appConf, - runAs)); - } - //the token file location comes after mainClass, as a -Dprop=val - args.add("-D" + TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); - - for (String d : defines) - args.add("-D" + d); + args.addAll(makeLauncherArgs(appConf, statusdir, + completedUrl, allFiles)); + args.add("--"); + args.add(appConf.clusterHadoop()); + args.add("jar"); + args.add(TempletonUtils.hadoopFsPath(jar, appConf, runAs).getName()); + if (TempletonUtils.isset(mainClass)) + args.add(mainClass); + if (TempletonUtils.isset(libjars)) { + args.add("-libjars"); + args.add(TempletonUtils.hadoopFsListAsString(libjars, appConf, + runAs)); + } + if (TempletonUtils.isset(files)) { + args.add("-files"); + args.add(TempletonUtils.hadoopFsListAsString(files, appConf, + runAs)); + } + //the token file location comes after mainClass, as a -Dprop=val + args.add("-D" + TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); - args.addAll(jarArgs); - } catch (FileNotFoundException e) { - throw new BadParam(e.getMessage()); - } catch (URISyntaxException e) { - throw new BadParam(e.getMessage()); - } + for (String d : 
defines) + args.add("-D" + d); - return args; + args.addAll(jarArgs); + } catch (FileNotFoundException e) { + throw new BadParam(e.getMessage()); + } catch (URISyntaxException e) { + throw new BadParam(e.getMessage()); } + + return args; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JsonBuilder.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JsonBuilder.java index 2a1fd75..cb425b7 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JsonBuilder.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JsonBuilder.java @@ -35,160 +35,160 @@ * properties. Only add non-null entries. */ public class JsonBuilder { - private static final Map hiveError2HttpStatusCode = new HashMap(); - - /** - * It's expected that Hive (and thus HCat CLI) will return canonical error msgs/codes. - * Here they are mapped to appropriate HTTP Status Code. - */ - static { - hiveError2HttpStatusCode.put(ErrorMsg.GENERIC_ERROR.getErrorCode(), HttpStatus.INTERNAL_SERVER_ERROR_500); - hiveError2HttpStatusCode.put(ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), HttpStatus.NOT_FOUND_404); - hiveError2HttpStatusCode.put(ErrorMsg.INVALID_TABLE.getErrorCode(), HttpStatus.NOT_FOUND_404); - hiveError2HttpStatusCode.put(ErrorMsg.TABLE_NOT_PARTITIONED.getErrorCode(), HttpStatus.NOT_FOUND_404); - hiveError2HttpStatusCode.put(ErrorMsg.INVALID_PARTITION.getErrorCode(), HttpStatus.NOT_FOUND_404); - - hiveError2HttpStatusCode.put(ErrorMsg.DUPLICATE_COLUMN_NAMES.getErrorCode(), HttpStatus.CONFLICT_409); - hiveError2HttpStatusCode.put(ErrorMsg.DATABSAE_ALREADY_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409); - hiveError2HttpStatusCode.put(ErrorMsg.PARTITION_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409); - hiveError2HttpStatusCode.put(ErrorMsg.TABLE_ALREADY_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409); - } - - // The map we're building. - private Map map; - - // Parse the json map. - private JsonBuilder(String json) - throws IOException { - map = jsonToMap(json); - } - - /** - * Create a new map object from the existing json. - */ - public static JsonBuilder create(String json) - throws IOException { - return new JsonBuilder(json); - } - - /** - * Create a new map object. - */ - public static JsonBuilder create() - throws IOException { - return new JsonBuilder(null); - } - - /** - * Create a new map error object. - */ - public static JsonBuilder createError(String msg, int errorCode) - throws IOException { - return new JsonBuilder(null) - .put("error", msg) - .put("errorCode", errorCode); - } - - /** - * Add a non-null value to the map. - */ - public JsonBuilder put(String name, Object val) { - if (val != null) - map.put(name, val); - return this; - } - - /** - * Remove a value from the map. - */ - public JsonBuilder remove(String name) { - map.remove(name); - return this; - } - - /** - * Get the underlying map. - */ - public Map getMap() { - return map; - } - - /** - * Turn the map back to response object. - */ - public Response build() { - return buildResponse(); - } - - /** - * Turn the map back to json. - */ - public String buildJson() - throws IOException { - return mapToJson(map); - } - - /** - * Turn the map back to response object. - */ - public Response buildResponse() { - int status = HttpStatus.OK_200; // Server ok. - if (map.containsKey("error")) - status = HttpStatus.INTERNAL_SERVER_ERROR_500; // Generic http server error. 
- Object o = map.get("errorCode"); - if (o != null) { - if(hiveError2HttpStatusCode.containsKey(o)) { - status = hiveError2HttpStatusCode.get(o); - } - } - return buildResponse(status); - } - - /** - * Turn the map back to response object. - */ - public Response buildResponse(int status) { - return Response.status(status) - .entity(map) - .type(MediaType.APPLICATION_JSON) - .build(); - } - - /** - * Is the object non-empty? - */ - public boolean isset() { - return TempletonUtils.isset(map); - } - - /** - * Check if this is an error doc. - */ - public static boolean isError(Map obj) { - return (obj != null) && obj.containsKey("error"); - } - - /** - * Convert a json string to a Map. - */ - public static Map jsonToMap(String json) - throws IOException { - if (!TempletonUtils.isset(json)) - return new HashMap(); - else { - ObjectMapper mapper = new ObjectMapper(); - return mapper.readValue(json, Map.class); - } - } - - /** - * Convert a map to a json string. - */ - public static String mapToJson(Object obj) - throws IOException { - ObjectMapper mapper = new ObjectMapper(); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - mapper.writeValue(out, obj); - return out.toString(); - } + private static final Map hiveError2HttpStatusCode = new HashMap(); + + /** + * It's expected that Hive (and thus HCat CLI) will return canonical error msgs/codes. + * Here they are mapped to appropriate HTTP Status Code. + */ + static { + hiveError2HttpStatusCode.put(ErrorMsg.GENERIC_ERROR.getErrorCode(), HttpStatus.INTERNAL_SERVER_ERROR_500); + hiveError2HttpStatusCode.put(ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), HttpStatus.NOT_FOUND_404); + hiveError2HttpStatusCode.put(ErrorMsg.INVALID_TABLE.getErrorCode(), HttpStatus.NOT_FOUND_404); + hiveError2HttpStatusCode.put(ErrorMsg.TABLE_NOT_PARTITIONED.getErrorCode(), HttpStatus.NOT_FOUND_404); + hiveError2HttpStatusCode.put(ErrorMsg.INVALID_PARTITION.getErrorCode(), HttpStatus.NOT_FOUND_404); + + hiveError2HttpStatusCode.put(ErrorMsg.DUPLICATE_COLUMN_NAMES.getErrorCode(), HttpStatus.CONFLICT_409); + hiveError2HttpStatusCode.put(ErrorMsg.DATABSAE_ALREADY_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409); + hiveError2HttpStatusCode.put(ErrorMsg.PARTITION_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409); + hiveError2HttpStatusCode.put(ErrorMsg.TABLE_ALREADY_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409); + } + + // The map we're building. + private Map map; + + // Parse the json map. + private JsonBuilder(String json) + throws IOException { + map = jsonToMap(json); + } + + /** + * Create a new map object from the existing json. + */ + public static JsonBuilder create(String json) + throws IOException { + return new JsonBuilder(json); + } + + /** + * Create a new map object. + */ + public static JsonBuilder create() + throws IOException { + return new JsonBuilder(null); + } + + /** + * Create a new map error object. + */ + public static JsonBuilder createError(String msg, int errorCode) + throws IOException { + return new JsonBuilder(null) + .put("error", msg) + .put("errorCode", errorCode); + } + + /** + * Add a non-null value to the map. + */ + public JsonBuilder put(String name, Object val) { + if (val != null) + map.put(name, val); + return this; + } + + /** + * Remove a value from the map. + */ + public JsonBuilder remove(String name) { + map.remove(name); + return this; + } + + /** + * Get the underlying map. + */ + public Map getMap() { + return map; + } + + /** + * Turn the map back to response object. 
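 * <p>(A minimal usage sketch, not part of this patch; the message text is an
 * arbitrary example, while the status comes from the static mapping above:
 * <pre>
 *   Response r = JsonBuilder
 *       .createError("Table not found", ErrorMsg.INVALID_TABLE.getErrorCode())
 *       .build();
 *   // r.getStatus() == HttpStatus.NOT_FOUND_404; the entity is the
 *   // {"error": ..., "errorCode": ...} map built above.
 * </pre>)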
+ */ + public Response build() { + return buildResponse(); + } + + /** + * Turn the map back to json. + */ + public String buildJson() + throws IOException { + return mapToJson(map); + } + + /** + * Turn the map back to response object. + */ + public Response buildResponse() { + int status = HttpStatus.OK_200; // Server ok. + if (map.containsKey("error")) + status = HttpStatus.INTERNAL_SERVER_ERROR_500; // Generic http server error. + Object o = map.get("errorCode"); + if (o != null) { + if(hiveError2HttpStatusCode.containsKey(o)) { + status = hiveError2HttpStatusCode.get(o); + } + } + return buildResponse(status); + } + + /** + * Turn the map back to response object. + */ + public Response buildResponse(int status) { + return Response.status(status) + .entity(map) + .type(MediaType.APPLICATION_JSON) + .build(); + } + + /** + * Is the object non-empty? + */ + public boolean isset() { + return TempletonUtils.isset(map); + } + + /** + * Check if this is an error doc. + */ + public static boolean isError(Map obj) { + return (obj != null) && obj.containsKey("error"); + } + + /** + * Convert a json string to a Map. + */ + public static Map jsonToMap(String json) + throws IOException { + if (!TempletonUtils.isset(json)) + return new HashMap(); + else { + ObjectMapper mapper = new ObjectMapper(); + return mapper.readValue(json, Map.class); + } + } + + /** + * Convert a map to a json string. + */ + public static String mapToJson(Object obj) + throws IOException { + ObjectMapper mapper = new ObjectMapper(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + mapper.writeValue(out, obj); + return out.toString(); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java index 1fa6be4..75d57eb 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java @@ -41,158 +41,158 @@ * launch child jobs. */ public class LauncherDelegator extends TempletonDelegator { - private static final Log LOG = LogFactory.getLog(LauncherDelegator.class); - protected String runAs = null; - - public LauncherDelegator(AppConfig appConf) { - super(appConf); - } - - public void registerJob(String id, String user, String callback) - throws IOException { - JobState state = null; - try { - state = new JobState(id, Main.getAppConfigInstance()); - state.setUser(user); - state.setCallback(callback); - } finally { - if (state != null) - state.close(); - } + private static final Log LOG = LogFactory.getLog(LauncherDelegator.class); + protected String runAs = null; + + public LauncherDelegator(AppConfig appConf) { + super(appConf); + } + + public void registerJob(String id, String user, String callback) + throws IOException { + JobState state = null; + try { + state = new JobState(id, Main.getAppConfigInstance()); + state.setUser(user); + state.setCallback(callback); + } finally { + if (state != null) + state.close(); } + } - /** - * Enqueue the TempletonControllerJob directly calling doAs. - */ - public EnqueueBean enqueueController(String user, String callback, - List args) - throws NotAuthorizedException, BusyException, ExecuteException, - IOException, QueueException { - try { - UserGroupInformation ugi = UgiFactory.getUgi(user); + /** + * Enqueue the TempletonControllerJob directly calling doAs. 
+ */ + public EnqueueBean enqueueController(String user, String callback, + List args) + throws NotAuthorizedException, BusyException, ExecuteException, + IOException, QueueException { + try { + UserGroupInformation ugi = UgiFactory.getUgi(user); - final long startTime = System.nanoTime(); + final long startTime = System.nanoTime(); - String id = queueAsUser(ugi, args); + String id = queueAsUser(ugi, args); - long elapsed = ((System.nanoTime() - startTime) / ((int) 1e6)); - LOG.debug("queued job " + id + " in " + elapsed + " ms"); + long elapsed = ((System.nanoTime() - startTime) / ((int) 1e6)); + LOG.debug("queued job " + id + " in " + elapsed + " ms"); - if (id == null) - throw new QueueException("Unable to get job id"); + if (id == null) + throw new QueueException("Unable to get job id"); - registerJob(id, user, callback); + registerJob(id, user, callback); - return new EnqueueBean(id); - } catch (InterruptedException e) { - throw new QueueException("Unable to launch job " + e); - } + return new EnqueueBean(id); + } catch (InterruptedException e) { + throw new QueueException("Unable to launch job " + e); } - - private String queueAsUser(UserGroupInformation ugi, final List args) - throws IOException, InterruptedException { - String id = ugi.doAs(new PrivilegedExceptionAction() { - public String run() throws Exception { - String[] array = new String[args.size()]; - TempletonControllerJob ctrl = new TempletonControllerJob(); - ToolRunner.run(ctrl, args.toArray(array)); - return ctrl.getSubmittedId(); - } - }); - - return id; + } + + private String queueAsUser(UserGroupInformation ugi, final List args) + throws IOException, InterruptedException { + String id = ugi.doAs(new PrivilegedExceptionAction() { + public String run() throws Exception { + String[] array = new String[args.size()]; + TempletonControllerJob ctrl = new TempletonControllerJob(); + ToolRunner.run(ctrl, args.toArray(array)); + return ctrl.getSubmittedId(); + } + }); + + return id; + } + + public List makeLauncherArgs(AppConfig appConf, String statusdir, + String completedUrl, + List copyFiles) { + ArrayList args = new ArrayList(); + + args.add("-libjars"); + args.add(appConf.libJars()); + addCacheFiles(args, appConf); + + // Hadoop vars + addDef(args, "user.name", runAs); + addDef(args, AppConfig.HADOOP_SPECULATIVE_NAME, "false"); + addDef(args, AppConfig.HADOOP_CHILD_JAVA_OPTS, appConf.controllerMRChildOpts()); + + // Internal vars + addDef(args, TempletonControllerJob.STATUSDIR_NAME, statusdir); + addDef(args, TempletonControllerJob.COPY_NAME, + TempletonUtils.encodeArray(copyFiles)); + addDef(args, TempletonControllerJob.OVERRIDE_CLASSPATH, + makeOverrideClasspath(appConf)); + + // Hadoop queue information + addDef(args, "mapred.job.queue.name", appConf.hadoopQueueName()); + + // Job vars + addStorageVars(args); + addCompletionVars(args, completedUrl); + + return args; + } + + // Storage vars + private void addStorageVars(List args) { + addDef(args, TempletonStorage.STORAGE_CLASS, + appConf.get(TempletonStorage.STORAGE_CLASS)); + addDef(args, TempletonStorage.STORAGE_ROOT, + appConf.get(TempletonStorage.STORAGE_ROOT)); + addDef(args, ZooKeeperStorage.ZK_HOSTS, + appConf.get(ZooKeeperStorage.ZK_HOSTS)); + addDef(args, ZooKeeperStorage.ZK_SESSION_TIMEOUT, + appConf.get(ZooKeeperStorage.ZK_SESSION_TIMEOUT)); + } + + // Completion notifier vars + private void addCompletionVars(List args, String completedUrl) { + addDef(args, AppConfig.HADOOP_END_RETRY_NAME, + appConf.get(AppConfig.CALLBACK_RETRY_NAME)); + addDef(args, 
AppConfig.HADOOP_END_INTERVAL_NAME, + appConf.get(AppConfig.CALLBACK_INTERVAL_NAME)); + addDef(args, AppConfig.HADOOP_END_URL_NAME, completedUrl); + } + + /** + * Add files to the Distributed Cache for the controller job. + */ + public static void addCacheFiles(List args, AppConfig appConf) { + String overrides = appConf.overrideJarsString(); + if (overrides != null) { + args.add("-files"); + args.add(overrides); } - - public List makeLauncherArgs(AppConfig appConf, String statusdir, - String completedUrl, - List copyFiles) { - ArrayList args = new ArrayList(); - - args.add("-libjars"); - args.add(appConf.libJars()); - addCacheFiles(args, appConf); - - // Hadoop vars - addDef(args, "user.name", runAs); - addDef(args, AppConfig.HADOOP_SPECULATIVE_NAME, "false"); - addDef(args, AppConfig.HADOOP_CHILD_JAVA_OPTS, appConf.controllerMRChildOpts()); - - // Internal vars - addDef(args, TempletonControllerJob.STATUSDIR_NAME, statusdir); - addDef(args, TempletonControllerJob.COPY_NAME, - TempletonUtils.encodeArray(copyFiles)); - addDef(args, TempletonControllerJob.OVERRIDE_CLASSPATH, - makeOverrideClasspath(appConf)); - - // Hadoop queue information - addDef(args, "mapred.job.queue.name", appConf.hadoopQueueName()); - - // Job vars - addStorageVars(args); - addCompletionVars(args, completedUrl); - - return args; + } + + /** + * Create the override classpath, which will be added to + * HADOOP_CLASSPATH at runtime by the controller job. + */ + public static String makeOverrideClasspath(AppConfig appConf) { + String[] overrides = appConf.overrideJars(); + if (overrides == null) + return null; + + ArrayList cp = new ArrayList(); + for (String fname : overrides) { + Path p = new Path(fname); + cp.add(p.getName()); } - - // Storage vars - private void addStorageVars(List args) { - addDef(args, TempletonStorage.STORAGE_CLASS, - appConf.get(TempletonStorage.STORAGE_CLASS)); - addDef(args, TempletonStorage.STORAGE_ROOT, - appConf.get(TempletonStorage.STORAGE_ROOT)); - addDef(args, ZooKeeperStorage.ZK_HOSTS, - appConf.get(ZooKeeperStorage.ZK_HOSTS)); - addDef(args, ZooKeeperStorage.ZK_SESSION_TIMEOUT, - appConf.get(ZooKeeperStorage.ZK_SESSION_TIMEOUT)); - } - - // Completion notifier vars - private void addCompletionVars(List args, String completedUrl) { - addDef(args, AppConfig.HADOOP_END_RETRY_NAME, - appConf.get(AppConfig.CALLBACK_RETRY_NAME)); - addDef(args, AppConfig.HADOOP_END_INTERVAL_NAME, - appConf.get(AppConfig.CALLBACK_INTERVAL_NAME)); - addDef(args, AppConfig.HADOOP_END_URL_NAME, completedUrl); - } - - /** - * Add files to the Distributed Cache for the controller job. - */ - public static void addCacheFiles(List args, AppConfig appConf) { - String overrides = appConf.overrideJarsString(); - if (overrides != null) { - args.add("-files"); - args.add(overrides); - } - } - - /** - * Create the override classpath, which will be added to - * HADOOP_CLASSPATH at runtime by the controller job. - */ - public static String makeOverrideClasspath(AppConfig appConf) { - String[] overrides = appConf.overrideJars(); - if (overrides == null) - return null; - - ArrayList cp = new ArrayList(); - for (String fname : overrides) { - Path p = new Path(fname); - cp.add(p.getName()); - } - return StringUtils.join(":", cp); - } - - - /** - * Add a Hadoop command line definition to args if the value is - * not null. 
- */ - public static void addDef(List args, String name, String val) { - if (val != null) { - args.add("-D"); - args.add(name + "=" + val); - } + return StringUtils.join(":", cp); + } + + + /** + * Add a Hadoop command line definition to args if the value is + * not null. + */ + public static void addDef(List args, String name, String val) { + if (val != null) { + args.add("-D"); + args.add(name + "=" + val); } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ListDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ListDelegator.java index 8703862..c912638 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ListDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ListDelegator.java @@ -32,44 +32,44 @@ * List jobs owned by a user. */ public class ListDelegator extends TempletonDelegator { - public ListDelegator(AppConfig appConf) { - super(appConf); - } + public ListDelegator(AppConfig appConf) { + super(appConf); + } - public List run(String user, boolean showall) - throws NotAuthorizedException, BadParam, IOException, InterruptedException { - - UserGroupInformation ugi = UgiFactory.getUgi(user); - WebHCatJTShim tracker = null; - try { - tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); + public List run(String user, boolean showall) + throws NotAuthorizedException, BadParam, IOException, InterruptedException { - ArrayList ids = new ArrayList(); + UserGroupInformation ugi = UgiFactory.getUgi(user); + WebHCatJTShim tracker = null; + try { + tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); - JobStatus[] jobs = tracker.getAllJobs(); + ArrayList ids = new ArrayList(); - if (jobs != null) { - for (JobStatus job : jobs) { - JobState state = null; - try { - String id = job.getJobID().toString(); - state = new JobState(id, Main.getAppConfigInstance()); - if (showall || user.equals(state.getUser())) - ids.add(id); - } finally { - if (state != null) { - state.close(); - } - } - } - } + JobStatus[] jobs = tracker.getAllJobs(); - return ids; - } catch (IllegalStateException e) { - throw new BadParam(e.getMessage()); - } finally { - if (tracker != null) - tracker.close(); + if (jobs != null) { + for (JobStatus job : jobs) { + JobState state = null; + try { + String id = job.getJobID().toString(); + state = new JobState(id, Main.getAppConfigInstance()); + if (showall || user.equals(state.getUser())) + ids.add(id); + } finally { + if (state != null) { + state.close(); + } + } } + } + + return ids; + } catch (IllegalStateException e) { + throw new BadParam(e.getMessage()); + } finally { + if (tracker != null) + tracker.close(); } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Main.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Main.java index 82532c6..d84cb60 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Main.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Main.java @@ -46,207 +46,207 @@ * The main executable that starts up and runs the Server. */ public class Main { - public static final String SERVLET_PATH = "templeton"; - private static final Log LOG = LogFactory.getLog(Main.class); - - public static final int DEFAULT_PORT = 8080; - private Server server; - - private static volatile AppConfig conf; - - /** - * Retrieve the config singleton. 
- */ - public static synchronized AppConfig getAppConfigInstance() { - if (conf == null) - LOG.error("Bug: configuration not yet loaded"); - return conf; - } - - Main(String[] args) { - init(args); - } - - public void init(String[] args) { - initLogger(); - conf = loadConfig(args); - conf.startCleanup(); - LOG.debug("Loaded conf " + conf); - } - - // Jersey uses java.util.logging - bridge to slf4 - private void initLogger() { - java.util.logging.Logger rootLogger - = java.util.logging.LogManager.getLogManager().getLogger(""); - for (java.util.logging.Handler h : rootLogger.getHandlers()) - rootLogger.removeHandler(h); - - SLF4JBridgeHandler.install(); - } - - public AppConfig loadConfig(String[] args) { - AppConfig cf = new AppConfig(); - try { - GenericOptionsParser parser = new GenericOptionsParser(cf, args); - if (parser.getRemainingArgs().length > 0) - usage(); - } catch (IOException e) { - LOG.error("Unable to parse options: " + e); - usage(); - } - - return cf; + public static final String SERVLET_PATH = "templeton"; + private static final Log LOG = LogFactory.getLog(Main.class); + + public static final int DEFAULT_PORT = 8080; + private Server server; + + private static volatile AppConfig conf; + + /** + * Retrieve the config singleton. + */ + public static synchronized AppConfig getAppConfigInstance() { + if (conf == null) + LOG.error("Bug: configuration not yet loaded"); + return conf; + } + + Main(String[] args) { + init(args); + } + + public void init(String[] args) { + initLogger(); + conf = loadConfig(args); + conf.startCleanup(); + LOG.debug("Loaded conf " + conf); + } + + // Jersey uses java.util.logging - bridge to slf4 + private void initLogger() { + java.util.logging.Logger rootLogger + = java.util.logging.LogManager.getLogManager().getLogger(""); + for (java.util.logging.Handler h : rootLogger.getHandlers()) + rootLogger.removeHandler(h); + + SLF4JBridgeHandler.install(); + } + + public AppConfig loadConfig(String[] args) { + AppConfig cf = new AppConfig(); + try { + GenericOptionsParser parser = new GenericOptionsParser(cf, args); + if (parser.getRemainingArgs().length > 0) + usage(); + } catch (IOException e) { + LOG.error("Unable to parse options: " + e); + usage(); } - public void usage() { - System.err.println("usage: templeton [-Dtempleton.port=N] [-D...]"); - System.exit(1); + return cf; + } + + public void usage() { + System.err.println("usage: templeton [-Dtempleton.port=N] [-D...]"); + System.exit(1); + } + + public void run() { + int port = conf.getInt(AppConfig.PORT, DEFAULT_PORT); + try { + checkEnv(); + runServer(port); + System.out.println("templeton: listening on port " + port); + LOG.info("Templeton listening on port " + port); + } catch (Exception e) { + System.err.println("templeton: Server failed to start: " + e.getMessage()); + LOG.fatal("Server failed to start: " + e); + System.exit(1); } - - public void run() { - int port = conf.getInt(AppConfig.PORT, DEFAULT_PORT); - try { - checkEnv(); - runServer(port); - System.out.println("templeton: listening on port " + port); - LOG.info("Templeton listening on port " + port); - } catch (Exception e) { - System.err.println("templeton: Server failed to start: " + e.getMessage()); - LOG.fatal("Server failed to start: " + e); - System.exit(1); - } - } - void stop() { - if(server != null) { - try { - server.stop(); - } - catch(Exception ex) { - LOG.warn("Failed to stop jetty.Server", ex); - } - } + } + void stop() { + if(server != null) { + try { + server.stop(); + } + catch(Exception ex) { + LOG.warn("Failed to 
stop jetty.Server", ex); + } } + } - private void checkEnv() { - checkCurrentDirPermissions(); - - } - - private void checkCurrentDirPermissions() { - //org.apache.commons.exec.DefaultExecutor requires - // that current directory exists - File pwd = new File("."); - if (!pwd.exists()) { - String msg = "Server failed to start: templeton: Current working directory '.' does not exist!"; - System.err.println(msg); - LOG.fatal(msg); - System.exit(1); - } - } + private void checkEnv() { + checkCurrentDirPermissions(); - public Server runServer(int port) - throws Exception { - - //Authenticate using keytab - if (UserGroupInformation.isSecurityEnabled()) { - UserGroupInformation.loginUserFromKeytab(conf.kerberosPrincipal(), - conf.kerberosKeytab()); - } - - // Create the Jetty server - Server server = new Server(port); - ServletContextHandler root = new ServletContextHandler(server, "/"); - - // Add the Auth filter - FilterHolder fHolder = makeAuthFilter(); - - /* - * We add filters for each of the URIs supported by templeton. - * If we added the entire sub-structure using '/*', the mapreduce - * notification cannot give the callback to templeton in secure mode. - * This is because mapreduce does not use secure credentials for - * callbacks. So jetty would fail the request as unauthorized. - */ - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/ddl/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/pig/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/hive/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/queue/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/mapreduce/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/status/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/version/*", - FilterMapping.REQUEST); - - // Connect Jersey - ServletHolder h = new ServletHolder(new ServletContainer(makeJerseyConfig())); - root.addServlet(h, "/" + SERVLET_PATH + "/*"); - // Add any redirects - addRedirects(server); - - // Start the server - server.start(); - this.server = server; - return server; - } + } - // Configure the AuthFilter with the Kerberos params iff security - // is enabled. - public FilterHolder makeAuthFilter() { - FilterHolder authFilter = new FilterHolder(AuthFilter.class); - if (UserGroupInformation.isSecurityEnabled()) { - authFilter.setInitParameter("dfs.web.authentication.signature.secret", - conf.kerberosSecret()); - authFilter.setInitParameter("dfs.web.authentication.kerberos.principal", - conf.kerberosPrincipal()); - authFilter.setInitParameter("dfs.web.authentication.kerberos.keytab", - conf.kerberosKeytab()); - } - return authFilter; + private void checkCurrentDirPermissions() { + //org.apache.commons.exec.DefaultExecutor requires + // that current directory exists + File pwd = new File("."); + if (!pwd.exists()) { + String msg = "Server failed to start: templeton: Current working directory '.' 
does not exist!"; + System.err.println(msg); + LOG.fatal(msg); + System.exit(1); } + } - public PackagesResourceConfig makeJerseyConfig() { - PackagesResourceConfig rc - = new PackagesResourceConfig("org.apache.hive.hcatalog.templeton"); - HashMap props = new HashMap(); - props.put("com.sun.jersey.api.json.POJOMappingFeature", "true"); - props.put("com.sun.jersey.config.property.WadlGeneratorConfig", - "org.apache.hive.hcatalog.templeton.WadlConfig"); - rc.setPropertiesAndFeatures(props); + public Server runServer(int port) + throws Exception { - return rc; + //Authenticate using keytab + if (UserGroupInformation.isSecurityEnabled()) { + UserGroupInformation.loginUserFromKeytab(conf.kerberosPrincipal(), + conf.kerberosKeytab()); } - public void addRedirects(Server server) { - RewriteHandler rewrite = new RewriteHandler(); - - RedirectPatternRule redirect = new RedirectPatternRule(); - redirect.setPattern("/templeton/v1/application.wadl"); - redirect.setLocation("/templeton/application.wadl"); - rewrite.addRule(redirect); - - HandlerList handlerlist = new HandlerList(); - ArrayList handlers = new ArrayList(); - - // Any redirect handlers need to be added first - handlers.add(rewrite); - - // Now add all the default handlers - for (Handler handler : server.getHandlers()) { - handlers.add(handler); - } - Handler[] newlist = new Handler[handlers.size()]; - handlerlist.setHandlers(handlers.toArray(newlist)); - server.setHandler(handlerlist); + // Create the Jetty server + Server server = new Server(port); + ServletContextHandler root = new ServletContextHandler(server, "/"); + + // Add the Auth filter + FilterHolder fHolder = makeAuthFilter(); + + /* + * We add filters for each of the URIs supported by templeton. + * If we added the entire sub-structure using '/*', the mapreduce + * notification cannot give the callback to templeton in secure mode. + * This is because mapreduce does not use secure credentials for + * callbacks. So jetty would fail the request as unauthorized. + */ + root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/ddl/*", + FilterMapping.REQUEST); + root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/pig/*", + FilterMapping.REQUEST); + root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/hive/*", + FilterMapping.REQUEST); + root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/queue/*", + FilterMapping.REQUEST); + root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/mapreduce/*", + FilterMapping.REQUEST); + root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/status/*", + FilterMapping.REQUEST); + root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/version/*", + FilterMapping.REQUEST); + + // Connect Jersey + ServletHolder h = new ServletHolder(new ServletContainer(makeJerseyConfig())); + root.addServlet(h, "/" + SERVLET_PATH + "/*"); + // Add any redirects + addRedirects(server); + + // Start the server + server.start(); + this.server = server; + return server; + } + + // Configure the AuthFilter with the Kerberos params iff security + // is enabled. 
+ public FilterHolder makeAuthFilter() { + FilterHolder authFilter = new FilterHolder(AuthFilter.class); + if (UserGroupInformation.isSecurityEnabled()) { + authFilter.setInitParameter("dfs.web.authentication.signature.secret", + conf.kerberosSecret()); + authFilter.setInitParameter("dfs.web.authentication.kerberos.principal", + conf.kerberosPrincipal()); + authFilter.setInitParameter("dfs.web.authentication.kerberos.keytab", + conf.kerberosKeytab()); } - - public static void main(String[] args) { - Main templeton = new Main(args); - templeton.run(); + return authFilter; + } + + public PackagesResourceConfig makeJerseyConfig() { + PackagesResourceConfig rc + = new PackagesResourceConfig("org.apache.hive.hcatalog.templeton"); + HashMap props = new HashMap(); + props.put("com.sun.jersey.api.json.POJOMappingFeature", "true"); + props.put("com.sun.jersey.config.property.WadlGeneratorConfig", + "org.apache.hive.hcatalog.templeton.WadlConfig"); + rc.setPropertiesAndFeatures(props); + + return rc; + } + + public void addRedirects(Server server) { + RewriteHandler rewrite = new RewriteHandler(); + + RedirectPatternRule redirect = new RedirectPatternRule(); + redirect.setPattern("/templeton/v1/application.wadl"); + redirect.setLocation("/templeton/application.wadl"); + rewrite.addRule(redirect); + + HandlerList handlerlist = new HandlerList(); + ArrayList handlers = new ArrayList(); + + // Any redirect handlers need to be added first + handlers.add(rewrite); + + // Now add all the default handlers + for (Handler handler : server.getHandlers()) { + handlers.add(handler); } + Handler[] newlist = new Handler[handlers.size()]; + handlerlist.setHandlers(handlers.toArray(newlist)); + server.setHandler(handlerlist); + } + + public static void main(String[] args) { + Main templeton = new Main(args); + templeton.run(); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/MaxByteArrayOutputStream.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/MaxByteArrayOutputStream.java index 6f0371a..9250aaa 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/MaxByteArrayOutputStream.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/MaxByteArrayOutputStream.java @@ -24,53 +24,53 @@ * An output stream that will only accept the first N bytes of data. */ public class MaxByteArrayOutputStream extends ByteArrayOutputStream { - /** - * The max number of bytes stored. - */ - private int maxBytes; + /** + * The max number of bytes stored. + */ + private int maxBytes; - /** - * The number of bytes currently stored. - */ - private int nBytes; + /** + * The number of bytes currently stored. + */ + private int nBytes; - /** - * Create. - */ - public MaxByteArrayOutputStream(int maxBytes) { - this.maxBytes = maxBytes; - nBytes = 0; - } + /** + * Create. + */ + public MaxByteArrayOutputStream(int maxBytes) { + this.maxBytes = maxBytes; + nBytes = 0; + } - /** - * Writes the specified byte to this byte array output stream. - * Any bytes after the first maxBytes will be ignored. - * - * @param b the byte to be written. - */ - public synchronized void write(int b) { - if (nBytes < maxBytes) { - ++nBytes; - super.write(b); - } + /** + * Writes the specified byte to this byte array output stream. + * Any bytes after the first maxBytes will be ignored. + * + * @param b the byte to be written. 
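 * <p>(Illustrative example, not part of this patch: a stream created as
 * {@code new MaxByteArrayOutputStream(4)} keeps only the first four bytes of
 * {@code write("abcdefgh".getBytes(), 0, 8)}, so {@code toString()} yields
 * "abcd"; everything past maxBytes is silently dropped, as described above.)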
+ */ + public synchronized void write(int b) { + if (nBytes < maxBytes) { + ++nBytes; + super.write(b); } + } - /** - * Writes len bytes from the specified byte array - * starting at offset off to this byte array output stream. - * Any bytes after the first maxBytes will be ignored. - * - * @param b the data. - * @param off the start offset in the data. - * @param len the number of bytes to write. - */ - public synchronized void write(byte b[], int off, int len) { - int storable = Math.min(maxBytes - nBytes, len); - if (storable > 0) { - nBytes += storable; - super.write(b, off, storable); - } + /** + * Writes len bytes from the specified byte array + * starting at offset off to this byte array output stream. + * Any bytes after the first maxBytes will be ignored. + * + * @param b the data. + * @param off the start offset in the data. + * @param len the number of bytes to write. + */ + public synchronized void write(byte b[], int off, int len) { + int storable = Math.min(maxBytes - nBytes, len); + if (storable > 0) { + nBytes += storable; + super.write(b, off, storable); } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/NotAuthorizedException.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/NotAuthorizedException.java index fc1c0e4..36e1d7d 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/NotAuthorizedException.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/NotAuthorizedException.java @@ -24,7 +24,7 @@ * Simple "user not found" type exception. */ public class NotAuthorizedException extends SimpleWebException { - public NotAuthorizedException(String msg) { - super(HttpStatus.UNAUTHORIZED_401, msg); - } + public NotAuthorizedException(String msg) { + super(HttpStatus.UNAUTHORIZED_401, msg); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PartitionDesc.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PartitionDesc.java index 6614f57..3bc2200 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PartitionDesc.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PartitionDesc.java @@ -25,14 +25,14 @@ */ @XmlRootElement public class PartitionDesc extends GroupPermissionsDesc { - public String partition; - public String location; - public boolean ifNotExists = false; + public String partition; + public String location; + public boolean ifNotExists = false; - public PartitionDesc() {} + public PartitionDesc() {} - public String toString() { - return String.format("PartitionDesc(partition=%s, location=%s, ifNotExists=%s)", - partition, location, ifNotExists); - } + public String toString() { + return String.format("PartitionDesc(partition=%s, location=%s, ifNotExists=%s)", + partition, location, ifNotExists); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java index b7c1823..14c3fbd 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java @@ -35,63 +35,63 @@ * This is the backend of the pig web service. 
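 * (Illustrative note, not part of this patch: for a request that supplies an
 * "execute" string and no srcFile, makeArgs() below appends, after the common
 * launcher arguments, roughly
 *   -archives <pig archive> -- <pig path> -D<token placeholder> <pig args...> -execute <script>
 * where <script> stands for the caller-supplied Pig statements.)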
*/ public class PigDelegator extends LauncherDelegator { - public PigDelegator(AppConfig appConf) { - super(appConf); - } + public PigDelegator(AppConfig appConf) { + super(appConf); + } - public EnqueueBean run(String user, - String execute, String srcFile, - List pigArgs, String otherFiles, - String statusdir, String callback, String completedUrl) - throws NotAuthorizedException, BadParam, BusyException, QueueException, - ExecuteException, IOException, InterruptedException { - runAs = user; - List args = makeArgs(execute, - srcFile, pigArgs, - otherFiles, statusdir, completedUrl); + public EnqueueBean run(String user, + String execute, String srcFile, + List pigArgs, String otherFiles, + String statusdir, String callback, String completedUrl) + throws NotAuthorizedException, BadParam, BusyException, QueueException, + ExecuteException, IOException, InterruptedException { + runAs = user; + List args = makeArgs(execute, + srcFile, pigArgs, + otherFiles, statusdir, completedUrl); - return enqueueController(user, callback, args); - } + return enqueueController(user, callback, args); + } - private List makeArgs(String execute, String srcFile, - List pigArgs, String otherFiles, - String statusdir, String completedUrl) - throws BadParam, IOException, InterruptedException { - ArrayList args = new ArrayList(); - try { - ArrayList allFiles = new ArrayList(); - if (TempletonUtils.isset(srcFile)) - allFiles.add(TempletonUtils.hadoopFsFilename - (srcFile, appConf, runAs)); - if (TempletonUtils.isset(otherFiles)) { - String[] ofs = TempletonUtils.hadoopFsListAsArray(otherFiles, appConf, runAs); - allFiles.addAll(Arrays.asList(ofs)); - } + private List makeArgs(String execute, String srcFile, + List pigArgs, String otherFiles, + String statusdir, String completedUrl) + throws BadParam, IOException, InterruptedException { + ArrayList args = new ArrayList(); + try { + ArrayList allFiles = new ArrayList(); + if (TempletonUtils.isset(srcFile)) + allFiles.add(TempletonUtils.hadoopFsFilename + (srcFile, appConf, runAs)); + if (TempletonUtils.isset(otherFiles)) { + String[] ofs = TempletonUtils.hadoopFsListAsArray(otherFiles, appConf, runAs); + allFiles.addAll(Arrays.asList(ofs)); + } - args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles)); - args.add("-archives"); - args.add(appConf.pigArchive()); + args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles)); + args.add("-archives"); + args.add(appConf.pigArchive()); - args.add("--"); - args.add(appConf.pigPath()); - //the token file location should be first argument of pig - args.add("-D" + TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); - - args.addAll(pigArgs); - if (TempletonUtils.isset(execute)) { - args.add("-execute"); - args.add(execute); - } else if (TempletonUtils.isset(srcFile)) { - args.add("-file"); - args.add(TempletonUtils.hadoopFsPath(srcFile, appConf, runAs) - .getName()); - } - } catch (FileNotFoundException e) { - throw new BadParam(e.getMessage()); - } catch (URISyntaxException e) { - throw new BadParam(e.getMessage()); - } + args.add("--"); + args.add(appConf.pigPath()); + //the token file location should be first argument of pig + args.add("-D" + TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); - return args; + args.addAll(pigArgs); + if (TempletonUtils.isset(execute)) { + args.add("-execute"); + args.add(execute); + } else if (TempletonUtils.isset(srcFile)) { + args.add("-file"); + args.add(TempletonUtils.hadoopFsPath(srcFile, appConf, runAs) + .getName()); + } + } catch (FileNotFoundException e) 
{ + throw new BadParam(e.getMessage()); + } catch (URISyntaxException e) { + throw new BadParam(e.getMessage()); } + + return args; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ProxyUserSupport.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ProxyUserSupport.java index b215351..ec5acb4 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ProxyUserSupport.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ProxyUserSupport.java @@ -39,203 +39,203 @@ * call is allowed to impersonate doAs user and is making a call from authorized host. */ final class ProxyUserSupport { - private static final Log LOG = LogFactory.getLog(ProxyUserSupport.class); - private static final String CONF_PROXYUSER_PREFIX = "webhcat.proxyuser."; - private static final String CONF_GROUPS_SUFFIX = ".groups"; - private static final String CONF_HOSTS_SUFFIX = ".hosts"; - private static final Set WILD_CARD = Collections.unmodifiableSet(new HashSet(0)); - private static final Map> proxyUserGroups = new HashMap>(); - private static final Map> proxyUserHosts = new HashMap>(); + private static final Log LOG = LogFactory.getLog(ProxyUserSupport.class); + private static final String CONF_PROXYUSER_PREFIX = "webhcat.proxyuser."; + private static final String CONF_GROUPS_SUFFIX = ".groups"; + private static final String CONF_HOSTS_SUFFIX = ".hosts"; + private static final Set WILD_CARD = Collections.unmodifiableSet(new HashSet(0)); + private static final Map> proxyUserGroups = new HashMap>(); + private static final Map> proxyUserHosts = new HashMap>(); - static void processProxyuserConfig(AppConfig conf) { - for(Map.Entry confEnt : conf) { - if(confEnt.getKey().startsWith(CONF_PROXYUSER_PREFIX) - && confEnt.getKey().endsWith(CONF_GROUPS_SUFFIX)) { - //process user groups for which doAs is authorized - String proxyUser = - confEnt.getKey().substring(CONF_PROXYUSER_PREFIX.length(), - confEnt.getKey().lastIndexOf(CONF_GROUPS_SUFFIX)); - Set groups; - if("*".equals(confEnt.getValue())) { - groups = WILD_CARD; - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + "] is authorized to do doAs any user."); - } - } - else if(confEnt.getValue() != null && confEnt.getValue().trim().length() > 0) { - groups = new HashSet(Arrays.asList(confEnt.getValue().trim().split(","))); - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + - "] is authorized to do doAs for users in the following groups: [" - + confEnt.getValue().trim() + "]"); - } - } - else { - groups = Collections.emptySet(); - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + - "] is authorized to do doAs for users in the following groups: []"); - } - } - proxyUserGroups.put(proxyUser, groups); - } - else if(confEnt.getKey().startsWith(CONF_PROXYUSER_PREFIX) - && confEnt.getKey().endsWith(CONF_HOSTS_SUFFIX)) { - //process hosts from which doAs requests are authorized - String proxyUser = confEnt.getKey().substring(CONF_PROXYUSER_PREFIX.length(), - confEnt.getKey().lastIndexOf(CONF_HOSTS_SUFFIX)); - Set hosts; - if("*".equals(confEnt.getValue())) { - hosts = WILD_CARD; - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + "] is authorized to do doAs from any host."); - } - } - else if(confEnt.getValue() != null && confEnt.getValue().trim().length() > 0) { - String[] hostValues = confEnt.getValue().trim().split(","); - hosts = new HashSet(); - for(String hostname : hostValues) { - String nhn = 
normalizeHostname(hostname); - if(nhn != null) { - hosts.add(nhn); - } - } - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + - "] is authorized to do doAs from the following hosts: [" - + confEnt.getValue().trim() + "]"); - } - } - else { - hosts = Collections.emptySet(); - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser - + "] is authorized to do doAs from the following hosts: []"); - } - } - proxyUserHosts.put(proxyUser, hosts); - } + static void processProxyuserConfig(AppConfig conf) { + for(Map.Entry confEnt : conf) { + if(confEnt.getKey().startsWith(CONF_PROXYUSER_PREFIX) + && confEnt.getKey().endsWith(CONF_GROUPS_SUFFIX)) { + //process user groups for which doAs is authorized + String proxyUser = + confEnt.getKey().substring(CONF_PROXYUSER_PREFIX.length(), + confEnt.getKey().lastIndexOf(CONF_GROUPS_SUFFIX)); + Set groups; + if("*".equals(confEnt.getValue())) { + groups = WILD_CARD; + if(LOG.isDebugEnabled()) { + LOG.debug("User [" + proxyUser + "] is authorized to do doAs any user."); + } } - } - /** - * Verifies a that proxyUser is making the request from authorized host and that doAs user - * belongs to one of the groups for which proxyUser is allowed to impersonate users. - * - * @param proxyUser user name of the proxy (logged in) user. - * @param proxyHost host the proxy user is making the request from. - * @param doAsUser user the proxy user is impersonating. - * @throws NotAuthorizedException thrown if the user is not allowed to perform the proxyuser request. - */ - static void validate(String proxyUser, String proxyHost, String doAsUser) throws - NotAuthorizedException { - assertNotEmpty(proxyUser, "proxyUser", - "If you're attempting to use user-impersonation via a proxy user, please make sure that " - + CONF_PROXYUSER_PREFIX + "#USER#" + CONF_HOSTS_SUFFIX + " and " - + CONF_PROXYUSER_PREFIX + "#USER#" + CONF_GROUPS_SUFFIX - + " are configured correctly"); - assertNotEmpty(proxyHost, "proxyHost", - "If you're attempting to use user-impersonation via a proxy user, please make sure that " - + CONF_PROXYUSER_PREFIX + proxyUser + CONF_HOSTS_SUFFIX + " and " - + CONF_PROXYUSER_PREFIX + proxyUser + CONF_GROUPS_SUFFIX - + " are configured correctly"); - assertNotEmpty(doAsUser, Server.DO_AS_PARAM); - LOG.debug(MessageFormat.format("Authorization check proxyuser [{0}] host [{1}] doAs [{2}]", - proxyUser, proxyHost, doAsUser)); - if (proxyUserHosts.containsKey(proxyUser)) { - proxyHost = normalizeHostname(proxyHost); - validateRequestorHost(proxyUser, proxyHost); - validateGroup(proxyUser, doAsUser); + else if(confEnt.getValue() != null && confEnt.getValue().trim().length() > 0) { + groups = new HashSet(Arrays.asList(confEnt.getValue().trim().split(","))); + if(LOG.isDebugEnabled()) { + LOG.debug("User [" + proxyUser + + "] is authorized to do doAs for users in the following groups: [" + + confEnt.getValue().trim() + "]"); + } } else { - throw new NotAuthorizedException(MessageFormat.format( - "User [{0}] not defined as proxyuser", proxyUser)); + groups = Collections.emptySet(); + if(LOG.isDebugEnabled()) { + LOG.debug("User [" + proxyUser + + "] is authorized to do doAs for users in the following groups: []"); + } } - } - - private static void validateRequestorHost(String proxyUser, String hostname) throws - NotAuthorizedException { - Set validHosts = proxyUserHosts.get(proxyUser); - if (validHosts == WILD_CARD) { - return; - } - if (validHosts == null || !validHosts.contains(hostname)) { - throw new NotAuthorizedException(MessageFormat.format( - 
"Unauthorized host [{0}] for proxyuser [{1}]", hostname, proxyUser)); + proxyUserGroups.put(proxyUser, groups); + } + else if(confEnt.getKey().startsWith(CONF_PROXYUSER_PREFIX) + && confEnt.getKey().endsWith(CONF_HOSTS_SUFFIX)) { + //process hosts from which doAs requests are authorized + String proxyUser = confEnt.getKey().substring(CONF_PROXYUSER_PREFIX.length(), + confEnt.getKey().lastIndexOf(CONF_HOSTS_SUFFIX)); + Set hosts; + if("*".equals(confEnt.getValue())) { + hosts = WILD_CARD; + if(LOG.isDebugEnabled()) { + LOG.debug("User [" + proxyUser + "] is authorized to do doAs from any host."); + } } - } - - private static void validateGroup(String proxyUser, String doAsUser) throws - NotAuthorizedException { - Set validGroups = proxyUserGroups.get(proxyUser); - if(validGroups == WILD_CARD) { - return; - } - else if(validGroups == null || validGroups.isEmpty()) { - throw new NotAuthorizedException( - MessageFormat.format( - "Unauthorized proxyuser [{0}] for doAsUser [{1}], not in proxyuser groups", - proxyUser, doAsUser)); - } - Groups groupsInfo = new Groups(Main.getAppConfigInstance()); - try { - List userGroups = groupsInfo.getGroups(doAsUser); - for (String g : validGroups) { - if (userGroups.contains(g)) { - return; - } + else if(confEnt.getValue() != null && confEnt.getValue().trim().length() > 0) { + String[] hostValues = confEnt.getValue().trim().split(","); + hosts = new HashSet(); + for(String hostname : hostValues) { + String nhn = normalizeHostname(hostname); + if(nhn != null) { + hosts.add(nhn); } + } + if(LOG.isDebugEnabled()) { + LOG.debug("User [" + proxyUser + + "] is authorized to do doAs from the following hosts: [" + + confEnt.getValue().trim() + "]"); + } } - catch (IOException ex) {//thrown, for example, if there is no such user on the system - LOG.warn(MessageFormat.format("Unable to get list of groups for doAsUser [{0}].", - doAsUser), ex); + else { + hosts = Collections.emptySet(); + if(LOG.isDebugEnabled()) { + LOG.debug("User [" + proxyUser + + "] is authorized to do doAs from the following hosts: []"); + } } - throw new NotAuthorizedException( - MessageFormat.format( - "Unauthorized proxyuser [{0}] for doAsUser [{1}], not in proxyuser groups", - proxyUser, doAsUser)); + proxyUserHosts.put(proxyUser, hosts); + } + } + } + /** + * Verifies a that proxyUser is making the request from authorized host and that doAs user + * belongs to one of the groups for which proxyUser is allowed to impersonate users. + * + * @param proxyUser user name of the proxy (logged in) user. + * @param proxyHost host the proxy user is making the request from. + * @param doAsUser user the proxy user is impersonating. + * @throws NotAuthorizedException thrown if the user is not allowed to perform the proxyuser request. 
+ */ + static void validate(String proxyUser, String proxyHost, String doAsUser) throws + NotAuthorizedException { + assertNotEmpty(proxyUser, "proxyUser", + "If you're attempting to use user-impersonation via a proxy user, please make sure that " + + CONF_PROXYUSER_PREFIX + "#USER#" + CONF_HOSTS_SUFFIX + " and " + + CONF_PROXYUSER_PREFIX + "#USER#" + CONF_GROUPS_SUFFIX + + " are configured correctly"); + assertNotEmpty(proxyHost, "proxyHost", + "If you're attempting to use user-impersonation via a proxy user, please make sure that " + + CONF_PROXYUSER_PREFIX + proxyUser + CONF_HOSTS_SUFFIX + " and " + + CONF_PROXYUSER_PREFIX + proxyUser + CONF_GROUPS_SUFFIX + + " are configured correctly"); + assertNotEmpty(doAsUser, Server.DO_AS_PARAM); + LOG.debug(MessageFormat.format("Authorization check proxyuser [{0}] host [{1}] doAs [{2}]", + proxyUser, proxyHost, doAsUser)); + if (proxyUserHosts.containsKey(proxyUser)) { + proxyHost = normalizeHostname(proxyHost); + validateRequestorHost(proxyUser, proxyHost); + validateGroup(proxyUser, doAsUser); + } + else { + throw new NotAuthorizedException(MessageFormat.format( + "User [{0}] not defined as proxyuser", proxyUser)); } + } - private static String normalizeHostname(String name) { - try { - InetAddress address = InetAddress.getByName( - "localhost".equalsIgnoreCase(name) ? null : name); - return address.getCanonicalHostName(); - } - catch (UnknownHostException ex) { - LOG.warn(MessageFormat.format("Unable to normalize hostname [{0}]", name)); - return null; - } + private static void validateRequestorHost(String proxyUser, String hostname) throws + NotAuthorizedException { + Set validHosts = proxyUserHosts.get(proxyUser); + if (validHosts == WILD_CARD) { + return; } - /** - * Check that a string is not null and not empty. If null or empty - * throws an IllegalArgumentException. - * - * @param str value. - * @param name parameter name for the exception message. - * @return the given value. - */ - private static String assertNotEmpty(String str, String name) { - return assertNotEmpty(str, name, null); + if (validHosts == null || !validHosts.contains(hostname)) { + throw new NotAuthorizedException(MessageFormat.format( + "Unauthorized host [{0}] for proxyuser [{1}]", hostname, proxyUser)); } + } - /** - * Check that a string is not null and not empty. If null or empty - * throws an IllegalArgumentException. - * - * @param str value. - * @param name parameter name for the exception message. - * @param info additional information to be printed with the exception message - * @return the given value. - */ - private static String assertNotEmpty(String str, String name, String info) { - if (str == null) { - throw new IllegalArgumentException( - name + " cannot be null" + (info == null ? "" : ", " + info)); - } - if (str.length() == 0) { - throw new IllegalArgumentException( - name + " cannot be empty" + (info == null ? 
"" : ", " + info)); + private static void validateGroup(String proxyUser, String doAsUser) throws + NotAuthorizedException { + Set validGroups = proxyUserGroups.get(proxyUser); + if(validGroups == WILD_CARD) { + return; + } + else if(validGroups == null || validGroups.isEmpty()) { + throw new NotAuthorizedException( + MessageFormat.format( + "Unauthorized proxyuser [{0}] for doAsUser [{1}], not in proxyuser groups", + proxyUser, doAsUser)); + } + Groups groupsInfo = new Groups(Main.getAppConfigInstance()); + try { + List userGroups = groupsInfo.getGroups(doAsUser); + for (String g : validGroups) { + if (userGroups.contains(g)) { + return; } - return str; + } + } + catch (IOException ex) {//thrown, for example, if there is no such user on the system + LOG.warn(MessageFormat.format("Unable to get list of groups for doAsUser [{0}].", + doAsUser), ex); + } + throw new NotAuthorizedException( + MessageFormat.format( + "Unauthorized proxyuser [{0}] for doAsUser [{1}], not in proxyuser groups", + proxyUser, doAsUser)); + } + + private static String normalizeHostname(String name) { + try { + InetAddress address = InetAddress.getByName( + "localhost".equalsIgnoreCase(name) ? null : name); + return address.getCanonicalHostName(); + } + catch (UnknownHostException ex) { + LOG.warn(MessageFormat.format("Unable to normalize hostname [{0}]", name)); + return null; + } + } + /** + * Check that a string is not null and not empty. If null or empty + * throws an IllegalArgumentException. + * + * @param str value. + * @param name parameter name for the exception message. + * @return the given value. + */ + private static String assertNotEmpty(String str, String name) { + return assertNotEmpty(str, name, null); + } + + /** + * Check that a string is not null and not empty. If null or empty + * throws an IllegalArgumentException. + * + * @param str value. + * @param name parameter name for the exception message. + * @param info additional information to be printed with the exception message + * @return the given value. + */ + private static String assertNotEmpty(String str, String name, String info) { + if (str == null) { + throw new IllegalArgumentException( + name + " cannot be null" + (info == null ? "" : ", " + info)); + } + if (str.length() == 0) { + throw new IllegalArgumentException( + name + " cannot be empty" + (info == null ? 
"" : ", " + info)); } + return str; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueException.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueException.java index a35be92..f417851 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueException.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueException.java @@ -24,8 +24,8 @@ * Unable to queue the job */ public class QueueException extends SimpleWebException { - public QueueException(String msg) { - super(HttpStatus.INTERNAL_SERVER_ERROR_500, msg); - } + public QueueException(String msg) { + super(HttpStatus.INTERNAL_SERVER_ERROR_500, msg); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueStatusBean.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueStatusBean.java index 6e9ded8..509ac1f 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueStatusBean.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueStatusBean.java @@ -28,40 +28,40 @@ * QueueStatusBean - The results of an exec call. */ public class QueueStatusBean { - public JobStatus status; - public JobProfile profile; + public JobStatus status; + public JobProfile profile; - public String id; - public String parentId; - public String percentComplete; - public Long exitValue; - public String user; - public String callback; - public String completed; + public String id; + public String parentId; + public String percentComplete; + public Long exitValue; + public String user; + public String callback; + public String completed; - public QueueStatusBean() { - } + public QueueStatusBean() { + } - /** - * Create a new QueueStatusBean - * - * @param state store job state - * @param status job status - * @param profile job profile - */ - public QueueStatusBean(JobState state, JobStatus status, JobProfile profile) - throws IOException { - this.status = status; - this.profile = profile; + /** + * Create a new QueueStatusBean + * + * @param state store job state + * @param status job status + * @param profile job profile + */ + public QueueStatusBean(JobState state, JobStatus status, JobProfile profile) + throws IOException { + this.status = status; + this.profile = profile; - id = profile.getJobID().toString(); - parentId = state.getId(); - if (id.equals(parentId)) - parentId = null; - percentComplete = state.getPercentComplete(); - exitValue = state.getExitValue(); - user = state.getUser(); - callback = state.getCallback(); - completed = state.getCompleteStatus(); - } + id = profile.getJobID().toString(); + parentId = state.getId(); + if (id.equals(parentId)) + parentId = null; + percentComplete = state.getPercentComplete(); + exitValue = state.getExitValue(); + user = state.getUser(); + callback = state.getCallback(); + completed = state.getCompleteStatus(); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java index 8dbcf9a..663aa58 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java @@ -43,149 +43,149 @@ * use these methods, it's a noop if security is not enabled. 
*/ public class SecureProxySupport { - private Path tokenPath; - private final String HCAT_SERVICE = "hcat"; - private boolean isEnabled; - private String user; + private Path tokenPath; + private final String HCAT_SERVICE = "hcat"; + private boolean isEnabled; + private String user; + + public SecureProxySupport() { + isEnabled = UserGroupInformation.isSecurityEnabled(); + } + + private static final Log LOG = LogFactory.getLog(SecureProxySupport.class); + + /** + * The file where we store the auth token + */ + public Path getTokenPath() { + return (tokenPath); + } + + /** + * The token to pass to hcat. + */ + public String getHcatServiceStr() { + return (HCAT_SERVICE); + } + + /** + * Create the delegation token. + */ + public Path open(String user, Configuration conf) + throws IOException, InterruptedException { + close(); + if (isEnabled) { + this.user = user; + File t = File.createTempFile("templeton", null); + tokenPath = new Path(t.toURI()); + Token fsToken = getFSDelegationToken(user, conf); + String hcatTokenStr; + try { + hcatTokenStr = buildHcatDelegationToken(user); + } catch (Exception e) { + throw new IOException(e); + } + Token msToken = new Token(); + msToken.decodeFromUrlString(hcatTokenStr); + msToken.setService(new Text(HCAT_SERVICE)); + writeProxyDelegationTokens(fsToken, msToken, conf, user, tokenPath); - public SecureProxySupport() { - isEnabled = UserGroupInformation.isSecurityEnabled(); } - - private static final Log LOG = LogFactory.getLog(SecureProxySupport.class); - - /** - * The file where we store the auth token - */ - public Path getTokenPath() { - return (tokenPath); - } - - /** - * The token to pass to hcat. - */ - public String getHcatServiceStr() { - return (HCAT_SERVICE); - } - - /** - * Create the delegation token. - */ - public Path open(String user, Configuration conf) - throws IOException, InterruptedException { - close(); - if (isEnabled) { - this.user = user; - File t = File.createTempFile("templeton", null); - tokenPath = new Path(t.toURI()); - Token fsToken = getFSDelegationToken(user, conf); - String hcatTokenStr; - try { - hcatTokenStr = buildHcatDelegationToken(user); - } catch (Exception e) { - throw new IOException(e); - } - Token msToken = new Token(); - msToken.decodeFromUrlString(hcatTokenStr); - msToken.setService(new Text(HCAT_SERVICE)); - writeProxyDelegationTokens(fsToken, msToken, conf, user, tokenPath); - - } - return tokenPath; - } - - /** - * Cleanup - */ - public void close() { - if (tokenPath != null) { - new File(tokenPath.toUri()).delete(); - tokenPath = null; - } - } - - /** - * Add Hadoop env variables. - */ - public void addEnv(Map env) { - if (isEnabled) { - env.put(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION, - getTokenPath().toUri().getPath()); - } - } - - /** - * Add hcat args. 
- */ - public void addArgs(List args) { - if (isEnabled) { - args.add("-D"); - args.add("hive.metastore.token.signature=" + getHcatServiceStr()); - args.add("-D"); - args.add("proxy.user.name=" + user); - } + return tokenPath; + } + + /** + * Cleanup + */ + public void close() { + if (tokenPath != null) { + new File(tokenPath.toUri()).delete(); + tokenPath = null; } - - class TokenWrapper { - Token token; - } - - private Token getFSDelegationToken(String user, - final Configuration conf) - throws IOException, InterruptedException { - LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); - final UserGroupInformation ugi = UgiFactory.getUgi(user); - - final TokenWrapper twrapper = new TokenWrapper(); - ugi.doAs(new PrivilegedExceptionAction() { - public Object run() throws IOException { - FileSystem fs = FileSystem.get(conf); - twrapper.token = fs.getDelegationToken(ugi.getShortUserName()); - return null; - } - }); - return twrapper.token; - - } - - private void writeProxyDelegationTokens(final Token fsToken, - final Token msToken, - final Configuration conf, - String user, - final Path tokenPath) - throws IOException, InterruptedException { - - - LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); - final UserGroupInformation ugi = UgiFactory.getUgi(user); - - - ugi.doAs(new PrivilegedExceptionAction() { - public Object run() throws IOException { - Credentials cred = new Credentials(); - cred.addToken(fsToken.getService(), fsToken); - cred.addToken(msToken.getService(), msToken); - cred.writeTokenStorageFile(tokenPath, conf); - return null; - } - }); - + } + + /** + * Add Hadoop env variables. + */ + public void addEnv(Map env) { + if (isEnabled) { + env.put(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION, + getTokenPath().toUri().getPath()); } - - private String buildHcatDelegationToken(String user) - throws IOException, InterruptedException, MetaException, TException { - HiveConf c = new HiveConf(); - final HiveMetaStoreClient client = new HiveMetaStoreClient(c); - LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); - final TokenWrapper twrapper = new TokenWrapper(); - final UserGroupInformation ugi = UgiFactory.getUgi(user); - String s = ugi.doAs(new PrivilegedExceptionAction() { - public String run() - throws IOException, MetaException, TException { - String u = ugi.getUserName(); - return client.getDelegationToken(u); - } - }); - return s; + } + + /** + * Add hcat args. 
+ */ + public void addArgs(List args) { + if (isEnabled) { + args.add("-D"); + args.add("hive.metastore.token.signature=" + getHcatServiceStr()); + args.add("-D"); + args.add("proxy.user.name=" + user); } + } + + class TokenWrapper { + Token token; + } + + private Token getFSDelegationToken(String user, + final Configuration conf) + throws IOException, InterruptedException { + LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); + final UserGroupInformation ugi = UgiFactory.getUgi(user); + + final TokenWrapper twrapper = new TokenWrapper(); + ugi.doAs(new PrivilegedExceptionAction() { + public Object run() throws IOException { + FileSystem fs = FileSystem.get(conf); + twrapper.token = fs.getDelegationToken(ugi.getShortUserName()); + return null; + } + }); + return twrapper.token; + + } + + private void writeProxyDelegationTokens(final Token fsToken, + final Token msToken, + final Configuration conf, + String user, + final Path tokenPath) + throws IOException, InterruptedException { + + + LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); + final UserGroupInformation ugi = UgiFactory.getUgi(user); + + + ugi.doAs(new PrivilegedExceptionAction() { + public Object run() throws IOException { + Credentials cred = new Credentials(); + cred.addToken(fsToken.getService(), fsToken); + cred.addToken(msToken.getService(), msToken); + cred.writeTokenStorageFile(tokenPath, conf); + return null; + } + }); + + } + + private String buildHcatDelegationToken(String user) + throws IOException, InterruptedException, MetaException, TException { + HiveConf c = new HiveConf(); + final HiveMetaStoreClient client = new HiveMetaStoreClient(c); + LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); + final TokenWrapper twrapper = new TokenWrapper(); + final UserGroupInformation ugi = UgiFactory.getUgi(user); + String s = ugi.doAs(new PrivilegedExceptionAction() { + public String run() + throws IOException, MetaException, TException { + String u = ugi.getUserName(); + return client.getDelegationToken(u); + } + }); + return s; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java index fefb259..3fcae2b 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java @@ -57,800 +57,800 @@ */ @Path("/v1") public class Server { - public static final String VERSION = "v1"; - public static final String DO_AS_PARAM = "doAs"; - - /** - * The status message. Always "ok" - */ - public static final Map STATUS_OK = createStatusMsg(); - - /** - * The list of supported api versions. - */ - public static final Map SUPPORTED_VERSIONS = createVersions(); - - /** - * The list of supported return formats. Always json. - */ - public static final Map SUPPORTED_FORMATS = createFormats(); - - // Build the status message for the /status call. - private static Map createStatusMsg() { - HashMap res = new HashMap(); - res.put("status", "ok"); - res.put("version", VERSION); - - return Collections.unmodifiableMap(res); - } - - // Build the versions list. 
- private static Map createVersions() { - ArrayList versions = new ArrayList(); - versions.add(VERSION); - - HashMap res = new HashMap(); - res.put("supportedVersions", versions); - res.put("version", VERSION); - - return Collections.unmodifiableMap(res); - } - - // Build the supported formats list - private static Map createFormats() { - ArrayList formats = new ArrayList(); - formats.add(MediaType.APPLICATION_JSON); - HashMap res = new HashMap(); - res.put("responseTypes", formats); - - return Collections.unmodifiableMap(res); - } - - protected static ExecService execService = ExecServiceImpl.getInstance(); - private static AppConfig appConf = Main.getAppConfigInstance(); - - // The SecurityContext set by AuthFilter - private - @Context - SecurityContext theSecurityContext; - - // The uri requested - private - @Context - UriInfo theUriInfo; - private @QueryParam(DO_AS_PARAM) String doAs; - private @Context HttpServletRequest request; - - private static final Log LOG = LogFactory.getLog(Server.class); - - /** - * Check the status of this server. Always OK. - */ - @GET - @Path("status") - @Produces({MediaType.APPLICATION_JSON}) - public Map status() { - return STATUS_OK; - } - - /** - * Check the supported request formats of this server. - */ - @GET - @Produces({MediaType.APPLICATION_JSON}) - public Map requestFormats() { - return SUPPORTED_FORMATS; - } - - /** - * Check the version(s) supported by this server. - */ - @GET - @Path("version") - @Produces({MediaType.APPLICATION_JSON}) - public Map version() { - return SUPPORTED_VERSIONS; - } - - /** - * Execute an hcat ddl expression on the local box. It is run - * as the authenticated user and rate limited. - */ - @POST - @Path("ddl") - @Produces({MediaType.APPLICATION_JSON}) - public ExecBean ddl(@FormParam("exec") String exec, - @FormParam("group") String group, - @FormParam("permissions") String permissions) - throws NotAuthorizedException, BusyException, BadParam, - ExecuteException, IOException { - verifyUser(); - verifyParam(exec, "exec"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.run(getDoAsUser(), exec, false, group, permissions); - } - - /** - * List all the tables in an hcat database. - */ - @GET - @Path("ddl/database/{db}/table") - @Produces(MediaType.APPLICATION_JSON) - public Response listTables(@PathParam("db") String db, - @QueryParam("like") String tablePattern) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - if (!TempletonUtils.isset(tablePattern)) - tablePattern = "*"; - return d.listTables(getDoAsUser(), db, tablePattern); - } - - /** - * Create a new table. - */ - @PUT - @Path("ddl/database/{db}/table/{table}") - @Produces(MediaType.APPLICATION_JSON) - public Response createTable(@PathParam("db") String db, - @PathParam("table") String table, - TableDesc desc) - throws SimpleWebException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - desc.table = table; - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.createTable(getDoAsUser(), db, desc); - } - - /** - * Create a new table like another table. 
- */ - @PUT - @Path("ddl/database/{db}/table/{existingTable}/like/{newTable}") - @Produces(MediaType.APPLICATION_JSON) - public Response createTableLike(@PathParam("db") String db, - @PathParam("existingTable") String existingTable, - @PathParam("newTable") String newTable, - TableLikeDesc desc) - throws SimpleWebException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(existingTable, ":existingTable"); - verifyDdlParam(newTable, ":newTable"); - desc.existingTable = existingTable; - desc.newTable = newTable; - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.createTableLike(getDoAsUser(), db, desc); - } - - /** - * Describe an hcat table. This is normally a simple list of - * columns (using "desc table"), but the extended format will show - * more information (using "show table extended like"). - */ - @GET - @Path("ddl/database/{db}/table/{table}") - @Produces(MediaType.APPLICATION_JSON) - public Response descTable(@PathParam("db") String db, - @PathParam("table") String table, - @QueryParam("format") String format) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - if ("extended".equals(format)) - return d.descExtendedTable(getDoAsUser(), db, table); - else - return d.descTable(getDoAsUser(), db, table, false); - } - - /** - * Drop an hcat table. - */ - @DELETE - @Path("ddl/database/{db}/table/{table}") - @Produces(MediaType.APPLICATION_JSON) - public Response dropTable(@PathParam("db") String db, - @PathParam("table") String table, - @QueryParam("ifExists") boolean ifExists, - @QueryParam("group") String group, - @QueryParam("permissions") String permissions) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.dropTable(getDoAsUser(), db, table, ifExists, group, permissions); - } - - /** - * Rename an hcat table. - */ - @POST - @Path("ddl/database/{db}/table/{table}") - @Produces(MediaType.APPLICATION_JSON) - public Response renameTable(@PathParam("db") String db, - @PathParam("table") String oldTable, - @FormParam("rename") String newTable, - @FormParam("group") String group, - @FormParam("permissions") String permissions) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(oldTable, ":table"); - verifyDdlParam(newTable, "rename"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.renameTable(getDoAsUser(), db, oldTable, newTable, group, permissions); - } - - /** - * Describe a single property on an hcat table. 
- */ - @GET - @Path("ddl/database/{db}/table/{table}/property/{property}") - @Produces(MediaType.APPLICATION_JSON) - public Response descOneTableProperty(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("property") String property) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyDdlParam(property, ":property"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.descTableProperty(getDoAsUser(), db, table, property); - } - - /** - * List all the properties on an hcat table. - */ - @GET - @Path("ddl/database/{db}/table/{table}/property") - @Produces(MediaType.APPLICATION_JSON) - public Response listTableProperties(@PathParam("db") String db, - @PathParam("table") String table) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.listTableProperties(getDoAsUser(), db, table); - } - - /** - * Add a single property on an hcat table. - */ - @PUT - @Path("ddl/database/{db}/table/{table}/property/{property}") - @Produces(MediaType.APPLICATION_JSON) - public Response addOneTableProperty(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("property") String property, - TablePropertyDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyDdlParam(property, ":property"); - desc.name = property; - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.addOneTableProperty(getDoAsUser(), db, table, desc); - } - - /** - * List all the partitions in an hcat table. - */ - @GET - @Path("ddl/database/{db}/table/{table}/partition") - @Produces(MediaType.APPLICATION_JSON) - public Response listPartitions(@PathParam("db") String db, - @PathParam("table") String table) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.listPartitions(getDoAsUser(), db, table); - } - - /** - * Describe a single partition in an hcat table. - */ - @GET - @Path("ddl/database/{db}/table/{table}/partition/{partition}") - @Produces(MediaType.APPLICATION_JSON) - public Response descPartition(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("partition") String partition) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(partition, ":partition"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.descOnePartition(getDoAsUser(), db, table, partition); - } - - /** - * Create a partition in an hcat table. 
- */ - @PUT - @Path("ddl/database/{db}/table/{table}/partition/{partition}") - @Produces(MediaType.APPLICATION_JSON) - public Response addOnePartition(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("partition") String partition, - PartitionDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(partition, ":partition"); - desc.partition = partition; - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.addOnePartition(getDoAsUser(), db, table, desc); - } - - /** - * Drop a partition in an hcat table. - */ - @DELETE - @Path("ddl/database/{db}/table/{table}/partition/{partition}") - @Produces(MediaType.APPLICATION_JSON) - public Response dropPartition(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("partition") String partition, - @QueryParam("ifExists") boolean ifExists, - @QueryParam("group") String group, - @QueryParam("permissions") String permissions) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(partition, ":partition"); - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.dropPartition(getDoAsUser(), db, table, partition, ifExists, - group, permissions); - } - - /** - * List all databases, or those that match a pattern. - */ - @GET - @Path("ddl/database/") - @Produces(MediaType.APPLICATION_JSON) - public Response listDatabases(@QueryParam("like") String dbPattern) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - - HcatDelegator d = new HcatDelegator(appConf, execService); - if (!TempletonUtils.isset(dbPattern)) - dbPattern = "*"; - return d.listDatabases(getDoAsUser(), dbPattern); - } - - /** - * Describe a database - */ - @GET - @Path("ddl/database/{db}") - @Produces(MediaType.APPLICATION_JSON) - public Response descDatabase(@PathParam("db") String db, - @QueryParam("format") String format) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.descDatabase(getDoAsUser(), db, "extended".equals(format)); - } - - /** - * Create a database - */ - @PUT - @Path("ddl/database/{db}") - @Produces(MediaType.APPLICATION_JSON) - public Response createDatabase(@PathParam("db") String db, - DatabaseDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - desc.database = db; - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.createDatabase(getDoAsUser(), desc); - } - - /** - * Drop a database - */ - @DELETE - @Path("ddl/database/{db}") - @Produces(MediaType.APPLICATION_JSON) - public Response dropDatabase(@PathParam("db") String db, - @QueryParam("ifExists") boolean ifExists, - @QueryParam("option") String option, - @QueryParam("group") String group, - @QueryParam("permissions") String permissions) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - if (TempletonUtils.isset(option)) - verifyDdlParam(option, "option"); - 
HcatDelegator d = new HcatDelegator(appConf, execService); - return d.dropDatabase(getDoAsUser(), db, ifExists, option, - group, permissions); - } - - /** - * List the columns in an hcat table. Currently the same as - * describe table. - */ - @GET - @Path("ddl/database/{db}/table/{table}/column") - @Produces(MediaType.APPLICATION_JSON) - public Response listColumns(@PathParam("db") String db, - @PathParam("table") String table) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.listColumns(getDoAsUser(), db, table); - } - - /** - * Describe a single column in an hcat table. - */ - @GET - @Path("ddl/database/{db}/table/{table}/column/{column}") - @Produces(MediaType.APPLICATION_JSON) - public Response descColumn(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("column") String column) - throws SimpleWebException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(column, ":column"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.descOneColumn(getDoAsUser(), db, table, column); - } - - /** - * Create a column in an hcat table. - */ - @PUT - @Path("ddl/database/{db}/table/{table}/column/{column}") - @Produces(MediaType.APPLICATION_JSON) - public Response addOneColumn(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("column") String column, - ColumnDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(column, ":column"); - verifyParam(desc.type, "type"); - desc.name = column; - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.addOneColumn(getDoAsUser(), db, table, desc); - } - - /** - * Run a MapReduce Streaming job. - */ - @POST - @Path("mapreduce/streaming") - @Produces({MediaType.APPLICATION_JSON}) - public EnqueueBean mapReduceStreaming(@FormParam("input") List inputs, - @FormParam("output") String output, - @FormParam("mapper") String mapper, - @FormParam("reducer") String reducer, - @FormParam("file") List files, - @FormParam("define") List defines, - @FormParam("cmdenv") List cmdenvs, - @FormParam("arg") List args, - @FormParam("statusdir") String statusdir, - @FormParam("callback") String callback) - throws NotAuthorizedException, BusyException, BadParam, QueueException, - ExecuteException, IOException, InterruptedException { - verifyUser(); - verifyParam(inputs, "input"); - verifyParam(mapper, "mapper"); - verifyParam(reducer, "reducer"); - - StreamingDelegator d = new StreamingDelegator(appConf); - return d.run(getDoAsUser(), inputs, output, mapper, reducer, - files, defines, cmdenvs, args, - statusdir, callback, getCompletedUrl()); - } - - /** - * Run a MapReduce Jar job. 
- */ - @POST - @Path("mapreduce/jar") - @Produces({MediaType.APPLICATION_JSON}) - public EnqueueBean mapReduceJar(@FormParam("jar") String jar, - @FormParam("class") String mainClass, - @FormParam("libjars") String libjars, - @FormParam("files") String files, - @FormParam("arg") List args, - @FormParam("define") List defines, - @FormParam("statusdir") String statusdir, - @FormParam("callback") String callback) - throws NotAuthorizedException, BusyException, BadParam, QueueException, - ExecuteException, IOException, InterruptedException { - verifyUser(); - verifyParam(jar, "jar"); - verifyParam(mainClass, "class"); - - JarDelegator d = new JarDelegator(appConf); - return d.run(getDoAsUser(), - jar, mainClass, - libjars, files, args, defines, - statusdir, callback, getCompletedUrl()); - } - - /** - * Run a Pig job. - */ - @POST - @Path("pig") - @Produces({MediaType.APPLICATION_JSON}) - public EnqueueBean pig(@FormParam("execute") String execute, - @FormParam("file") String srcFile, - @FormParam("arg") List pigArgs, - @FormParam("files") String otherFiles, - @FormParam("statusdir") String statusdir, - @FormParam("callback") String callback) - throws NotAuthorizedException, BusyException, BadParam, QueueException, - ExecuteException, IOException, InterruptedException { - verifyUser(); - if (execute == null && srcFile == null) - throw new BadParam("Either execute or file parameter required"); - - PigDelegator d = new PigDelegator(appConf); - return d.run(getDoAsUser(), - execute, srcFile, - pigArgs, otherFiles, - statusdir, callback, getCompletedUrl()); - } - - /** - * Run a Hive job. - */ - @POST - @Path("hive") - @Produces({MediaType.APPLICATION_JSON}) - public EnqueueBean hive(@FormParam("execute") String execute, - @FormParam("file") String srcFile, - @FormParam("define") List defines, - @FormParam("statusdir") String statusdir, - @FormParam("callback") String callback) - throws NotAuthorizedException, BusyException, BadParam, QueueException, - ExecuteException, IOException, InterruptedException { - verifyUser(); - if (execute == null && srcFile == null) - throw new BadParam("Either execute or file parameter required"); - - HiveDelegator d = new HiveDelegator(appConf); - return d.run(getDoAsUser(), execute, srcFile, defines, - statusdir, callback, getCompletedUrl()); - } - - /** - * Return the status of the jobid. - */ - @GET - @Path("queue/{jobid}") - @Produces({MediaType.APPLICATION_JSON}) - public QueueStatusBean showQueueId(@PathParam("jobid") String jobid) - throws NotAuthorizedException, BadParam, IOException, InterruptedException { - - verifyUser(); - verifyParam(jobid, ":jobid"); - - StatusDelegator d = new StatusDelegator(appConf); - return d.run(getDoAsUser(), jobid); - } - - /** - * Kill a job in the queue. - */ - @DELETE - @Path("queue/{jobid}") - @Produces({MediaType.APPLICATION_JSON}) - public QueueStatusBean deleteQueueId(@PathParam("jobid") String jobid) - throws NotAuthorizedException, BadParam, IOException, InterruptedException { - - verifyUser(); - verifyParam(jobid, ":jobid"); - - DeleteDelegator d = new DeleteDelegator(appConf); - return d.run(getDoAsUser(), jobid); - } - - /** - * Return all the known job ids for this user. 
- */ - @GET - @Path("queue") - @Produces({MediaType.APPLICATION_JSON}) - public List showQueueList(@QueryParam("showall") boolean showall) - throws NotAuthorizedException, BadParam, IOException, InterruptedException { - - verifyUser(); - - ListDelegator d = new ListDelegator(appConf); - return d.run(getDoAsUser(), showall); - } - - /** - * Notify on a completed job. - */ - @GET - @Path("internal/complete/{jobid}") - @Produces({MediaType.APPLICATION_JSON}) - public CompleteBean completeJob(@PathParam("jobid") String jobid) - throws CallbackFailedException, IOException { - CompleteDelegator d = new CompleteDelegator(appConf); - return d.run(jobid); - } - - /** - * Verify that we have a valid user. Throw an exception if invalid. - */ - public void verifyUser() throws NotAuthorizedException { - String requestingUser = getRequestingUser(); - if (requestingUser == null) { - String msg = "No user found."; - if (!UserGroupInformation.isSecurityEnabled()) - msg += " Missing " + PseudoAuthenticator.USER_NAME + " parameter."; - throw new NotAuthorizedException(msg); - } - if(doAs != null && !doAs.equals(requestingUser)) { - /*if doAs user is different than logged in user, need to check that - that logged in user is authorized to run as 'doAs'*/ - ProxyUserSupport.validate(requestingUser, getRequestingHost(requestingUser, request), doAs); - } - } - /** - * All 'tasks' spawned by WebHCat should be run as this user. W/o doAs query parameter - * this is just the user making the request (or - * {@link org.apache.hadoop.security.authentication.client.PseudoAuthenticator#USER_NAME} - * query param). - * @return value of doAs query parameter or {@link #getRequestingUser()} - */ - private String getDoAsUser() { - return doAs != null && !doAs.equals(getRequestingUser()) ? doAs : getRequestingUser(); - } - /** - * Verify that the parameter exists. Throw an exception if invalid. - */ - public void verifyParam(String param, String name) - throws BadParam { - if (param == null) - throw new BadParam("Missing " + name + " parameter"); - } - - /** - * Verify that the parameter exists. Throw an exception if invalid. - */ - public void verifyParam(List param, String name) - throws BadParam { - if (param == null || param.isEmpty()) - throw new BadParam("Missing " + name + " parameter"); - } - - public static final Pattern DDL_ID = Pattern.compile("[a-zA-Z]\\w*"); - - /** - * Verify that the parameter exists and is a simple DDL identifier - * name. Throw an exception if invalid. - * - * Bug: This needs to allow for quoted ddl identifiers. - */ - public void verifyDdlParam(String param, String name) - throws BadParam { - verifyParam(param, name); - Matcher m = DDL_ID.matcher(param); - if (!m.matches()) - throw new BadParam("Invalid DDL identifier " + name); - } - /** - * Get the user name from the security context, i.e. the user making the HTTP request. - * With simple/pseudo security mode this should return the - * value of user.name query param, in kerberos mode it's the kinit'ed user. - */ - private String getRequestingUser() { - if (theSecurityContext == null) - return null; - if (theSecurityContext.getUserPrincipal() == null) - return null; - //map hue/foo.bar@something.com->hue since user group checks - // and config files are in terms of short name - return UserGroupInformation.createRemoteUser( - theSecurityContext.getUserPrincipal().getName()).getShortUserName(); - } - - /** - * The callback url on this server when a task is completed. 
- */ - public String getCompletedUrl() { - if (theUriInfo == null) - return null; - if (theUriInfo.getBaseUri() == null) - return null; - return theUriInfo.getBaseUri() + VERSION - + "/internal/complete/$jobId"; - } - /** - * Returns canonical host name from which the request is made; used for doAs validation - */ - private static String getRequestingHost(String requestingUser, HttpServletRequest request) { - final String unkHost = "???"; - if(request == null) { - LOG.warn("request is null; cannot determine hostname"); - return unkHost; - } - try { - String address = request.getRemoteAddr();//returns IP addr - if(address == null) { - LOG.warn(MessageFormat.format("Request remote address is NULL for user [{0}]", requestingUser)); - return unkHost; - } - - //Inet4Address/Inet6Address - String hostName = InetAddress.getByName(address).getCanonicalHostName(); - if(LOG.isDebugEnabled()) { - LOG.debug(MessageFormat.format("Resolved remote hostname: [{0}]", hostName)); - } - return hostName; - - } catch (UnknownHostException ex) { - LOG.warn(MessageFormat.format("Request remote address could not be resolved, {0}", ex.toString(), ex)); - return unkHost; - } - } + public static final String VERSION = "v1"; + public static final String DO_AS_PARAM = "doAs"; + + /** + * The status message. Always "ok" + */ + public static final Map STATUS_OK = createStatusMsg(); + + /** + * The list of supported api versions. + */ + public static final Map SUPPORTED_VERSIONS = createVersions(); + + /** + * The list of supported return formats. Always json. + */ + public static final Map SUPPORTED_FORMATS = createFormats(); + + // Build the status message for the /status call. + private static Map createStatusMsg() { + HashMap res = new HashMap(); + res.put("status", "ok"); + res.put("version", VERSION); + + return Collections.unmodifiableMap(res); + } + + // Build the versions list. + private static Map createVersions() { + ArrayList versions = new ArrayList(); + versions.add(VERSION); + + HashMap res = new HashMap(); + res.put("supportedVersions", versions); + res.put("version", VERSION); + + return Collections.unmodifiableMap(res); + } + + // Build the supported formats list + private static Map createFormats() { + ArrayList formats = new ArrayList(); + formats.add(MediaType.APPLICATION_JSON); + HashMap res = new HashMap(); + res.put("responseTypes", formats); + + return Collections.unmodifiableMap(res); + } + + protected static ExecService execService = ExecServiceImpl.getInstance(); + private static AppConfig appConf = Main.getAppConfigInstance(); + + // The SecurityContext set by AuthFilter + private + @Context + SecurityContext theSecurityContext; + + // The uri requested + private + @Context + UriInfo theUriInfo; + private @QueryParam(DO_AS_PARAM) String doAs; + private @Context HttpServletRequest request; + + private static final Log LOG = LogFactory.getLog(Server.class); + + /** + * Check the status of this server. Always OK. + */ + @GET + @Path("status") + @Produces({MediaType.APPLICATION_JSON}) + public Map status() { + return STATUS_OK; + } + + /** + * Check the supported request formats of this server. + */ + @GET + @Produces({MediaType.APPLICATION_JSON}) + public Map requestFormats() { + return SUPPORTED_FORMATS; + } + + /** + * Check the version(s) supported by this server. + */ + @GET + @Path("version") + @Produces({MediaType.APPLICATION_JSON}) + public Map version() { + return SUPPORTED_VERSIONS; + } + + /** + * Execute an hcat ddl expression on the local box. 
It is run + * as the authenticated user and rate limited. + */ + @POST + @Path("ddl") + @Produces({MediaType.APPLICATION_JSON}) + public ExecBean ddl(@FormParam("exec") String exec, + @FormParam("group") String group, + @FormParam("permissions") String permissions) + throws NotAuthorizedException, BusyException, BadParam, + ExecuteException, IOException { + verifyUser(); + verifyParam(exec, "exec"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.run(getDoAsUser(), exec, false, group, permissions); + } + + /** + * List all the tables in an hcat database. + */ + @GET + @Path("ddl/database/{db}/table") + @Produces(MediaType.APPLICATION_JSON) + public Response listTables(@PathParam("db") String db, + @QueryParam("like") String tablePattern) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + if (!TempletonUtils.isset(tablePattern)) + tablePattern = "*"; + return d.listTables(getDoAsUser(), db, tablePattern); + } + + /** + * Create a new table. + */ + @PUT + @Path("ddl/database/{db}/table/{table}") + @Produces(MediaType.APPLICATION_JSON) + public Response createTable(@PathParam("db") String db, + @PathParam("table") String table, + TableDesc desc) + throws SimpleWebException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + desc.table = table; + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.createTable(getDoAsUser(), db, desc); + } + + /** + * Create a new table like another table. + */ + @PUT + @Path("ddl/database/{db}/table/{existingTable}/like/{newTable}") + @Produces(MediaType.APPLICATION_JSON) + public Response createTableLike(@PathParam("db") String db, + @PathParam("existingTable") String existingTable, + @PathParam("newTable") String newTable, + TableLikeDesc desc) + throws SimpleWebException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(existingTable, ":existingTable"); + verifyDdlParam(newTable, ":newTable"); + desc.existingTable = existingTable; + desc.newTable = newTable; + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.createTableLike(getDoAsUser(), db, desc); + } + + /** + * Describe an hcat table. This is normally a simple list of + * columns (using "desc table"), but the extended format will show + * more information (using "show table extended like"). + */ + @GET + @Path("ddl/database/{db}/table/{table}") + @Produces(MediaType.APPLICATION_JSON) + public Response descTable(@PathParam("db") String db, + @PathParam("table") String table, + @QueryParam("format") String format) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + if ("extended".equals(format)) + return d.descExtendedTable(getDoAsUser(), db, table); + else + return d.descTable(getDoAsUser(), db, table, false); + } + + /** + * Drop an hcat table. 
+ */ + @DELETE + @Path("ddl/database/{db}/table/{table}") + @Produces(MediaType.APPLICATION_JSON) + public Response dropTable(@PathParam("db") String db, + @PathParam("table") String table, + @QueryParam("ifExists") boolean ifExists, + @QueryParam("group") String group, + @QueryParam("permissions") String permissions) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.dropTable(getDoAsUser(), db, table, ifExists, group, permissions); + } + + /** + * Rename an hcat table. + */ + @POST + @Path("ddl/database/{db}/table/{table}") + @Produces(MediaType.APPLICATION_JSON) + public Response renameTable(@PathParam("db") String db, + @PathParam("table") String oldTable, + @FormParam("rename") String newTable, + @FormParam("group") String group, + @FormParam("permissions") String permissions) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(oldTable, ":table"); + verifyDdlParam(newTable, "rename"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.renameTable(getDoAsUser(), db, oldTable, newTable, group, permissions); + } + + /** + * Describe a single property on an hcat table. + */ + @GET + @Path("ddl/database/{db}/table/{table}/property/{property}") + @Produces(MediaType.APPLICATION_JSON) + public Response descOneTableProperty(@PathParam("db") String db, + @PathParam("table") String table, + @PathParam("property") String property) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + verifyDdlParam(property, ":property"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.descTableProperty(getDoAsUser(), db, table, property); + } + + /** + * List all the properties on an hcat table. + */ + @GET + @Path("ddl/database/{db}/table/{table}/property") + @Produces(MediaType.APPLICATION_JSON) + public Response listTableProperties(@PathParam("db") String db, + @PathParam("table") String table) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.listTableProperties(getDoAsUser(), db, table); + } + + /** + * Add a single property on an hcat table. + */ + @PUT + @Path("ddl/database/{db}/table/{table}/property/{property}") + @Produces(MediaType.APPLICATION_JSON) + public Response addOneTableProperty(@PathParam("db") String db, + @PathParam("table") String table, + @PathParam("property") String property, + TablePropertyDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + verifyDdlParam(property, ":property"); + desc.name = property; + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.addOneTableProperty(getDoAsUser(), db, table, desc); + } + + /** + * List all the partitions in an hcat table. 
+ */ + @GET + @Path("ddl/database/{db}/table/{table}/partition") + @Produces(MediaType.APPLICATION_JSON) + public Response listPartitions(@PathParam("db") String db, + @PathParam("table") String table) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.listPartitions(getDoAsUser(), db, table); + } + + /** + * Describe a single partition in an hcat table. + */ + @GET + @Path("ddl/database/{db}/table/{table}/partition/{partition}") + @Produces(MediaType.APPLICATION_JSON) + public Response descPartition(@PathParam("db") String db, + @PathParam("table") String table, + @PathParam("partition") String partition) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + verifyParam(partition, ":partition"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.descOnePartition(getDoAsUser(), db, table, partition); + } + + /** + * Create a partition in an hcat table. + */ + @PUT + @Path("ddl/database/{db}/table/{table}/partition/{partition}") + @Produces(MediaType.APPLICATION_JSON) + public Response addOnePartition(@PathParam("db") String db, + @PathParam("table") String table, + @PathParam("partition") String partition, + PartitionDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + verifyParam(partition, ":partition"); + desc.partition = partition; + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.addOnePartition(getDoAsUser(), db, table, desc); + } + + /** + * Drop a partition in an hcat table. + */ + @DELETE + @Path("ddl/database/{db}/table/{table}/partition/{partition}") + @Produces(MediaType.APPLICATION_JSON) + public Response dropPartition(@PathParam("db") String db, + @PathParam("table") String table, + @PathParam("partition") String partition, + @QueryParam("ifExists") boolean ifExists, + @QueryParam("group") String group, + @QueryParam("permissions") String permissions) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + verifyParam(partition, ":partition"); + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.dropPartition(getDoAsUser(), db, table, partition, ifExists, + group, permissions); + } + + /** + * List all databases, or those that match a pattern. 
+ */ + @GET + @Path("ddl/database/") + @Produces(MediaType.APPLICATION_JSON) + public Response listDatabases(@QueryParam("like") String dbPattern) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + + HcatDelegator d = new HcatDelegator(appConf, execService); + if (!TempletonUtils.isset(dbPattern)) + dbPattern = "*"; + return d.listDatabases(getDoAsUser(), dbPattern); + } + + /** + * Describe a database + */ + @GET + @Path("ddl/database/{db}") + @Produces(MediaType.APPLICATION_JSON) + public Response descDatabase(@PathParam("db") String db, + @QueryParam("format") String format) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.descDatabase(getDoAsUser(), db, "extended".equals(format)); + } + + /** + * Create a database + */ + @PUT + @Path("ddl/database/{db}") + @Produces(MediaType.APPLICATION_JSON) + public Response createDatabase(@PathParam("db") String db, + DatabaseDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + desc.database = db; + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.createDatabase(getDoAsUser(), desc); + } + + /** + * Drop a database + */ + @DELETE + @Path("ddl/database/{db}") + @Produces(MediaType.APPLICATION_JSON) + public Response dropDatabase(@PathParam("db") String db, + @QueryParam("ifExists") boolean ifExists, + @QueryParam("option") String option, + @QueryParam("group") String group, + @QueryParam("permissions") String permissions) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + if (TempletonUtils.isset(option)) + verifyDdlParam(option, "option"); + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.dropDatabase(getDoAsUser(), db, ifExists, option, + group, permissions); + } + + /** + * List the columns in an hcat table. Currently the same as + * describe table. + */ + @GET + @Path("ddl/database/{db}/table/{table}/column") + @Produces(MediaType.APPLICATION_JSON) + public Response listColumns(@PathParam("db") String db, + @PathParam("table") String table) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.listColumns(getDoAsUser(), db, table); + } + + /** + * Describe a single column in an hcat table. + */ + @GET + @Path("ddl/database/{db}/table/{table}/column/{column}") + @Produces(MediaType.APPLICATION_JSON) + public Response descColumn(@PathParam("db") String db, + @PathParam("table") String table, + @PathParam("column") String column) + throws SimpleWebException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + verifyParam(column, ":column"); + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.descOneColumn(getDoAsUser(), db, table, column); + } + + /** + * Create a column in an hcat table. 
+ */ + @PUT + @Path("ddl/database/{db}/table/{table}/column/{column}") + @Produces(MediaType.APPLICATION_JSON) + public Response addOneColumn(@PathParam("db") String db, + @PathParam("table") String table, + @PathParam("column") String column, + ColumnDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + BadParam, ExecuteException, IOException { + verifyUser(); + verifyDdlParam(db, ":db"); + verifyDdlParam(table, ":table"); + verifyParam(column, ":column"); + verifyParam(desc.type, "type"); + desc.name = column; + + HcatDelegator d = new HcatDelegator(appConf, execService); + return d.addOneColumn(getDoAsUser(), db, table, desc); + } + + /** + * Run a MapReduce Streaming job. + */ + @POST + @Path("mapreduce/streaming") + @Produces({MediaType.APPLICATION_JSON}) + public EnqueueBean mapReduceStreaming(@FormParam("input") List inputs, + @FormParam("output") String output, + @FormParam("mapper") String mapper, + @FormParam("reducer") String reducer, + @FormParam("file") List files, + @FormParam("define") List defines, + @FormParam("cmdenv") List cmdenvs, + @FormParam("arg") List args, + @FormParam("statusdir") String statusdir, + @FormParam("callback") String callback) + throws NotAuthorizedException, BusyException, BadParam, QueueException, + ExecuteException, IOException, InterruptedException { + verifyUser(); + verifyParam(inputs, "input"); + verifyParam(mapper, "mapper"); + verifyParam(reducer, "reducer"); + + StreamingDelegator d = new StreamingDelegator(appConf); + return d.run(getDoAsUser(), inputs, output, mapper, reducer, + files, defines, cmdenvs, args, + statusdir, callback, getCompletedUrl()); + } + + /** + * Run a MapReduce Jar job. + */ + @POST + @Path("mapreduce/jar") + @Produces({MediaType.APPLICATION_JSON}) + public EnqueueBean mapReduceJar(@FormParam("jar") String jar, + @FormParam("class") String mainClass, + @FormParam("libjars") String libjars, + @FormParam("files") String files, + @FormParam("arg") List args, + @FormParam("define") List defines, + @FormParam("statusdir") String statusdir, + @FormParam("callback") String callback) + throws NotAuthorizedException, BusyException, BadParam, QueueException, + ExecuteException, IOException, InterruptedException { + verifyUser(); + verifyParam(jar, "jar"); + verifyParam(mainClass, "class"); + + JarDelegator d = new JarDelegator(appConf); + return d.run(getDoAsUser(), + jar, mainClass, + libjars, files, args, defines, + statusdir, callback, getCompletedUrl()); + } + + /** + * Run a Pig job. + */ + @POST + @Path("pig") + @Produces({MediaType.APPLICATION_JSON}) + public EnqueueBean pig(@FormParam("execute") String execute, + @FormParam("file") String srcFile, + @FormParam("arg") List pigArgs, + @FormParam("files") String otherFiles, + @FormParam("statusdir") String statusdir, + @FormParam("callback") String callback) + throws NotAuthorizedException, BusyException, BadParam, QueueException, + ExecuteException, IOException, InterruptedException { + verifyUser(); + if (execute == null && srcFile == null) + throw new BadParam("Either execute or file parameter required"); + + PigDelegator d = new PigDelegator(appConf); + return d.run(getDoAsUser(), + execute, srcFile, + pigArgs, otherFiles, + statusdir, callback, getCompletedUrl()); + } + + /** + * Run a Hive job. 
+ */ + @POST + @Path("hive") + @Produces({MediaType.APPLICATION_JSON}) + public EnqueueBean hive(@FormParam("execute") String execute, + @FormParam("file") String srcFile, + @FormParam("define") List defines, + @FormParam("statusdir") String statusdir, + @FormParam("callback") String callback) + throws NotAuthorizedException, BusyException, BadParam, QueueException, + ExecuteException, IOException, InterruptedException { + verifyUser(); + if (execute == null && srcFile == null) + throw new BadParam("Either execute or file parameter required"); + + HiveDelegator d = new HiveDelegator(appConf); + return d.run(getDoAsUser(), execute, srcFile, defines, + statusdir, callback, getCompletedUrl()); + } + + /** + * Return the status of the jobid. + */ + @GET + @Path("queue/{jobid}") + @Produces({MediaType.APPLICATION_JSON}) + public QueueStatusBean showQueueId(@PathParam("jobid") String jobid) + throws NotAuthorizedException, BadParam, IOException, InterruptedException { + + verifyUser(); + verifyParam(jobid, ":jobid"); + + StatusDelegator d = new StatusDelegator(appConf); + return d.run(getDoAsUser(), jobid); + } + + /** + * Kill a job in the queue. + */ + @DELETE + @Path("queue/{jobid}") + @Produces({MediaType.APPLICATION_JSON}) + public QueueStatusBean deleteQueueId(@PathParam("jobid") String jobid) + throws NotAuthorizedException, BadParam, IOException, InterruptedException { + + verifyUser(); + verifyParam(jobid, ":jobid"); + + DeleteDelegator d = new DeleteDelegator(appConf); + return d.run(getDoAsUser(), jobid); + } + + /** + * Return all the known job ids for this user. + */ + @GET + @Path("queue") + @Produces({MediaType.APPLICATION_JSON}) + public List showQueueList(@QueryParam("showall") boolean showall) + throws NotAuthorizedException, BadParam, IOException, InterruptedException { + + verifyUser(); + + ListDelegator d = new ListDelegator(appConf); + return d.run(getDoAsUser(), showall); + } + + /** + * Notify on a completed job. + */ + @GET + @Path("internal/complete/{jobid}") + @Produces({MediaType.APPLICATION_JSON}) + public CompleteBean completeJob(@PathParam("jobid") String jobid) + throws CallbackFailedException, IOException { + CompleteDelegator d = new CompleteDelegator(appConf); + return d.run(jobid); + } + + /** + * Verify that we have a valid user. Throw an exception if invalid. + */ + public void verifyUser() throws NotAuthorizedException { + String requestingUser = getRequestingUser(); + if (requestingUser == null) { + String msg = "No user found."; + if (!UserGroupInformation.isSecurityEnabled()) + msg += " Missing " + PseudoAuthenticator.USER_NAME + " parameter."; + throw new NotAuthorizedException(msg); + } + if(doAs != null && !doAs.equals(requestingUser)) { + /*if doAs user is different than logged in user, need to check that + that logged in user is authorized to run as 'doAs'*/ + ProxyUserSupport.validate(requestingUser, getRequestingHost(requestingUser, request), doAs); + } + } + /** + * All 'tasks' spawned by WebHCat should be run as this user. W/o doAs query parameter + * this is just the user making the request (or + * {@link org.apache.hadoop.security.authentication.client.PseudoAuthenticator#USER_NAME} + * query param). + * @return value of doAs query parameter or {@link #getRequestingUser()} + */ + private String getDoAsUser() { + return doAs != null && !doAs.equals(getRequestingUser()) ? doAs : getRequestingUser(); + } + /** + * Verify that the parameter exists. Throw an exception if invalid. 
+ */ + public void verifyParam(String param, String name) + throws BadParam { + if (param == null) + throw new BadParam("Missing " + name + " parameter"); + } + + /** + * Verify that the parameter exists. Throw an exception if invalid. + */ + public void verifyParam(List param, String name) + throws BadParam { + if (param == null || param.isEmpty()) + throw new BadParam("Missing " + name + " parameter"); + } + + public static final Pattern DDL_ID = Pattern.compile("[a-zA-Z]\\w*"); + + /** + * Verify that the parameter exists and is a simple DDL identifier + * name. Throw an exception if invalid. + * + * Bug: This needs to allow for quoted ddl identifiers. + */ + public void verifyDdlParam(String param, String name) + throws BadParam { + verifyParam(param, name); + Matcher m = DDL_ID.matcher(param); + if (!m.matches()) + throw new BadParam("Invalid DDL identifier " + name); + } + /** + * Get the user name from the security context, i.e. the user making the HTTP request. + * With simple/pseudo security mode this should return the + * value of user.name query param, in kerberos mode it's the kinit'ed user. + */ + private String getRequestingUser() { + if (theSecurityContext == null) + return null; + if (theSecurityContext.getUserPrincipal() == null) + return null; + //map hue/foo.bar@something.com->hue since user group checks + // and config files are in terms of short name + return UserGroupInformation.createRemoteUser( + theSecurityContext.getUserPrincipal().getName()).getShortUserName(); + } + + /** + * The callback url on this server when a task is completed. + */ + public String getCompletedUrl() { + if (theUriInfo == null) + return null; + if (theUriInfo.getBaseUri() == null) + return null; + return theUriInfo.getBaseUri() + VERSION + + "/internal/complete/$jobId"; + } + /** + * Returns canonical host name from which the request is made; used for doAs validation + */ + private static String getRequestingHost(String requestingUser, HttpServletRequest request) { + final String unkHost = "???"; + if(request == null) { + LOG.warn("request is null; cannot determine hostname"); + return unkHost; + } + try { + String address = request.getRemoteAddr();//returns IP addr + if(address == null) { + LOG.warn(MessageFormat.format("Request remote address is NULL for user [{0}]", requestingUser)); + return unkHost; + } + + //Inet4Address/Inet6Address + String hostName = InetAddress.getByName(address).getCanonicalHostName(); + if(LOG.isDebugEnabled()) { + LOG.debug(MessageFormat.format("Resolved remote hostname: [{0}]", hostName)); + } + return hostName; + + } catch (UnknownHostException ex) { + LOG.warn(MessageFormat.format("Request remote address could not be resolved, {0}", ex.toString(), ex)); + return unkHost; + } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleExceptionMapper.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleExceptionMapper.java index 0619b0f..df02ae9 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleExceptionMapper.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleExceptionMapper.java @@ -28,8 +28,8 @@ */ @Provider public class SimpleExceptionMapper - implements ExceptionMapper { - public Response toResponse(SimpleWebException e) { - return e.getResponse(); - } + implements ExceptionMapper { + public Response toResponse(SimpleWebException e) { + return e.getResponse(); + } } diff --git 
a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java index 697219b..2dfbb5f 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java @@ -33,40 +33,40 @@ * error in the output log. See SimpleExceptionMapper. */ public class SimpleWebException extends Throwable { - public int httpCode; - public Map params; + public int httpCode; + public Map params; - public SimpleWebException(int httpCode, String msg) { - super(msg); - this.httpCode = httpCode; - } + public SimpleWebException(int httpCode, String msg) { + super(msg); + this.httpCode = httpCode; + } - public SimpleWebException(int httpCode, String msg, Map params) { - super(msg); - this.httpCode = httpCode; - this.params = params; - } + public SimpleWebException(int httpCode, String msg, Map params) { + super(msg); + this.httpCode = httpCode; + this.params = params; + } - public Response getResponse() { - return buildMessage(httpCode, params, getMessage()); - } + public Response getResponse() { + return buildMessage(httpCode, params, getMessage()); + } - public static Response buildMessage(int httpCode, Map params, - String msg) { - HashMap err = new HashMap(); - err.put("error", msg); - if (params != null) - err.putAll(params); + public static Response buildMessage(int httpCode, Map params, + String msg) { + HashMap err = new HashMap(); + err.put("error", msg); + if (params != null) + err.putAll(params); - String json = "\"error\""; - try { - json = new ObjectMapper().writeValueAsString(err); - } catch (IOException e) { - } - - return Response.status(httpCode) - .entity(json) - .type(MediaType.APPLICATION_JSON) - .build(); + String json = "\"error\""; + try { + json = new ObjectMapper().writeValueAsString(err); + } catch (IOException e) { } + + return Response.status(httpCode) + .entity(json) + .type(MediaType.APPLICATION_JSON) + .build(); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StatusDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StatusDelegator.java index b50b9c1..3d306c7 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StatusDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StatusDelegator.java @@ -34,78 +34,78 @@ * Fetch the status of a given job id in the queue. 
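SimpleWebException above turns an HTTP status plus a message (and optional extra fields) into a JSON error entity via Jackson. A small sketch of how a handler could surface a missing-table error using the buildMessage helper shown above; the 404 status and the extra "database"/"table" keys are illustrative assumptions, not taken from the patch.

import java.util.HashMap;
import java.util.Map;
import javax.ws.rs.core.Response;
import org.apache.hive.hcatalog.templeton.SimpleWebException;

public class ErrorResponseSketch {
  public static Response missingTable(String db, String table) {
    // Extra key/value pairs are merged into the JSON entity alongside "error".
    Map<String, Object> params = new HashMap<String, Object>();
    params.put("database", db);   // illustrative extra field
    params.put("table", table);   // illustrative extra field
    // Produces an application/json entity such as:
    //   {"error":"Table not found","database":"default","table":"web_logs"}
    return SimpleWebException.buildMessage(404, params, "Table not found");
  }
}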
*/ public class StatusDelegator extends TempletonDelegator { - private static final Log LOG = LogFactory.getLog(StatusDelegator.class); + private static final Log LOG = LogFactory.getLog(StatusDelegator.class); - public StatusDelegator(AppConfig appConf) { - super(appConf); - } + public StatusDelegator(AppConfig appConf) { + super(appConf); + } - public QueueStatusBean run(String user, String id) - throws NotAuthorizedException, BadParam, IOException, InterruptedException - { - WebHCatJTShim tracker = null; - JobState state = null; - try { - UserGroupInformation ugi = UgiFactory.getUgi(user); - tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); - JobID jobid = StatusDelegator.StringToJobID(id); - if (jobid == null) - throw new BadParam("Invalid jobid: " + id); - state = new JobState(id, Main.getAppConfigInstance()); - return StatusDelegator.makeStatus(tracker, jobid, state); - } catch (IllegalStateException e) { - throw new BadParam(e.getMessage()); - } finally { - if (tracker != null) - tracker.close(); - if (state != null) - state.close(); - } + public QueueStatusBean run(String user, String id) + throws NotAuthorizedException, BadParam, IOException, InterruptedException + { + WebHCatJTShim tracker = null; + JobState state = null; + try { + UserGroupInformation ugi = UgiFactory.getUgi(user); + tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); + JobID jobid = StatusDelegator.StringToJobID(id); + if (jobid == null) + throw new BadParam("Invalid jobid: " + id); + state = new JobState(id, Main.getAppConfigInstance()); + return StatusDelegator.makeStatus(tracker, jobid, state); + } catch (IllegalStateException e) { + throw new BadParam(e.getMessage()); + } finally { + if (tracker != null) + tracker.close(); + if (state != null) + state.close(); } + } - public static QueueStatusBean makeStatus(WebHCatJTShim tracker, - JobID jobid, - String childid, - JobState state) - throws BadParam, IOException { - JobID bestid = jobid; - if (childid != null) - bestid = StatusDelegator.StringToJobID(childid); + public static QueueStatusBean makeStatus(WebHCatJTShim tracker, + JobID jobid, + String childid, + JobState state) + throws BadParam, IOException { + JobID bestid = jobid; + if (childid != null) + bestid = StatusDelegator.StringToJobID(childid); - JobStatus status = tracker.getJobStatus(bestid); - JobProfile profile = tracker.getJobProfile(bestid); + JobStatus status = tracker.getJobStatus(bestid); + JobProfile profile = tracker.getJobProfile(bestid); - if (status == null || profile == null) { - if (bestid != jobid) { // Corrupt childid, retry. - LOG.error("Corrupt child id " + childid + " for " + jobid); - bestid = jobid; - status = tracker.getJobStatus(bestid); - profile = tracker.getJobProfile(bestid); - } - } + if (status == null || profile == null) { + if (bestid != jobid) { // Corrupt childid, retry. + LOG.error("Corrupt child id " + childid + " for " + jobid); + bestid = jobid; + status = tracker.getJobStatus(bestid); + profile = tracker.getJobProfile(bestid); + } + } - if (status == null || profile == null) // No such job. - throw new BadParam("Could not find job " + bestid); + if (status == null || profile == null) // No such job. 
+ throw new BadParam("Could not find job " + bestid); - return new QueueStatusBean(state, status, profile); - } + return new QueueStatusBean(state, status, profile); + } - public static QueueStatusBean makeStatus(WebHCatJTShim tracker, - JobID jobid, - JobState state) - throws BadParam, IOException { - return makeStatus(tracker, jobid, state.getChildId(), state); - } + public static QueueStatusBean makeStatus(WebHCatJTShim tracker, + JobID jobid, + JobState state) + throws BadParam, IOException { + return makeStatus(tracker, jobid, state.getChildId(), state); + } - /** - * A version of JobID.forName with our app specific error handling. - */ - public static JobID StringToJobID(String id) - throws BadParam { - try { - return JobID.forName(id); - } catch (IllegalArgumentException e) { - throw new BadParam(e.getMessage()); - } + /** + * A version of JobID.forName with our app specific error handling. + */ + public static JobID StringToJobID(String id) + throws BadParam { + try { + return JobID.forName(id); + } catch (IllegalArgumentException e) { + throw new BadParam(e.getMessage()); } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java index edef153..f9f6c94 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java @@ -31,59 +31,59 @@ * This is the backend of the mapreduce/streaming web service. */ public class StreamingDelegator extends LauncherDelegator { - public StreamingDelegator(AppConfig appConf) { - super(appConf); - } + public StreamingDelegator(AppConfig appConf) { + super(appConf); + } - public EnqueueBean run(String user, - List inputs, String output, - String mapper, String reducer, - List files, List defines, - List cmdenvs, - List jarArgs, - String statusdir, - String callback, - String completedUrl) - throws NotAuthorizedException, BadParam, BusyException, QueueException, - ExecuteException, IOException, InterruptedException { - List args = makeArgs(inputs, output, mapper, reducer, - files, defines, cmdenvs, jarArgs); + public EnqueueBean run(String user, + List inputs, String output, + String mapper, String reducer, + List files, List defines, + List cmdenvs, + List jarArgs, + String statusdir, + String callback, + String completedUrl) + throws NotAuthorizedException, BadParam, BusyException, QueueException, + ExecuteException, IOException, InterruptedException { + List args = makeArgs(inputs, output, mapper, reducer, + files, defines, cmdenvs, jarArgs); - JarDelegator d = new JarDelegator(appConf); - return d.run(user, - appConf.streamingJar(), null, - null, null, args, defines, - statusdir, callback, completedUrl); - } + JarDelegator d = new JarDelegator(appConf); + return d.run(user, + appConf.streamingJar(), null, + null, null, args, defines, + statusdir, callback, completedUrl); + } - private List makeArgs(List inputs, - String output, - String mapper, - String reducer, - List files, - List defines, - List cmdenvs, - List jarArgs) { - ArrayList args = new ArrayList(); - for (String input : inputs) { - args.add("-input"); - args.add(input); - } - args.add("-output"); - args.add(output); - args.add("-mapper"); - args.add(mapper); - args.add("-reducer"); - args.add(reducer); + private List makeArgs(List inputs, + String output, + String mapper, + String reducer, + List 
files, + List defines, + List cmdenvs, + List jarArgs) { + ArrayList args = new ArrayList(); + for (String input : inputs) { + args.add("-input"); + args.add(input); + } + args.add("-output"); + args.add(output); + args.add("-mapper"); + args.add(mapper); + args.add("-reducer"); + args.add(reducer); - for (String f : files) - args.add("-file" + f); - for (String d : defines) - args.add("-D" + d); - for (String e : cmdenvs) - args.add("-cmdenv" + e); - args.addAll(jarArgs); + for (String f : files) + args.add("-file" + f); + for (String d : defines) + args.add("-D" + d); + for (String e : cmdenvs) + args.add("-cmdenv" + e); + args.addAll(jarArgs); - return args; - } + return args; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableDesc.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableDesc.java index 9ee9b52..f7cc3e9 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableDesc.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableDesc.java @@ -27,219 +27,219 @@ */ @XmlRootElement public class TableDesc extends GroupPermissionsDesc { - public boolean external = false; - public boolean ifNotExists = false; - public String table; - public String comment; - public List columns; - public List partitionedBy; - public ClusteredByDesc clusteredBy; - public StorageFormatDesc format; - public String location; - public Map tableProperties; - - /** - * Create a new TableDesc - */ - public TableDesc() { + public boolean external = false; + public boolean ifNotExists = false; + public String table; + public String comment; + public List columns; + public List partitionedBy; + public ClusteredByDesc clusteredBy; + public StorageFormatDesc format; + public String location; + public Map tableProperties; + + /** + * Create a new TableDesc + */ + public TableDesc() { + } + + public String toString() { + return String.format("TableDesc(table=%s, columns=%s)", table, columns); + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof TableDesc)) + return false; + TableDesc that = (TableDesc) o; + return xequals(this.external, that.external) + && xequals(this.ifNotExists, that.ifNotExists) + && xequals(this.table, that.table) + && xequals(this.comment, that.comment) + && xequals(this.columns, that.columns) + && xequals(this.partitionedBy, that.partitionedBy) + && xequals(this.clusteredBy, that.clusteredBy) + && xequals(this.format, that.format) + && xequals(this.location, that.location) + && xequals(this.tableProperties, that.tableProperties) + && super.equals(that) + ; + } + + /** + * How to cluster the table. 
+ */ + @XmlRootElement + public static class ClusteredByDesc { + public List columnNames; + public List sortedBy; + public int numberOfBuckets; + + public ClusteredByDesc() { } public String toString() { - return String.format("TableDesc(table=%s, columns=%s)", table, columns); + String fmt + = "ClusteredByDesc(columnNames=%s, sortedBy=%s, numberOfBuckets=%s)"; + return String.format(fmt, columnNames, sortedBy, numberOfBuckets); } public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof TableDesc)) - return false; - TableDesc that = (TableDesc) o; - return xequals(this.external, that.external) - && xequals(this.ifNotExists, that.ifNotExists) - && xequals(this.table, that.table) - && xequals(this.comment, that.comment) - && xequals(this.columns, that.columns) - && xequals(this.partitionedBy, that.partitionedBy) - && xequals(this.clusteredBy, that.clusteredBy) - && xequals(this.format, that.format) - && xequals(this.location, that.location) - && xequals(this.tableProperties, that.tableProperties) - && super.equals(that) - ; + if (this == o) + return true; + if (!(o instanceof ClusteredByDesc)) + return false; + ClusteredByDesc that = (ClusteredByDesc) o; + return xequals(this.columnNames, that.columnNames) + && xequals(this.sortedBy, that.sortedBy) + && xequals(this.numberOfBuckets, that.numberOfBuckets) + ; } + } - /** - * How to cluster the table. - */ - @XmlRootElement - public static class ClusteredByDesc { - public List columnNames; - public List sortedBy; - public int numberOfBuckets; - - public ClusteredByDesc() { - } - - public String toString() { - String fmt - = "ClusteredByDesc(columnNames=%s, sortedBy=%s, numberOfBuckets=%s)"; - return String.format(fmt, columnNames, sortedBy, numberOfBuckets); - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof ClusteredByDesc)) - return false; - ClusteredByDesc that = (ClusteredByDesc) o; - return xequals(this.columnNames, that.columnNames) - && xequals(this.sortedBy, that.sortedBy) - && xequals(this.numberOfBuckets, that.numberOfBuckets) - ; - } + /** + * The clustered sort order. + */ + @XmlRootElement + public static class ClusterSortOrderDesc { + public String columnName; + public SortDirectionDesc order; + + public ClusterSortOrderDesc() { } - /** - * The clustered sort order. - */ - @XmlRootElement - public static class ClusterSortOrderDesc { - public String columnName; - public SortDirectionDesc order; - - public ClusterSortOrderDesc() { - } - - public ClusterSortOrderDesc(String columnName, SortDirectionDesc order) { - this.columnName = columnName; - this.order = order; - } - - public String toString() { - return String - .format("ClusterSortOrderDesc(columnName=%s, order=%s)", - columnName, order); - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof ClusterSortOrderDesc)) - return false; - ClusterSortOrderDesc that = (ClusterSortOrderDesc) o; - return xequals(this.columnName, that.columnName) - && xequals(this.order, that.order) - ; - } + public ClusterSortOrderDesc(String columnName, SortDirectionDesc order) { + this.columnName = columnName; + this.order = order; } - /** - * Ther ASC or DESC sort order. - */ - @XmlRootElement - public static enum SortDirectionDesc { - ASC, DESC + public String toString() { + return String + .format("ClusterSortOrderDesc(columnName=%s, order=%s)", + columnName, order); } - /** - * The storage format. 
- */ - @XmlRootElement - public static class StorageFormatDesc { - public RowFormatDesc rowFormat; - public String storedAs; - public StoredByDesc storedBy; - - public StorageFormatDesc() { - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof StorageFormatDesc)) - return false; - StorageFormatDesc that = (StorageFormatDesc) o; - return xequals(this.rowFormat, that.rowFormat) - && xequals(this.storedAs, that.storedAs) - && xequals(this.storedBy, that.storedBy) - ; - } + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof ClusterSortOrderDesc)) + return false; + ClusterSortOrderDesc that = (ClusterSortOrderDesc) o; + return xequals(this.columnName, that.columnName) + && xequals(this.order, that.order) + ; + } + } + + /** + * Ther ASC or DESC sort order. + */ + @XmlRootElement + public static enum SortDirectionDesc { + ASC, DESC + } + + /** + * The storage format. + */ + @XmlRootElement + public static class StorageFormatDesc { + public RowFormatDesc rowFormat; + public String storedAs; + public StoredByDesc storedBy; + + public StorageFormatDesc() { } - /** - * The Row Format. - */ - @XmlRootElement - public static class RowFormatDesc { - public String fieldsTerminatedBy; - public String collectionItemsTerminatedBy; - public String mapKeysTerminatedBy; - public String linesTerminatedBy; - public SerdeDesc serde; - - public RowFormatDesc() { - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof RowFormatDesc)) - return false; - RowFormatDesc that = (RowFormatDesc) o; - return xequals(this.fieldsTerminatedBy, that.fieldsTerminatedBy) - && xequals(this.collectionItemsTerminatedBy, - that.collectionItemsTerminatedBy) - && xequals(this.mapKeysTerminatedBy, that.mapKeysTerminatedBy) - && xequals(this.linesTerminatedBy, that.linesTerminatedBy) - && xequals(this.serde, that.serde) - ; - } + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof StorageFormatDesc)) + return false; + StorageFormatDesc that = (StorageFormatDesc) o; + return xequals(this.rowFormat, that.rowFormat) + && xequals(this.storedAs, that.storedAs) + && xequals(this.storedBy, that.storedBy) + ; + } + } + + /** + * The Row Format. + */ + @XmlRootElement + public static class RowFormatDesc { + public String fieldsTerminatedBy; + public String collectionItemsTerminatedBy; + public String mapKeysTerminatedBy; + public String linesTerminatedBy; + public SerdeDesc serde; + + public RowFormatDesc() { } - /** - * The SERDE Row Format. - */ - @XmlRootElement - public static class SerdeDesc { - public String name; - public Map properties; - - public SerdeDesc() { - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof SerdeDesc)) - return false; - SerdeDesc that = (SerdeDesc) o; - return xequals(this.name, that.name) - && xequals(this.properties, that.properties) - ; - } + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof RowFormatDesc)) + return false; + RowFormatDesc that = (RowFormatDesc) o; + return xequals(this.fieldsTerminatedBy, that.fieldsTerminatedBy) + && xequals(this.collectionItemsTerminatedBy, + that.collectionItemsTerminatedBy) + && xequals(this.mapKeysTerminatedBy, that.mapKeysTerminatedBy) + && xequals(this.linesTerminatedBy, that.linesTerminatedBy) + && xequals(this.serde, that.serde) + ; } + } - /** - * How to store the table. 
- */ - @XmlRootElement - public static class StoredByDesc { - public String className; - public Map properties; - - public StoredByDesc() { - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof StoredByDesc)) - return false; - StoredByDesc that = (StoredByDesc) o; - return xequals(this.className, that.className) - && xequals(this.properties, that.properties) - ; - } + /** + * The SERDE Row Format. + */ + @XmlRootElement + public static class SerdeDesc { + public String name; + public Map properties; + + public SerdeDesc() { + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof SerdeDesc)) + return false; + SerdeDesc that = (SerdeDesc) o; + return xequals(this.name, that.name) + && xequals(this.properties, that.properties) + ; + } + } + + /** + * How to store the table. + */ + @XmlRootElement + public static class StoredByDesc { + public String className; + public Map properties; + + public StoredByDesc() { + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof StoredByDesc)) + return false; + StoredByDesc that = (StoredByDesc) o; + return xequals(this.className, that.className) + && xequals(this.properties, that.properties) + ; } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableLikeDesc.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableLikeDesc.java index 1e7e384..b272018 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableLikeDesc.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableLikeDesc.java @@ -25,17 +25,17 @@ */ @XmlRootElement public class TableLikeDesc extends GroupPermissionsDesc { - public boolean external = false; - public boolean ifNotExists = false; - public String location; - public String existingTable; - public String newTable; + public boolean external = false; + public boolean ifNotExists = false; + public String location; + public String existingTable; + public String newTable; - public TableLikeDesc() { - } + public TableLikeDesc() { + } - public String toString() { - return String.format("TableLikeDesc(existingTable=%s, newTable=%s, location=%s", - existingTable, newTable, location); - } + public String toString() { + return String.format("TableLikeDesc(existingTable=%s, newTable=%s, location=%s", + existingTable, newTable, location); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TablePropertyDesc.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TablePropertyDesc.java index adb2eb3..dbabb66 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TablePropertyDesc.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TablePropertyDesc.java @@ -25,13 +25,13 @@ */ @XmlRootElement public class TablePropertyDesc extends GroupPermissionsDesc { - public String name; - public String value; + public String name; + public String value; - public TablePropertyDesc() {} + public TablePropertyDesc() {} - public String toString() { - return String.format("TablePropertyDesc(name=%s, value=%s)", - name, value); - } + public String toString() { + return String.format("TablePropertyDesc(name=%s, value=%s)", + name, value); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java 
b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java index 8c60f57..532a191 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java @@ -24,9 +24,9 @@ * or hive. */ public class TempletonDelegator { - protected AppConfig appConf; + protected AppConfig appConf; - public TempletonDelegator(AppConfig appConf) { - this.appConf = appConf; - } + public TempletonDelegator(AppConfig appConf) { + this.appConf = appConf; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/UgiFactory.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/UgiFactory.java index 4aac823..dd7a71e 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/UgiFactory.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/UgiFactory.java @@ -24,27 +24,27 @@ import org.apache.hadoop.security.UserGroupInformation; public class UgiFactory { - private static ConcurrentHashMap userUgiMap = - new ConcurrentHashMap(); - - public static UserGroupInformation getUgi(String user) throws IOException { - UserGroupInformation ugi = userUgiMap.get(user); - if (ugi == null) { - //create new ugi and add to map - final UserGroupInformation newUgi = - UserGroupInformation.createProxyUser(user, - UserGroupInformation.getLoginUser()); - - //if another thread adds an entry before the check in this one - // the one created here will not be added. - userUgiMap.putIfAbsent(user, newUgi); - - //use the UGI object that got added - return userUgiMap.get(user); - - } - return ugi; + private static ConcurrentHashMap userUgiMap = + new ConcurrentHashMap(); + + public static UserGroupInformation getUgi(String user) throws IOException { + UserGroupInformation ugi = userUgiMap.get(user); + if (ugi == null) { + //create new ugi and add to map + final UserGroupInformation newUgi = + UserGroupInformation.createProxyUser(user, + UserGroupInformation.getLoginUser()); + + //if another thread adds an entry before the check in this one + // the one created here will not be added. 
+ userUgiMap.putIfAbsent(user, newUgi); + + //use the UGI object that got added + return userUgiMap.get(user); + } + return ugi; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/WadlConfig.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/WadlConfig.java index 872be65..1bfc6e9 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/WadlConfig.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/WadlConfig.java @@ -31,11 +31,11 @@ */ public class WadlConfig extends WadlGeneratorConfig { - @Override - public List configure() { - return generator(WadlGeneratorResourceDocSupport.class) - .prop("resourceDocStream", "resourcedoc.xml") - .descriptions(); - } + @Override + public List configure() { + return generator(WadlGeneratorResourceDocSupport.class) + .prop("resourceDocStream", "resourcedoc.xml") + .descriptions(); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSCleanup.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSCleanup.java index a824628..3511159 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSCleanup.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSCleanup.java @@ -34,118 +34,118 @@ * This does periodic cleanup */ public class HDFSCleanup extends Thread { - protected Configuration appConf; - - // The interval to wake up and check the queue - public static final String HDFS_CLEANUP_INTERVAL = - "templeton.hdfs.cleanup.interval"; // 12 hours - - // The max age of a task allowed - public static final String HDFS_CLEANUP_MAX_AGE = - "templeton.hdfs.cleanup.maxage"; // ~ 1 week - - protected static long interval = 1000L * 60L * 60L * 12L; - protected static long maxage = 1000L * 60L * 60L * 24L * 7L; - - // The logger - private static final Log LOG = LogFactory.getLog(HDFSCleanup.class); - - // Handle to cancel loop - private boolean stop = false; - - // The instance - private static HDFSCleanup thisclass = null; - - // Whether the cycle is running - private static boolean isRunning = false; - - // The storage root - private String storage_root; - - /** - * Create a cleanup object. 
- */ - private HDFSCleanup(Configuration appConf) { - this.appConf = appConf; - interval = appConf.getLong(HDFS_CLEANUP_INTERVAL, interval); - maxage = appConf.getLong(HDFS_CLEANUP_MAX_AGE, maxage); - storage_root = appConf.get(TempletonStorage.STORAGE_ROOT); - } - - public static HDFSCleanup getInstance(Configuration appConf) { - if (thisclass != null) { - return thisclass; - } - thisclass = new HDFSCleanup(appConf); - return thisclass; - } + protected Configuration appConf; - public static void startInstance(Configuration appConf) throws IOException { - if (!isRunning) { - getInstance(appConf).start(); - } + // The interval to wake up and check the queue + public static final String HDFS_CLEANUP_INTERVAL = + "templeton.hdfs.cleanup.interval"; // 12 hours + + // The max age of a task allowed + public static final String HDFS_CLEANUP_MAX_AGE = + "templeton.hdfs.cleanup.maxage"; // ~ 1 week + + protected static long interval = 1000L * 60L * 60L * 12L; + protected static long maxage = 1000L * 60L * 60L * 24L * 7L; + + // The logger + private static final Log LOG = LogFactory.getLog(HDFSCleanup.class); + + // Handle to cancel loop + private boolean stop = false; + + // The instance + private static HDFSCleanup thisclass = null; + + // Whether the cycle is running + private static boolean isRunning = false; + + // The storage root + private String storage_root; + + /** + * Create a cleanup object. + */ + private HDFSCleanup(Configuration appConf) { + this.appConf = appConf; + interval = appConf.getLong(HDFS_CLEANUP_INTERVAL, interval); + maxage = appConf.getLong(HDFS_CLEANUP_MAX_AGE, maxage); + storage_root = appConf.get(TempletonStorage.STORAGE_ROOT); + } + + public static HDFSCleanup getInstance(Configuration appConf) { + if (thisclass != null) { + return thisclass; } + thisclass = new HDFSCleanup(appConf); + return thisclass; + } - /** - * Run the cleanup loop. - * - */ - public void run() { - FileSystem fs = null; - while (!stop) { - try { - // Put each check in a separate try/catch, so if that particular - // cycle fails, it'll try again on the next cycle. - try { - if (fs == null) { - fs = FileSystem.get(appConf); - } - checkFiles(fs); - } catch (Exception e) { - LOG.error("Cleanup cycle failed: " + e.getMessage()); - } - - long sleepMillis = (long) (Math.random() * interval); - LOG.info("Next execution: " + new Date(new Date().getTime() - + sleepMillis)); - Thread.sleep(sleepMillis); - - } catch (Exception e) { - // If sleep fails, we should exit now before things get worse. - isRunning = false; - LOG.error("Cleanup failed: " + e.getMessage(), e); - } + public static void startInstance(Configuration appConf) throws IOException { + if (!isRunning) { + getInstance(appConf).start(); + } + } + + /** + * Run the cleanup loop. + * + */ + public void run() { + FileSystem fs = null; + while (!stop) { + try { + // Put each check in a separate try/catch, so if that particular + // cycle fails, it'll try again on the next cycle. + try { + if (fs == null) { + fs = FileSystem.get(appConf); + } + checkFiles(fs); + } catch (Exception e) { + LOG.error("Cleanup cycle failed: " + e.getMessage()); } + + long sleepMillis = (long) (Math.random() * interval); + LOG.info("Next execution: " + new Date(new Date().getTime() + + sleepMillis)); + Thread.sleep(sleepMillis); + + } catch (Exception e) { + // If sleep fails, we should exit now before things get worse. 
isRunning = false; + LOG.error("Cleanup failed: " + e.getMessage(), e); + } } - - /** - * Loop through all the files, deleting any that are older than - * maxage. - * - * @param fs - * @throws IOException - */ - private void checkFiles(FileSystem fs) throws IOException { - long now = new Date().getTime(); - for (Type type : Type.values()) { - try { - for (FileStatus status : fs.listStatus(new Path( - HDFSStorage.getPath(type, storage_root)))) { - if (now - status.getModificationTime() > maxage) { - LOG.info("Deleting " + status.getPath().toString()); - fs.delete(status.getPath(), true); - } - } - } catch (Exception e) { - // Nothing to find for this type. - } + isRunning = false; + } + + /** + * Loop through all the files, deleting any that are older than + * maxage. + * + * @param fs + * @throws IOException + */ + private void checkFiles(FileSystem fs) throws IOException { + long now = new Date().getTime(); + for (Type type : Type.values()) { + try { + for (FileStatus status : fs.listStatus(new Path( + HDFSStorage.getPath(type, storage_root)))) { + if (now - status.getModificationTime() > maxage) { + LOG.info("Deleting " + status.getPath().toString()); + fs.delete(status.getPath(), true); + } } + } catch (Exception e) { + // Nothing to find for this type. + } } + } - // Handle to stop this process from the outside if needed. - public void exit() { - stop = true; - } + // Handle to stop this process from the outside if needed. + public void exit() { + stop = true; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSStorage.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSStorage.java index fdcc8c1..14956da 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSStorage.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSStorage.java @@ -44,214 +44,214 @@ * */ public class HDFSStorage implements TempletonStorage { - FileSystem fs = null; + FileSystem fs = null; - public String storage_root = null; + public String storage_root = null; - public static final String JOB_PATH = "/jobs"; - public static final String JOB_TRACKINGPATH = "/created"; - public static final String OVERHEAD_PATH = "/overhead"; + public static final String JOB_PATH = "/jobs"; + public static final String JOB_TRACKINGPATH = "/created"; + public static final String OVERHEAD_PATH = "/overhead"; - private static final Log LOG = LogFactory.getLog(HDFSStorage.class); + private static final Log LOG = LogFactory.getLog(HDFSStorage.class); - public void startCleanup(Configuration config) { - try { - HDFSCleanup.startInstance(config); - } catch (Exception e) { - LOG.warn("Cleanup instance didn't start."); - } + public void startCleanup(Configuration config) { + try { + HDFSCleanup.startInstance(config); + } catch (Exception e) { + LOG.warn("Cleanup instance didn't start."); } + } - @Override - public void saveField(Type type, String id, String key, String val) - throws NotFoundException { - if (val == null) { - return; - } - PrintWriter out = null; - //todo: FileSystem#setPermission() - should this make sure to set 777 on jobs/ ? 
- Path keyfile= new Path(getPath(type) + "/" + id + "/" + key); - try { - // This will replace the old value if there is one - // Overwrite the existing file - out = new PrintWriter(new OutputStreamWriter(fs.create(keyfile))); - out.write(val); - out.flush(); - } catch (Exception e) { - String errMsg = "Couldn't write to " + keyfile + ": " + e.getMessage(); - LOG.error(errMsg, e); - throw new NotFoundException(errMsg, e); - } finally { - close(out); - } + @Override + public void saveField(Type type, String id, String key, String val) + throws NotFoundException { + if (val == null) { + return; } - - @Override - public String getField(Type type, String id, String key) { - BufferedReader in = null; - Path p = new Path(getPath(type) + "/" + id + "/" + key); - try { - in = new BufferedReader(new InputStreamReader(fs.open(p))); - String line = null; - String val = ""; - while ((line = in.readLine()) != null) { - if (!val.equals("")) { - val += "\n"; - } - val += line; - } - return val; - } catch (Exception e) { - LOG.info("Couldn't find " + p + ": " + e.getMessage(), e); - } finally { - close(in); - } - return null; + PrintWriter out = null; + //todo: FileSystem#setPermission() - should this make sure to set 777 on jobs/ ? + Path keyfile= new Path(getPath(type) + "/" + id + "/" + key); + try { + // This will replace the old value if there is one + // Overwrite the existing file + out = new PrintWriter(new OutputStreamWriter(fs.create(keyfile))); + out.write(val); + out.flush(); + } catch (Exception e) { + String errMsg = "Couldn't write to " + keyfile + ": " + e.getMessage(); + LOG.error(errMsg, e); + throw new NotFoundException(errMsg, e); + } finally { + close(out); } + } - @Override - public Map getFields(Type type, String id) { - HashMap map = new HashMap(); - BufferedReader in = null; - Path p = new Path(getPath(type) + "/" + id); - try { - for (FileStatus status : fs.listStatus(p)) { - in = new BufferedReader(new InputStreamReader(fs.open(status.getPath()))); - String line = null; - String val = ""; - while ((line = in.readLine()) != null) { - if (!val.equals("")) { - val += "\n"; - } - val += line; - } - map.put(status.getPath().getName(), val); - } - } catch (IOException e) { - LOG.trace("Couldn't find " + p); - } finally { - close(in); + @Override + public String getField(Type type, String id, String key) { + BufferedReader in = null; + Path p = new Path(getPath(type) + "/" + id + "/" + key); + try { + in = new BufferedReader(new InputStreamReader(fs.open(p))); + String line = null; + String val = ""; + while ((line = in.readLine()) != null) { + if (!val.equals("")) { + val += "\n"; } - return map; + val += line; + } + return val; + } catch (Exception e) { + LOG.info("Couldn't find " + p + ": " + e.getMessage(), e); + } finally { + close(in); } + return null; + } - @Override - public boolean delete(Type type, String id) throws NotFoundException { - Path p = new Path(getPath(type) + "/" + id); - try { - fs.delete(p, true); - } catch (IOException e) { - throw new NotFoundException("Node " + p + " was not found: " + - e.getMessage()); + @Override + public Map getFields(Type type, String id) { + HashMap map = new HashMap(); + BufferedReader in = null; + Path p = new Path(getPath(type) + "/" + id); + try { + for (FileStatus status : fs.listStatus(p)) { + in = new BufferedReader(new InputStreamReader(fs.open(status.getPath()))); + String line = null; + String val = ""; + while ((line = in.readLine()) != null) { + if (!val.equals("")) { + val += "\n"; + } + val += line; } - return false; + 
map.put(status.getPath().getName(), val); + } + } catch (IOException e) { + LOG.trace("Couldn't find " + p); + } finally { + close(in); } + return map; + } - @Override - public List getAll() { - ArrayList allNodes = new ArrayList(); - for (Type type : Type.values()) { - allNodes.addAll(getAllForType(type)); - } - return allNodes; + @Override + public boolean delete(Type type, String id) throws NotFoundException { + Path p = new Path(getPath(type) + "/" + id); + try { + fs.delete(p, true); + } catch (IOException e) { + throw new NotFoundException("Node " + p + " was not found: " + + e.getMessage()); } + return false; + } - @Override - public List getAllForType(Type type) { - ArrayList allNodes = new ArrayList(); - try { - for (FileStatus status : fs.listStatus(new Path(getPath(type)))) { - allNodes.add(status.getPath().getName()); - } - return null; - } catch (Exception e) { - LOG.trace("Couldn't find children for type " + type.toString()); - } - return allNodes; + @Override + public List getAll() { + ArrayList allNodes = new ArrayList(); + for (Type type : Type.values()) { + allNodes.addAll(getAllForType(type)); } + return allNodes; + } - @Override - public List getAllForKey(String key, String value) { - ArrayList allNodes = new ArrayList(); - try { - for (Type type : Type.values()) { - allNodes.addAll(getAllForTypeAndKey(type, key, value)); - } - } catch (Exception e) { - LOG.trace("Couldn't find children for key " + key + ": " + - e.getMessage()); - } - return allNodes; + @Override + public List getAllForType(Type type) { + ArrayList allNodes = new ArrayList(); + try { + for (FileStatus status : fs.listStatus(new Path(getPath(type)))) { + allNodes.add(status.getPath().getName()); + } + return null; + } catch (Exception e) { + LOG.trace("Couldn't find children for type " + type.toString()); } + return allNodes; + } - @Override - public List getAllForTypeAndKey(Type type, String key, String value) { - ArrayList allNodes = new ArrayList(); - HashMap map = new HashMap(); - try { - for (FileStatus status : - fs.listStatus(new Path(getPath(type)))) { - map = (HashMap) - getFields(type, status.getPath().getName()); - if (map.get(key).equals(value)) { - allNodes.add(status.getPath().getName()); - } - } - } catch (Exception e) { - LOG.trace("Couldn't find children for key " + key + ": " + - e.getMessage()); - } - return allNodes; + @Override + public List getAllForKey(String key, String value) { + ArrayList allNodes = new ArrayList(); + try { + for (Type type : Type.values()) { + allNodes.addAll(getAllForTypeAndKey(type, key, value)); + } + } catch (Exception e) { + LOG.trace("Couldn't find children for key " + key + ": " + + e.getMessage()); } + return allNodes; + } - @Override - public void openStorage(Configuration config) throws IOException { - storage_root = config.get(TempletonStorage.STORAGE_ROOT); - if (fs == null) { - fs = FileSystem.get(config); + @Override + public List getAllForTypeAndKey(Type type, String key, String value) { + ArrayList allNodes = new ArrayList(); + HashMap map = new HashMap(); + try { + for (FileStatus status : + fs.listStatus(new Path(getPath(type)))) { + map = (HashMap) + getFields(type, status.getPath().getName()); + if (map.get(key).equals(value)) { + allNodes.add(status.getPath().getName()); } + } + } catch (Exception e) { + LOG.trace("Couldn't find children for key " + key + ": " + + e.getMessage()); } + return allNodes; + } - @Override - public void closeStorage() throws IOException { - // Nothing to do here + @Override + public void 
openStorage(Configuration config) throws IOException { + storage_root = config.get(TempletonStorage.STORAGE_ROOT); + if (fs == null) { + fs = FileSystem.get(config); } + } - /** - * Get the path to storage based on the type. - * @param type - */ - public String getPath(Type type) { - return getPath(type, storage_root); - } + @Override + public void closeStorage() throws IOException { + // Nothing to do here + } - /** - * Static method to get the path based on the type. - * - * @param type - * @param root - */ - public static String getPath(Type type, String root) { - String typepath = root + OVERHEAD_PATH; - switch (type) { - case JOB: - typepath = root + JOB_PATH; - break; - case JOBTRACKING: - typepath = root + JOB_TRACKINGPATH; - break; - } - return typepath; + /** + * Get the path to storage based on the type. + * @param type + */ + public String getPath(Type type) { + return getPath(type, storage_root); + } + + /** + * Static method to get the path based on the type. + * + * @param type + * @param root + */ + public static String getPath(Type type, String root) { + String typepath = root + OVERHEAD_PATH; + switch (type) { + case JOB: + typepath = root + JOB_PATH; + break; + case JOBTRACKING: + typepath = root + JOB_TRACKINGPATH; + break; } - private void close(Closeable is) { - if(is == null) { - return; - } - try { - is.close(); - } - catch (IOException ex) { - LOG.trace("Failed to close InputStream: " + ex.getMessage()); - } + return typepath; + } + private void close(Closeable is) { + if(is == null) { + return; + } + try { + is.close(); + } + catch (IOException ex) { + LOG.trace("Failed to close InputStream: " + ex.getMessage()); } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java index f76465f..d710320 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java @@ -32,313 +32,313 @@ */ public class JobState { - private static final Log LOG = LogFactory.getLog(JobState.class); + private static final Log LOG = LogFactory.getLog(JobState.class); - private String id; + private String id; - // Storage is instantiated in the constructor - private TempletonStorage storage = null; + // Storage is instantiated in the constructor + private TempletonStorage storage = null; - private static TempletonStorage.Type type = TempletonStorage.Type.JOB; + private static TempletonStorage.Type type = TempletonStorage.Type.JOB; - private Configuration config = null; + private Configuration config = null; - public JobState(String id, Configuration conf) - throws IOException { - this.id = id; - config = conf; - storage = getStorage(conf); - } - - public void delete() - throws IOException { - try { - storage.delete(type, id); - } catch (Exception e) { - // Error getting children of node -- probably node has been deleted - LOG.info("Couldn't delete " + id); - } - } - - /** - * Get an instance of the selected storage class. Defaults to - * HDFS storage if none is specified. 
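The surrounding Javadoc explains that the storage backend is chosen through templeton.storage.class and falls back to HDFS storage when none is configured. As a rough sketch only (the storage root, job id, and field values are made-up, and class names follow the org.apache.hive.hcatalog.templeton.tool package used by this patch), the selection plus a simple save/read round trip might look like:

import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.templeton.tool.HDFSStorage;
import org.apache.hive.hcatalog.templeton.tool.JobState;
import org.apache.hive.hcatalog.templeton.tool.TempletonStorage;

public class StorageSelectionSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // If templeton.storage.class is unset or cannot be instantiated,
    // getStorageInstance() falls back to HDFSStorage.
    conf.set(TempletonStorage.STORAGE_CLASS, HDFSStorage.class.getName());
    conf.set(TempletonStorage.STORAGE_ROOT, "/templeton-hadoop");   // assumed root

    TempletonStorage storage = JobState.getStorage(conf);           // already opened
    storage.saveField(TempletonStorage.Type.JOB, "job_0001", "user", "alice");
    System.out.println(storage.getField(TempletonStorage.Type.JOB, "job_0001", "user"));
    storage.closeStorage();
  }
}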
- */ - public static TempletonStorage getStorageInstance(Configuration conf) { - TempletonStorage storage = null; - try { - storage = (TempletonStorage) - Class.forName(conf.get(TempletonStorage.STORAGE_CLASS)) - .newInstance(); - } catch (Exception e) { - LOG.warn("No storage method found: " + e.getMessage()); - try { - storage = new HDFSStorage(); - } catch (Exception ex) { - LOG.error("Couldn't create storage."); - } - } - return storage; - } - - /** - * Get an open instance of the selected storage class. Defaults - * to HDFS storage if none is specified. - */ - public static TempletonStorage getStorage(Configuration conf) throws IOException { - TempletonStorage storage = getStorageInstance(conf); - storage.openStorage(conf); - return storage; - } - - /** - * For storage methods that require a connection, this is a hint - * that it's time to close the connection. - */ - public void close() throws IOException { - storage.closeStorage(); - } - - // - // Properties - // - - /** - * This job id. - */ - public String getId() { - return id; - } - - /** - * The percent complete of a job - */ - public String getPercentComplete() - throws IOException { - return getField("percentComplete"); - } + public JobState(String id, Configuration conf) + throws IOException { + this.id = id; + config = conf; + storage = getStorage(conf); + } - public void setPercentComplete(String percent) - throws IOException { - setField("percentComplete", percent); + public void delete() + throws IOException { + try { + storage.delete(type, id); + } catch (Exception e) { + // Error getting children of node -- probably node has been deleted + LOG.info("Couldn't delete " + id); } - - /** - * The child id of TempletonControllerJob - */ - public String getChildId() - throws IOException { - return getField("childid"); + } + + /** + * Get an instance of the selected storage class. Defaults to + * HDFS storage if none is specified. + */ + public static TempletonStorage getStorageInstance(Configuration conf) { + TempletonStorage storage = null; + try { + storage = (TempletonStorage) + Class.forName(conf.get(TempletonStorage.STORAGE_CLASS)) + .newInstance(); + } catch (Exception e) { + LOG.warn("No storage method found: " + e.getMessage()); + try { + storage = new HDFSStorage(); + } catch (Exception ex) { + LOG.error("Couldn't create storage."); + } } - - public void setChildId(String childid) - throws IOException { - setField("childid", childid); + return storage; + } + + /** + * Get an open instance of the selected storage class. Defaults + * to HDFS storage if none is specified. + */ + public static TempletonStorage getStorage(Configuration conf) throws IOException { + TempletonStorage storage = getStorageInstance(conf); + storage.openStorage(conf); + return storage; + } + + /** + * For storage methods that require a connection, this is a hint + * that it's time to close the connection. + */ + public void close() throws IOException { + storage.closeStorage(); + } + + // + // Properties + // + + /** + * This job id. 
+ */ + public String getId() { + return id; + } + + /** + * The percent complete of a job + */ + public String getPercentComplete() + throws IOException { + return getField("percentComplete"); + } + + public void setPercentComplete(String percent) + throws IOException { + setField("percentComplete", percent); + } + + /** + * The child id of TempletonControllerJob + */ + public String getChildId() + throws IOException { + return getField("childid"); + } + + public void setChildId(String childid) + throws IOException { + setField("childid", childid); + } + + /** + * Add a jobid to the list of children of this job. + * + * @param jobid + * @throws IOException + */ + public void addChild(String jobid) throws IOException { + String jobids = ""; + try { + jobids = getField("children"); + } catch (Exception e) { + // There are none or they're not readable. } - - /** - * Add a jobid to the list of children of this job. - * - * @param jobid - * @throws IOException - */ - public void addChild(String jobid) throws IOException { - String jobids = ""; - try { - jobids = getField("children"); - } catch (Exception e) { - // There are none or they're not readable. - } - if (!jobids.equals("")) { - jobids += ","; - } - jobids += jobid; - setField("children", jobids); + if (!jobids.equals("")) { + jobids += ","; } - - /** - * Get a list of jobstates for jobs that are children of this job. - * @throws IOException - */ - public List getChildren() throws IOException { - ArrayList children = new ArrayList(); - for (String jobid : getField("children").split(",")) { - children.add(new JobState(jobid, config)); - } - return children; + jobids += jobid; + setField("children", jobids); + } + + /** + * Get a list of jobstates for jobs that are children of this job. + * @throws IOException + */ + public List getChildren() throws IOException { + ArrayList children = new ArrayList(); + for (String jobid : getField("children").split(",")) { + children.add(new JobState(jobid, config)); } - - /** - * Save a comma-separated list of jobids that are children - * of this job. - * @param jobids - * @throws IOException - */ - public void setChildren(String jobids) throws IOException { - setField("children", jobids); + return children; + } + + /** + * Save a comma-separated list of jobids that are children + * of this job. + * @param jobids + * @throws IOException + */ + public void setChildren(String jobids) throws IOException { + setField("children", jobids); + } + + /** + * Set the list of child jobs of this job + * @param children + */ + public void setChildren(List children) throws IOException { + String val = ""; + for (JobState jobstate : children) { + if (!val.equals("")) { + val += ","; + } + val += jobstate.getId(); } - - /** - * Set the list of child jobs of this job - * @param children - */ - public void setChildren(List children) throws IOException { - String val = ""; - for (JobState jobstate : children) { - if (!val.equals("")) { - val += ","; - } - val += jobstate.getId(); - } - setField("children", val); + setField("children", val); + } + + /** + * The system exit value of the job. + */ + public Long getExitValue() + throws IOException { + return getLongField("exitValue"); + } + + public void setExitValue(long exitValue) + throws IOException { + setLongField("exitValue", exitValue); + } + + /** + * When this job was created. 
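As a hedged illustration of the JobState accessors being reindented here (the job ids, user name, and storage settings are fabricated, and the HDFS backend is assumed), producer-side usage might look like the sketch below; note that children are persisted as one comma-separated field.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.templeton.tool.HDFSStorage;
import org.apache.hive.hcatalog.templeton.tool.JobState;
import org.apache.hive.hcatalog.templeton.tool.TempletonStorage;

public class JobStateSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set(TempletonStorage.STORAGE_CLASS, HDFSStorage.class.getName()); // assumed backend
    conf.set(TempletonStorage.STORAGE_ROOT, "/tmp/templeton");             // assumed root

    JobState parent = new JobState("job_0001", conf);        // fabricated job id
    parent.setUser("alice");
    parent.setCreated(System.currentTimeMillis());
    parent.setPercentComplete("map 50% reduce 0%");
    parent.setChildren("job_0002,job_0003");                 // one comma-separated field

    List<JobState> children = parent.getChildren();
    System.out.println(parent.getUser() + " has " + children.size() + " child jobs");
    parent.close();
  }
}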
+ */ + public Long getCreated() + throws IOException { + return getLongField("created"); + } + + public void setCreated(long created) + throws IOException { + setLongField("created", created); + } + + /** + * The user who started this job. + */ + public String getUser() + throws IOException { + return getField("user"); + } + + public void setUser(String user) + throws IOException { + setField("user", user); + } + + /** + * The url callback + */ + public String getCallback() + throws IOException { + return getField("callback"); + } + + public void setCallback(String callback) + throws IOException { + setField("callback", callback); + } + + /** + * The status of a job once it is completed. + */ + public String getCompleteStatus() + throws IOException { + return getField("completed"); + } + + public void setCompleteStatus(String complete) + throws IOException { + setField("completed", complete); + } + + /** + * The time when the callback was sent. + */ + public Long getNotifiedTime() + throws IOException { + return getLongField("notified"); + } + + public void setNotifiedTime(long notified) + throws IOException { + setLongField("notified", notified); + } + + // + // Helpers + // + + /** + * Fetch an integer field from the store. + */ + public Long getLongField(String name) + throws IOException { + String s = storage.getField(type, id, name); + if (s == null) + return null; + else { + try { + return new Long(s); + } catch (NumberFormatException e) { + LOG.error("templeton: bug " + name + " " + s + " : " + e); + return null; + } } - - /** - * The system exit value of the job. - */ - public Long getExitValue() - throws IOException { - return getLongField("exitValue"); - } - - public void setExitValue(long exitValue) - throws IOException { - setLongField("exitValue", exitValue); + } + + /** + * Store a String field from the store. + */ + public void setField(String name, String val) + throws IOException { + try { + storage.saveField(type, id, name, val); + } catch (NotFoundException ne) { + throw new IOException(ne.getMessage()); } - - /** - * When this job was created. - */ - public Long getCreated() - throws IOException { - return getLongField("created"); - } - - public void setCreated(long created) - throws IOException { - setLongField("created", created); + } + + public String getField(String name) + throws IOException { + return storage.getField(type, id, name); + } + + /** + * Store a long field. + * + * @param name + * @param val + * @throws IOException + */ + public void setLongField(String name, long val) + throws IOException { + try { + storage.saveField(type, id, name, String.valueOf(val)); + } catch (NotFoundException ne) { + throw new IOException("Job " + id + " was not found: " + + ne.getMessage()); } - - /** - * The user who started this job. - */ - public String getUser() - throws IOException { - return getField("user"); - } - - public void setUser(String user) - throws IOException { - setField("user", user); - } - - /** - * The url callback - */ - public String getCallback() - throws IOException { - return getField("callback"); - } - - public void setCallback(String callback) - throws IOException { - setField("callback", callback); - } - - /** - * The status of a job once it is completed. - */ - public String getCompleteStatus() - throws IOException { - return getField("completed"); - } - - public void setCompleteStatus(String complete) - throws IOException { - setField("completed", complete); - } - - /** - * The time when the callback was sent. 
- */ - public Long getNotifiedTime() - throws IOException { - return getLongField("notified"); - } - - public void setNotifiedTime(long notified) - throws IOException { - setLongField("notified", notified); - } - - // - // Helpers - // - - /** - * Fetch an integer field from the store. - */ - public Long getLongField(String name) - throws IOException { - String s = storage.getField(type, id, name); - if (s == null) - return null; - else { - try { - return new Long(s); - } catch (NumberFormatException e) { - LOG.error("templeton: bug " + name + " " + s + " : " + e); - return null; - } - } - } - - /** - * Store a String field from the store. - */ - public void setField(String name, String val) - throws IOException { - try { - storage.saveField(type, id, name, val); - } catch (NotFoundException ne) { - throw new IOException(ne.getMessage()); - } - } - - public String getField(String name) - throws IOException { - return storage.getField(type, id, name); - } - - /** - * Store a long field. - * - * @param name - * @param val - * @throws IOException - */ - public void setLongField(String name, long val) - throws IOException { - try { - storage.saveField(type, id, name, String.valueOf(val)); - } catch (NotFoundException ne) { - throw new IOException("Job " + id + " was not found: " + - ne.getMessage()); - } - } - - /** - * Get an id for each currently existing job, which can be used to create - * a JobState object. - * - * @param conf - * @throws IOException - */ - public static List getJobs(Configuration conf) throws IOException { - try { - return getStorage(conf).getAllForType(type); - } catch (Exception e) { - throw new IOException("Can't get jobs", e); - } + } + + /** + * Get an id for each currently existing job, which can be used to create + * a JobState object. 
+ * + * @param conf + * @throws IOException + */ + public static List getJobs(Configuration conf) throws IOException { + try { + return getStorage(conf).getAllForType(type); + } catch (Exception e) { + throw new IOException("Can't get jobs", e); } + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobStateTracker.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobStateTracker.java index ad1880a..da3ad20 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobStateTracker.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobStateTracker.java @@ -32,115 +32,115 @@ import org.apache.zookeeper.data.Stat; public class JobStateTracker { - // The path to the tracking root - private String job_trackingroot = null; + // The path to the tracking root + private String job_trackingroot = null; - // The zookeeper connection to use - private ZooKeeper zk; + // The zookeeper connection to use + private ZooKeeper zk; - // The id of the tracking node -- must be a SEQUENTIAL node - private String trackingnode; + // The id of the tracking node -- must be a SEQUENTIAL node + private String trackingnode; - // The id of the job this tracking node represents - private String jobid; + // The id of the job this tracking node represents + private String jobid; - // The logger - private static final Log LOG = LogFactory.getLog(JobStateTracker.class); + // The logger + private static final Log LOG = LogFactory.getLog(JobStateTracker.class); - /** - * Constructor for a new node -- takes the jobid of an existing job - * - */ - public JobStateTracker(String node, ZooKeeper zk, boolean nodeIsTracker, - String job_trackingpath) { - this.zk = zk; - if (nodeIsTracker) { - trackingnode = node; - } else { - jobid = node; - } - job_trackingroot = job_trackingpath; + /** + * Constructor for a new node -- takes the jobid of an existing job + * + */ + public JobStateTracker(String node, ZooKeeper zk, boolean nodeIsTracker, + String job_trackingpath) { + this.zk = zk; + if (nodeIsTracker) { + trackingnode = node; + } else { + jobid = node; } + job_trackingroot = job_trackingpath; + } - /** - * Create the parent znode for this job state. - */ - public void create() - throws IOException { - String[] paths = ZooKeeperStorage.getPaths(job_trackingroot); - for (String znode : paths) { - try { - zk.create(znode, new byte[0], - Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - } catch (KeeperException.NodeExistsException e) { - } catch (Exception e) { - throw new IOException("Unable to create parent nodes"); - } - } - try { - trackingnode = zk.create(makeTrackingZnode(), jobid.getBytes(), - Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL); - } catch (Exception e) { - throw new IOException("Unable to create " + makeTrackingZnode()); - } + /** + * Create the parent znode for this job state. 
+ */ + public void create() + throws IOException { + String[] paths = ZooKeeperStorage.getPaths(job_trackingroot); + for (String znode : paths) { + try { + zk.create(znode, new byte[0], + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + } catch (KeeperException.NodeExistsException e) { + } catch (Exception e) { + throw new IOException("Unable to create parent nodes"); + } } - - public void delete() - throws IOException { - try { - zk.delete(makeTrackingJobZnode(trackingnode), -1); - } catch (Exception e) { - // Might have been deleted already - LOG.info("Couldn't delete " + makeTrackingJobZnode(trackingnode)); - } + try { + trackingnode = zk.create(makeTrackingZnode(), jobid.getBytes(), + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL); + } catch (Exception e) { + throw new IOException("Unable to create " + makeTrackingZnode()); } + } - /** - * Get the jobid for this tracking node - * @throws IOException - */ - public String getJobID() throws IOException { - try { - return new String(zk.getData(makeTrackingJobZnode(trackingnode), - false, new Stat())); - } catch (KeeperException e) { - // It was deleted during the transaction - throw new IOException("Node already deleted " + trackingnode); - } catch (InterruptedException e) { - throw new IOException("Couldn't read node " + trackingnode); - } + public void delete() + throws IOException { + try { + zk.delete(makeTrackingJobZnode(trackingnode), -1); + } catch (Exception e) { + // Might have been deleted already + LOG.info("Couldn't delete " + makeTrackingJobZnode(trackingnode)); } + } - /** - * Make a ZK path to a new tracking node - */ - public String makeTrackingZnode() { - return job_trackingroot + "/"; + /** + * Get the jobid for this tracking node + * @throws IOException + */ + public String getJobID() throws IOException { + try { + return new String(zk.getData(makeTrackingJobZnode(trackingnode), + false, new Stat())); + } catch (KeeperException e) { + // It was deleted during the transaction + throw new IOException("Node already deleted " + trackingnode); + } catch (InterruptedException e) { + throw new IOException("Couldn't read node " + trackingnode); } + } - /** - * Make a ZK path to an existing tracking node - */ - public String makeTrackingJobZnode(String nodename) { - return job_trackingroot + "/" + nodename; - } + /** + * Make a ZK path to a new tracking node + */ + public String makeTrackingZnode() { + return job_trackingroot + "/"; + } + + /** + * Make a ZK path to an existing tracking node + */ + public String makeTrackingJobZnode(String nodename) { + return job_trackingroot + "/" + nodename; + } - /* - * Get the list of tracking jobs. These can be used to determine which jobs have - * expired. - */ - public static List getTrackingJobs(Configuration conf, ZooKeeper zk) - throws IOException { - ArrayList jobs = new ArrayList(); - try { - for (String myid : zk.getChildren( - conf.get(TempletonStorage.STORAGE_ROOT) - + ZooKeeperStorage.TRACKINGDIR, false)) { - jobs.add(myid); - } - } catch (Exception e) { - throw new IOException("Can't get tracking children", e); - } - return jobs; + /* + * Get the list of tracking jobs. These can be used to determine which jobs have + * expired. 
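A hedged sketch of the tracking-node flow implemented here: one SEQUENTIAL znode is created per job id, and a cleanup pass later walks those znodes back to job ids. The ZooKeeper connect string and storage root are assumptions, and the tracking path is built from ZooKeeperStorage.TRACKINGDIR the same way getTrackingJobs() builds it.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.templeton.tool.JobStateTracker;
import org.apache.hive.hcatalog.templeton.tool.TempletonStorage;
import org.apache.hive.hcatalog.templeton.tool.ZooKeeperStorage;
import org.apache.zookeeper.ZooKeeper;

public class TrackingSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set(TempletonStorage.STORAGE_ROOT, "/templeton-hadoop");   // assumed storage root
    ZooKeeper zk = new ZooKeeper("localhost:2181", 30000, null);    // assumed ZK quorum

    // Same path construction getTrackingJobs() uses internally.
    String trackingRoot = conf.get(TempletonStorage.STORAGE_ROOT) + ZooKeeperStorage.TRACKINGDIR;

    // nodeIsTracker=false: "node" is a job id, and create() makes the SEQUENTIAL tracking znode.
    JobStateTracker tracker = new JobStateTracker("job_0001", zk, false, trackingRoot);
    tracker.create();

    // A cleanup pass lists the tracking znodes and maps each one back to its job id.
    List<String> nodes = JobStateTracker.getTrackingJobs(conf, zk);
    for (String node : nodes) {
      JobStateTracker t = new JobStateTracker(node, zk, true, trackingRoot);
      System.out.println("tracked job: " + t.getJobID());
    }
    zk.close();
  }
}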
+ */ + public static List getTrackingJobs(Configuration conf, ZooKeeper zk) + throws IOException { + ArrayList jobs = new ArrayList(); + try { + for (String myid : zk.getChildren( + conf.get(TempletonStorage.STORAGE_ROOT) + + ZooKeeperStorage.TRACKINGDIR, false)) { + jobs.add(myid); + } + } catch (Exception e) { + throw new IOException("Can't get tracking children", e); } + return jobs; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NotFoundException.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NotFoundException.java index 4949098..84da803 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NotFoundException.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NotFoundException.java @@ -22,12 +22,12 @@ * Simple not found exception. */ public class NotFoundException extends Exception { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - public NotFoundException(String msg) { - super(msg); - } - public NotFoundException(String msg, Throwable rootCause) { - super(msg, rootCause); - } + public NotFoundException(String msg) { + super(msg); + } + public NotFoundException(String msg, Throwable rootCause) { + super(msg, rootCause); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullRecordReader.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullRecordReader.java index 8fe2184..f000b7e 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullRecordReader.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullRecordReader.java @@ -29,34 +29,34 @@ * An empty record reader. */ public class NullRecordReader - extends RecordReader { - @Override - public void initialize(InputSplit genericSplit, TaskAttemptContext context) - throws IOException { - } - - @Override - public void close() throws IOException { - } - - @Override - public NullWritable getCurrentKey() { - return NullWritable.get(); - } - - @Override - public NullWritable getCurrentValue() { - return NullWritable.get(); - } - - @Override - public float getProgress() { - return 1.0f; - } - - @Override - public boolean nextKeyValue() throws IOException { - return false; - } + extends RecordReader { + @Override + public void initialize(InputSplit genericSplit, TaskAttemptContext context) + throws IOException { + } + + @Override + public void close() throws IOException { + } + + @Override + public NullWritable getCurrentKey() { + return NullWritable.get(); + } + + @Override + public NullWritable getCurrentValue() { + return NullWritable.get(); + } + + @Override + public float getProgress() { + return 1.0f; + } + + @Override + public boolean nextKeyValue() throws IOException { + return false; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullSplit.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullSplit.java index e2cf5f9..67ac256 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullSplit.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullSplit.java @@ -28,16 +28,16 @@ * An empty splitter. 
*/ public class NullSplit extends InputSplit implements Writable { - public long getLength() { return 0; } + public long getLength() { return 0; } - public String[] getLocations() throws IOException { - return new String[]{}; - } + public String[] getLocations() throws IOException { + return new String[]{}; + } - @Override - public void write(DataOutput out) throws IOException {} + @Override + public void write(DataOutput out) throws IOException {} - @Override - public void readFields(DataInput in) throws IOException {} + @Override + public void readFields(DataInput in) throws IOException {} } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/SingleInputFormat.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/SingleInputFormat.java index a638174..1f90137 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/SingleInputFormat.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/SingleInputFormat.java @@ -33,19 +33,19 @@ * An empty InputFormat. */ public class SingleInputFormat - extends InputFormat { - public List getSplits(JobContext job) - throws IOException { - List res = new ArrayList(); - res.add(new NullSplit()); - return res; - } + extends InputFormat { + public List getSplits(JobContext job) + throws IOException { + List res = new ArrayList(); + res.add(new NullSplit()); + return res; + } - public RecordReader - createRecordReader(InputSplit split, - TaskAttemptContext context) - throws IOException { - return new NullRecordReader(); - } + public RecordReader + createRecordReader(InputSplit split, + TaskAttemptContext context) + throws IOException { + return new NullRecordReader(); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java index cb0bf71..4deafbb 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java @@ -67,285 +67,285 @@ * in hdfs files. 
*/ public class TempletonControllerJob extends Configured implements Tool { - public static final String COPY_NAME = "templeton.copy"; - public static final String STATUSDIR_NAME = "templeton.statusdir"; - public static final String JAR_ARGS_NAME = "templeton.args"; - public static final String OVERRIDE_CLASSPATH = "templeton.override-classpath"; - - public static final String STDOUT_FNAME = "stdout"; - public static final String STDERR_FNAME = "stderr"; - public static final String EXIT_FNAME = "exit"; - - public static final int WATCHER_TIMEOUT_SECS = 10; - public static final int KEEP_ALIVE_MSEC = 60 * 1000; - - public static final String TOKEN_FILE_ARG_PLACEHOLDER - = "__WEBHCAT_TOKEN_FILE_LOCATION__"; - - - private static TrivialExecService execService = TrivialExecService.getInstance(); - - private static final Log LOG = LogFactory.getLog(TempletonControllerJob.class); - - - public static class LaunchMapper - extends Mapper { - protected Process startJob(Context context, String user, - String overrideClasspath) - throws IOException, InterruptedException { - Configuration conf = context.getConfiguration(); - copyLocal(COPY_NAME, conf); - String[] jarArgs - = TempletonUtils.decodeArray(conf.get(JAR_ARGS_NAME)); - - ArrayList removeEnv = new ArrayList(); - removeEnv.add("HADOOP_ROOT_LOGGER"); - Map env = TempletonUtils.hadoopUserEnv(user, - overrideClasspath); - List jarArgsList = new LinkedList(Arrays.asList(jarArgs)); - String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION"); - - - if (tokenFile != null) { - //Token is available, so replace the placeholder - String tokenArg = "mapreduce.job.credentials.binary=" + tokenFile; - for(int i=0; i it = jarArgsList.iterator(); - while(it.hasNext()){ - String arg = it.next(); - if(arg.contains(TOKEN_FILE_ARG_PLACEHOLDER)){ - it.remove(); - } - } - } - return execService.run(jarArgsList, removeEnv, env); - } + public static final String COPY_NAME = "templeton.copy"; + public static final String STATUSDIR_NAME = "templeton.statusdir"; + public static final String JAR_ARGS_NAME = "templeton.args"; + public static final String OVERRIDE_CLASSPATH = "templeton.override-classpath"; - private void copyLocal(String var, Configuration conf) - throws IOException { - String[] filenames = TempletonUtils.decodeArray(conf.get(var)); - if (filenames != null) { - for (String filename : filenames) { - Path src = new Path(filename); - Path dst = new Path(src.getName()); - FileSystem fs = src.getFileSystem(conf); - System.err.println("templeton: copy " + src + " => " + dst); - fs.copyToLocalFile(src, dst); - } - } - } + public static final String STDOUT_FNAME = "stdout"; + public static final String STDERR_FNAME = "stderr"; + public static final String EXIT_FNAME = "exit"; - @Override - public void run(Context context) - throws IOException, InterruptedException { + public static final int WATCHER_TIMEOUT_SECS = 10; + public static final int KEEP_ALIVE_MSEC = 60 * 1000; - Configuration conf = context.getConfiguration(); + public static final String TOKEN_FILE_ARG_PLACEHOLDER + = "__WEBHCAT_TOKEN_FILE_LOCATION__"; - Process proc = startJob(context, - conf.get("user.name"), - conf.get(OVERRIDE_CLASSPATH)); - String statusdir = conf.get(STATUSDIR_NAME); + private static TrivialExecService execService = TrivialExecService.getInstance(); - if (statusdir != null) { - statusdir = TempletonUtils.addUserHomeDirectoryIfApplicable(statusdir, conf.get("user.name"), conf); - } + private static final Log LOG = LogFactory.getLog(TempletonControllerJob.class); - 
ExecutorService pool = Executors.newCachedThreadPool(); - executeWatcher(pool, conf, context.getJobID(), - proc.getInputStream(), statusdir, STDOUT_FNAME); - executeWatcher(pool, conf, context.getJobID(), - proc.getErrorStream(), statusdir, STDERR_FNAME); - KeepAlive keepAlive = startCounterKeepAlive(pool, context); - - proc.waitFor(); - keepAlive.sendReport = false; - pool.shutdown(); - if (!pool.awaitTermination(WATCHER_TIMEOUT_SECS, TimeUnit.SECONDS)) - pool.shutdownNow(); - - writeExitValue(conf, proc.exitValue(), statusdir); - JobState state = new JobState(context.getJobID().toString(), conf); - state.setExitValue(proc.exitValue()); - state.setCompleteStatus("done"); - state.close(); - - if (proc.exitValue() != 0) - System.err.println("templeton: job failed with exit code " - + proc.exitValue()); - else - System.err.println("templeton: job completed with exit code 0"); - } - private void executeWatcher(ExecutorService pool, Configuration conf, - JobID jobid, InputStream in, String statusdir, - String name) - throws IOException { - Watcher w = new Watcher(conf, jobid, in, statusdir, name); - pool.execute(w); - } + public static class LaunchMapper + extends Mapper { + protected Process startJob(Context context, String user, + String overrideClasspath) + throws IOException, InterruptedException { + Configuration conf = context.getConfiguration(); + copyLocal(COPY_NAME, conf); + String[] jarArgs + = TempletonUtils.decodeArray(conf.get(JAR_ARGS_NAME)); - private KeepAlive startCounterKeepAlive(ExecutorService pool, Context cnt) - throws IOException { - KeepAlive k = new KeepAlive(cnt); - pool.execute(k); - return k; + ArrayList removeEnv = new ArrayList(); + removeEnv.add("HADOOP_ROOT_LOGGER"); + Map env = TempletonUtils.hadoopUserEnv(user, + overrideClasspath); + List jarArgsList = new LinkedList(Arrays.asList(jarArgs)); + String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION"); + + + if (tokenFile != null) { + //Token is available, so replace the placeholder + String tokenArg = "mapreduce.job.credentials.binary=" + tokenFile; + for(int i=0; i it = jarArgsList.iterator(); + while(it.hasNext()){ + String arg = it.next(); + if(arg.contains(TOKEN_FILE_ARG_PLACEHOLDER)){ + it.remove(); + } } + } + return execService.run(jarArgsList, removeEnv, env); } - private static class Watcher implements Runnable { - private InputStream in; - private OutputStream out; - private JobID jobid; - private Configuration conf; - - public Watcher(Configuration conf, JobID jobid, InputStream in, - String statusdir, String name) - throws IOException { - this.conf = conf; - this.jobid = jobid; - this.in = in; - - if (name.equals(STDERR_FNAME)) - out = System.err; - else - out = System.out; - - if (TempletonUtils.isset(statusdir)) { - Path p = new Path(statusdir, name); - FileSystem fs = p.getFileSystem(conf); - out = fs.create(p); - System.err.println("templeton: Writing status to " + p); - } + private void copyLocal(String var, Configuration conf) + throws IOException { + String[] filenames = TempletonUtils.decodeArray(conf.get(var)); + if (filenames != null) { + for (String filename : filenames) { + Path src = new Path(filename); + Path dst = new Path(src.getName()); + FileSystem fs = src.getFileSystem(conf); + System.err.println("templeton: copy " + src + " => " + dst); + fs.copyToLocalFile(src, dst); } + } + } - @Override - public void run() { - try { - InputStreamReader isr = new InputStreamReader(in); - BufferedReader reader = new BufferedReader(isr); - PrintWriter writer = new PrintWriter(out); - - 
String line; - while ((line = reader.readLine()) != null) { - writer.println(line); - JobState state = null; - try { - String percent = TempletonUtils.extractPercentComplete(line); - String childid = TempletonUtils.extractChildJobId(line); - - if (percent != null || childid != null) { - state = new JobState(jobid.toString(), conf); - state.setPercentComplete(percent); - state.setChildId(childid); - } - } catch (IOException e) { - System.err.println("templeton: state error: " + e); - } finally { - if (state != null) { - try { - state.close(); - } catch (IOException e) { - } - } - } - } - writer.flush(); - } catch (IOException e) { - System.err.println("templeton: execute error: " + e); - } - } + @Override + public void run(Context context) + throws IOException, InterruptedException { + + Configuration conf = context.getConfiguration(); + + Process proc = startJob(context, + conf.get("user.name"), + conf.get(OVERRIDE_CLASSPATH)); + + String statusdir = conf.get(STATUSDIR_NAME); + + if (statusdir != null) { + statusdir = TempletonUtils.addUserHomeDirectoryIfApplicable(statusdir, conf.get("user.name"), conf); + } + + ExecutorService pool = Executors.newCachedThreadPool(); + executeWatcher(pool, conf, context.getJobID(), + proc.getInputStream(), statusdir, STDOUT_FNAME); + executeWatcher(pool, conf, context.getJobID(), + proc.getErrorStream(), statusdir, STDERR_FNAME); + KeepAlive keepAlive = startCounterKeepAlive(pool, context); + + proc.waitFor(); + keepAlive.sendReport = false; + pool.shutdown(); + if (!pool.awaitTermination(WATCHER_TIMEOUT_SECS, TimeUnit.SECONDS)) + pool.shutdownNow(); + + writeExitValue(conf, proc.exitValue(), statusdir); + JobState state = new JobState(context.getJobID().toString(), conf); + state.setExitValue(proc.exitValue()); + state.setCompleteStatus("done"); + state.close(); + + if (proc.exitValue() != 0) + System.err.println("templeton: job failed with exit code " + + proc.exitValue()); + else + System.err.println("templeton: job completed with exit code 0"); } - private static class KeepAlive implements Runnable { - private final Mapper.Context cnt; - private volatile boolean sendReport; + private void executeWatcher(ExecutorService pool, Configuration conf, + JobID jobid, InputStream in, String statusdir, + String name) + throws IOException { + Watcher w = new Watcher(conf, jobid, in, statusdir, name); + pool.execute(w); + } - public KeepAlive(Mapper.Context cnt) { - this.cnt = cnt; - this.sendReport = true; - } + private KeepAlive startCounterKeepAlive(ExecutorService pool, Context cnt) + throws IOException { + KeepAlive k = new KeepAlive(cnt); + pool.execute(k); + return k; + } + + private void writeExitValue(Configuration conf, int exitValue, String statusdir) + throws IOException { + if (TempletonUtils.isset(statusdir)) { + Path p = new Path(statusdir, EXIT_FNAME); + FileSystem fs = p.getFileSystem(conf); + OutputStream out = fs.create(p); + System.err.println("templeton: Writing exit value " + + exitValue + " to " + p); + PrintWriter writer = new PrintWriter(out); + writer.println(exitValue); + writer.close(); + } + } + } + + private static class Watcher implements Runnable { + private InputStream in; + private OutputStream out; + private JobID jobid; + private Configuration conf; + + public Watcher(Configuration conf, JobID jobid, InputStream in, + String statusdir, String name) + throws IOException { + this.conf = conf; + this.jobid = jobid; + this.in = in; + + if (name.equals(STDERR_FNAME)) + out = System.err; + else + out = System.out; + + if 
(TempletonUtils.isset(statusdir)) { + Path p = new Path(statusdir, name); + FileSystem fs = p.getFileSystem(conf); + out = fs.create(p); + System.err.println("templeton: Writing status to " + p); + } + } - @Override - public void run() { - try { - while (sendReport) { - cnt.progress(); - Thread.sleep(KEEP_ALIVE_MSEC); - } - } catch (InterruptedException e) { - // Ok to be interrupted + @Override + public void run() { + try { + InputStreamReader isr = new InputStreamReader(in); + BufferedReader reader = new BufferedReader(isr); + PrintWriter writer = new PrintWriter(out); + + String line; + while ((line = reader.readLine()) != null) { + writer.println(line); + JobState state = null; + try { + String percent = TempletonUtils.extractPercentComplete(line); + String childid = TempletonUtils.extractChildJobId(line); + + if (percent != null || childid != null) { + state = new JobState(jobid.toString(), conf); + state.setPercentComplete(percent); + state.setChildId(childid); + } + } catch (IOException e) { + System.err.println("templeton: state error: " + e); + } finally { + if (state != null) { + try { + state.close(); + } catch (IOException e) { + } } + } } + writer.flush(); + } catch (IOException e) { + System.err.println("templeton: execute error: " + e); + } } + } - private JobID submittedJobId; + private static class KeepAlive implements Runnable { + private final Mapper.Context cnt; + private volatile boolean sendReport; - public String getSubmittedId() { - if (submittedJobId == null) - return null; - else - return submittedJobId.toString(); + public KeepAlive(Mapper.Context cnt) { + this.cnt = cnt; + this.sendReport = true; } - /** - * Enqueue the job and print out the job id for later collection. - */ @Override - public int run(String[] args) - throws IOException, InterruptedException, ClassNotFoundException { - Configuration conf = getConf(); - conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args)); - conf.set("user.name", UserGroupInformation.getCurrentUser().getShortUserName()); - Job job = new Job(conf); - job.setJarByClass(TempletonControllerJob.class); - job.setJobName("TempletonControllerJob"); - job.setMapperClass(LaunchMapper.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(Text.class); - job.setInputFormatClass(SingleInputFormat.class); - NullOutputFormat of - = new NullOutputFormat(); - job.setOutputFormatClass(of.getClass()); - job.setNumReduceTasks(0); - - JobClient jc = new JobClient(new JobConf(job.getConfiguration())); - - Token mrdt = jc.getDelegationToken(new Text("mr token")); - job.getCredentials().addToken(new Text("mr token"), mrdt); - job.submit(); - - submittedJobId = job.getJobID(); - - return 0; - } - - - public static void main(String[] args) throws Exception { - int ret = ToolRunner.run(new TempletonControllerJob(), args); - if (ret != 0) - System.err.println("TempletonControllerJob failed!"); - System.exit(ret); + public void run() { + try { + while (sendReport) { + cnt.progress(); + Thread.sleep(KEEP_ALIVE_MSEC); + } + } catch (InterruptedException e) { + // Ok to be interrupted + } } + } + + private JobID submittedJobId; + + public String getSubmittedId() { + if (submittedJobId == null) + return null; + else + return submittedJobId.toString(); + } + + /** + * Enqueue the job and print out the job id for later collection. 
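The Watcher above scrapes the child process's console output for progress and child-job markers and persists any hits through JobState. A minimal stand-alone version of that loop is sketched below; the wrapper class and method are illustrative only, not part of the patch.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.templeton.tool.JobState;
import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;

public class WatcherLoopSketch {
  static void scrape(Process proc, String jobid, Configuration conf) throws Exception {
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(proc.getInputStream()));
    String line;
    while ((line = reader.readLine()) != null) {
      String percent = TempletonUtils.extractPercentComplete(line);
      String childid = TempletonUtils.extractChildJobId(line);
      if (percent != null || childid != null) {
        // jobid is the controller job's id; a null value is simply skipped by saveField().
        JobState state = new JobState(jobid, conf);
        state.setPercentComplete(percent);
        state.setChildId(childid);
        state.close();
      }
    }
  }
}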
+ */ + @Override + public int run(String[] args) + throws IOException, InterruptedException, ClassNotFoundException { + Configuration conf = getConf(); + conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args)); + conf.set("user.name", UserGroupInformation.getCurrentUser().getShortUserName()); + Job job = new Job(conf); + job.setJarByClass(TempletonControllerJob.class); + job.setJobName("TempletonControllerJob"); + job.setMapperClass(LaunchMapper.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(Text.class); + job.setInputFormatClass(SingleInputFormat.class); + NullOutputFormat of + = new NullOutputFormat(); + job.setOutputFormatClass(of.getClass()); + job.setNumReduceTasks(0); + + JobClient jc = new JobClient(new JobConf(job.getConfiguration())); + + Token mrdt = jc.getDelegationToken(new Text("mr token")); + job.getCredentials().addToken(new Text("mr token"), mrdt); + job.submit(); + + submittedJobId = job.getJobID(); + + return 0; + } + + + public static void main(String[] args) throws Exception { + int ret = ToolRunner.run(new TempletonControllerJob(), args); + if (ret != 0) + System.err.println("TempletonControllerJob failed!"); + System.exit(ret); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonStorage.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonStorage.java index ba08556..97d572c 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonStorage.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonStorage.java @@ -43,111 +43,111 @@ * Each field must be available to be fetched/changed individually. */ public interface TempletonStorage { - // These are the possible types referenced by 'type' below. - public enum Type { - UNKNOWN, JOB, JOBTRACKING, TEMPLETONOVERHEAD - } - - public static final String STORAGE_CLASS = "templeton.storage.class"; - public static final String STORAGE_ROOT = "templeton.storage.root"; + // These are the possible types referenced by 'type' below. + public enum Type { + UNKNOWN, JOB, JOBTRACKING, TEMPLETONOVERHEAD + } - /** - * Start the cleanup process for this storage type. - * @param config - */ - public void startCleanup(Configuration config); - - /** - * Save a single key/value pair for a specific job id. - * @param type The data type (as listed above) - * @param id The String id of this data grouping (jobid, etc.) - * @param key The name of the field to save - * @param val The value of the field to save - */ - public void saveField(Type type, String id, String key, String val) - throws NotFoundException; + public static final String STORAGE_CLASS = "templeton.storage.class"; + public static final String STORAGE_ROOT = "templeton.storage.root"; - /** - * Get the value of one field for a given data type. If the type - * is UNKNOWN, search for the id in all types. - * @param type The data type (as listed above) - * @param id The String id of this data grouping (jobid, etc.) - * @param key The name of the field to retrieve - * @return The value of the field requested, or null if not - * found. - */ - public String getField(Type type, String id, String key); + /** + * Start the cleanup process for this storage type. + * @param config + */ + public void startCleanup(Configuration config); - /** - * Get all the name/value pairs stored for this id. 
- * Be careful using getFields() -- optimistic locking will mean that - * your odds of a conflict are decreased if you read/write one field - * at a time. getFields() is intended for read-only usage. - * - * If the type is UNKNOWN, search for the id in all types. - * - * @param type The data type (as listed above) - * @param id The String id of this data grouping (jobid, etc.) - * @return A Map of key/value pairs found for this type/id. - */ - public Map getFields(Type type, String id); + /** + * Save a single key/value pair for a specific job id. + * @param type The data type (as listed above) + * @param id The String id of this data grouping (jobid, etc.) + * @param key The name of the field to save + * @param val The value of the field to save + */ + public void saveField(Type type, String id, String key, String val) + throws NotFoundException; - /** - * Delete a data grouping (all data for a jobid, all tracking data - * for a job, etc.). If the type is UNKNOWN, search for the id - * in all types. - * - * @param type The data type (as listed above) - * @param id The String id of this data grouping (jobid, etc.) - * @return True if successful, false if not, throws NotFoundException - * if the id wasn't found. - */ - public boolean delete(Type type, String id) throws NotFoundException; + /** + * Get the value of one field for a given data type. If the type + * is UNKNOWN, search for the id in all types. + * @param type The data type (as listed above) + * @param id The String id of this data grouping (jobid, etc.) + * @param key The name of the field to retrieve + * @return The value of the field requested, or null if not + * found. + */ + public String getField(Type type, String id, String key); - /** - * Get the id of each data grouping in the storage system. - * - * @return An ArrayList of ids. - */ - public List getAll(); + /** + * Get all the name/value pairs stored for this id. + * Be careful using getFields() -- optimistic locking will mean that + * your odds of a conflict are decreased if you read/write one field + * at a time. getFields() is intended for read-only usage. + * + * If the type is UNKNOWN, search for the id in all types. + * + * @param type The data type (as listed above) + * @param id The String id of this data grouping (jobid, etc.) + * @return A Map of key/value pairs found for this type/id. + */ + public Map getFields(Type type, String id); - /** - * Get the id of each data grouping of a given type in the storage - * system. - * @param type The data type (as listed above) - * @return An ArrayList of ids. - */ - public List getAllForType(Type type); + /** + * Delete a data grouping (all data for a jobid, all tracking data + * for a job, etc.). If the type is UNKNOWN, search for the id + * in all types. + * + * @param type The data type (as listed above) + * @param id The String id of this data grouping (jobid, etc.) + * @return True if successful, false if not, throws NotFoundException + * if the id wasn't found. + */ + public boolean delete(Type type, String id) throws NotFoundException; - /** - * Get the id of each data grouping that has the specific key/value - * pair. - * @param key The name of the field to search for - * @param value The value of the field to search for - * @return An ArrayList of ids. - */ - public List getAllForKey(String key, String value); + /** + * Get the id of each data grouping in the storage system. + * + * @return An ArrayList of ids. 
+ */ + public List getAll(); - /** - * Get the id of each data grouping of a given type that has the - * specific key/value pair. - * @param type The data type (as listed above) - * @param key The name of the field to search for - * @param value The value of the field to search for - * @return An ArrayList of ids. - */ - public List getAllForTypeAndKey(Type type, String key, - String value); + /** + * Get the id of each data grouping of a given type in the storage + * system. + * @param type The data type (as listed above) + * @return An ArrayList of ids. + */ + public List getAllForType(Type type); - /** - * For storage methods that require a connection, this is a hint - * that it's time to open a connection. - */ - public void openStorage(Configuration config) throws IOException; + /** + * Get the id of each data grouping that has the specific key/value + * pair. + * @param key The name of the field to search for + * @param value The value of the field to search for + * @return An ArrayList of ids. + */ + public List getAllForKey(String key, String value); - /** - * For storage methods that require a connection, this is a hint - * that it's time to close the connection. - */ - public void closeStorage() throws IOException; + /** + * Get the id of each data grouping of a given type that has the + * specific key/value pair. + * @param type The data type (as listed above) + * @param key The name of the field to search for + * @param value The value of the field to search for + * @return An ArrayList of ids. + */ + public List getAllForTypeAndKey(Type type, String key, + String value); + + /** + * For storage methods that require a connection, this is a hint + * that it's time to open a connection. + */ + public void openStorage(Configuration config) throws IOException; + + /** + * For storage methods that require a connection, this is a hint + * that it's time to close the connection. + */ + public void closeStorage() throws IOException; } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java index 61002b9..1c91890 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java @@ -46,257 +46,257 @@ * General utility methods. */ public class TempletonUtils { - /** - * Is the object non-empty? - */ - public static boolean isset(String s) { - return (s != null) && (s.length() > 0); + /** + * Is the object non-empty? + */ + public static boolean isset(String s) { + return (s != null) && (s.length() > 0); + } + + /** + * Is the object non-empty? + */ + public static boolean isset(char ch) { + return (ch != 0); + } + + /** + * Is the object non-empty? + */ + public static boolean isset(T[] a) { + return (a != null) && (a.length > 0); + } + + + /** + * Is the object non-empty? + */ + public static boolean isset(Collection col) { + return (col != null) && (!col.isEmpty()); + } + + /** + * Is the object non-empty? + */ + public static boolean isset(Map col) { + return (col != null) && (!col.isEmpty()); + } + + + public static final Pattern JAR_COMPLETE + = Pattern.compile(" map \\d+%\\s+reduce \\d+%$"); + public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$"); + + /** + * Extract the percent complete line from Pig or Jar jobs. 
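As a hedged demonstration of the completion and job-id patterns defined in TempletonUtils, the sample log lines below are fabricated but follow the Hadoop JobClient and Pig console formats the regexes target.

import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;

public class ProgressPatternSketch {
  public static void main(String[] args) {
    String jarLine = "13/02/05 12:00:01 INFO mapred.JobClient:  map 45% reduce 10%";
    String pigLine = "2013-02-05 12:00:01,000 [main] INFO  MapReduceLauncher - 75% complete";
    String idLine  = "13/02/05 12:00:00 INFO mapred.JobClient: Running job: job_201302050001_0042";

    System.out.println(TempletonUtils.extractPercentComplete(jarLine)); // "map 45% reduce 10%"
    System.out.println(TempletonUtils.extractPercentComplete(pigLine)); // "75% complete"
    System.out.println(TempletonUtils.extractChildJobId(idLine));       // "job_201302050001_0042"
  }
}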
+ */ + public static String extractPercentComplete(String line) { + Matcher jar = JAR_COMPLETE.matcher(line); + if (jar.find()) + return jar.group().trim(); + + Matcher pig = PIG_COMPLETE.matcher(line); + if (pig.find()) + return pig.group().trim(); + + return null; + } + + public static final Pattern JAR_ID = Pattern.compile(" Running job: (\\S+)$"); + public static final Pattern PIG_ID = Pattern.compile(" HadoopJobId: (\\S+)$"); + public static final Pattern[] ID_PATTERNS = {JAR_ID, PIG_ID}; + + /** + * Extract the job id from jar jobs. + */ + public static String extractChildJobId(String line) { + for (Pattern p : ID_PATTERNS) { + Matcher m = p.matcher(line); + if (m.find()) + return m.group(1); } - /** - * Is the object non-empty? - */ - public static boolean isset(char ch) { - return (ch != 0); - } + return null; + } - /** - * Is the object non-empty? - */ - public static boolean isset(T[] a) { - return (a != null) && (a.length > 0); - } + /** + * Take an array of strings and encode it into one string. + */ + public static String encodeArray(String[] plain) { + if (plain == null) + return null; + String[] escaped = new String[plain.length]; - /** - * Is the object non-empty? - */ - public static boolean isset(Collection col) { - return (col != null) && (!col.isEmpty()); + for (int i = 0; i < plain.length; ++i) { + if (plain[i] == null) { + plain[i] = ""; + } + escaped[i] = StringUtils.escapeString(plain[i]); } - /** - * Is the object non-empty? - */ - public static boolean isset(Map col) { - return (col != null) && (!col.isEmpty()); + return StringUtils.arrayToString(escaped); + } + + /** + * Encode a List into a string. + */ + public static String encodeArray(List list) { + if (list == null) + return null; + String[] array = new String[list.size()]; + return encodeArray(list.toArray(array)); + } + + /** + * Take an encode strings and decode it into an array of strings. + */ + public static String[] decodeArray(String s) { + if (s == null) + return null; + + String[] escaped = StringUtils.split(s); + String[] plain = new String[escaped.length]; + + for (int i = 0; i < escaped.length; ++i) + plain[i] = StringUtils.unEscapeString(escaped[i]); + + return plain; + } + + public static String[] hadoopFsListAsArray(String files, Configuration conf, + String user) + throws URISyntaxException, FileNotFoundException, IOException, + InterruptedException { + if (files == null || conf == null) { + return null; } + String[] dirty = files.split(","); + String[] clean = new String[dirty.length]; + for (int i = 0; i < dirty.length; ++i) + clean[i] = hadoopFsFilename(dirty[i], conf, user); - public static final Pattern JAR_COMPLETE - = Pattern.compile(" map \\d+%\\s+reduce \\d+%$"); - public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$"); - - /** - * Extract the percent complete line from Pig or Jar jobs. 
- */ - public static String extractPercentComplete(String line) { - Matcher jar = JAR_COMPLETE.matcher(line); - if (jar.find()) - return jar.group().trim(); - - Matcher pig = PIG_COMPLETE.matcher(line); - if (pig.find()) - return pig.group().trim(); + return clean; + } - return null; + public static String hadoopFsListAsString(String files, Configuration conf, + String user) + throws URISyntaxException, FileNotFoundException, IOException, + InterruptedException { + if (files == null || conf == null) { + return null; } - - public static final Pattern JAR_ID = Pattern.compile(" Running job: (\\S+)$"); - public static final Pattern PIG_ID = Pattern.compile(" HadoopJobId: (\\S+)$"); - public static final Pattern[] ID_PATTERNS = {JAR_ID, PIG_ID}; - - /** - * Extract the job id from jar jobs. - */ - public static String extractChildJobId(String line) { - for (Pattern p : ID_PATTERNS) { - Matcher m = p.matcher(line); - if (m.find()) - return m.group(1); - } - - return null; + return StringUtils.arrayToString(hadoopFsListAsArray(files, conf, user)); + } + + public static String hadoopFsFilename(String fname, Configuration conf, String user) + throws URISyntaxException, FileNotFoundException, IOException, + InterruptedException { + Path p = hadoopFsPath(fname, conf, user); + if (p == null) + return null; + else + return p.toString(); + } + + /** + * @return true iff we are sure the file is not there. + */ + public static boolean hadoopFsIsMissing(FileSystem fs, Path p) { + try { + return !fs.exists(p); + } catch (Throwable t) { + // Got an error, might be there anyway due to a + // permissions problem. + return false; } - - /** - * Take an array of strings and encode it into one string. - */ - public static String encodeArray(String[] plain) { - if (plain == null) - return null; - - String[] escaped = new String[plain.length]; - - for (int i = 0; i < plain.length; ++i) { - if (plain[i] == null) { - plain[i] = ""; - } - escaped[i] = StringUtils.escapeString(plain[i]); - } - - return StringUtils.arrayToString(escaped); + } + + public static String addUserHomeDirectoryIfApplicable(String origPathStr, String user, Configuration conf) throws IOException { + Path path = new Path(origPathStr); + String result = origPathStr; + + // shortcut for s3/asv + // If path contains scheme, user should mean an absolute path, + // However, path.isAbsolute tell us otherwise. + // So we skip conversion for non-hdfs. + if (!(path.getFileSystem(conf) instanceof DistributedFileSystem)&& + !(path.getFileSystem(conf) instanceof LocalFileSystem)) { + return result; } - - /** - * Encode a List into a string. - */ - public static String encodeArray(List list) { - if (list == null) - return null; - String[] array = new String[list.size()]; - return encodeArray(list.toArray(array)); + if (!path.isAbsolute()) { + result = "/user/" + user + "/" + origPathStr; } - - /** - * Take an encode strings and decode it into an array of strings. 
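encodeArray()/decodeArray() round-trip an argument list through a single string (the controller job stores templeton.args this way), escaping embedded commas and normalizing nulls to empty strings. A small sketch with fabricated arguments:

import java.util.Arrays;
import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;

public class EncodeArraySketch {
  public static void main(String[] args) {
    String[] jarArgs = { "-D", "mapred.job.name=demo,with,commas", null, "wordcount" };
    String packed = TempletonUtils.encodeArray(jarArgs);   // one string, e.g. for a conf value
    String[] unpacked = TempletonUtils.decodeArray(packed);
    System.out.println(packed);
    System.out.println(Arrays.toString(unpacked));         // the null came back as ""
  }
}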
- */ - public static String[] decodeArray(String s) { - if (s == null) - return null; - - String[] escaped = StringUtils.split(s); - String[] plain = new String[escaped.length]; - - for (int i = 0; i < escaped.length; ++i) - plain[i] = StringUtils.unEscapeString(escaped[i]); - - return plain; + return result; + } + + public static Path hadoopFsPath(String fname, final Configuration conf, String user) + throws URISyntaxException, IOException, + InterruptedException { + if (fname == null || conf == null) { + return null; } - public static String[] hadoopFsListAsArray(String files, Configuration conf, - String user) - throws URISyntaxException, FileNotFoundException, IOException, - InterruptedException { - if (files == null || conf == null) { - return null; - } - String[] dirty = files.split(","); - String[] clean = new String[dirty.length]; - - for (int i = 0; i < dirty.length; ++i) - clean[i] = hadoopFsFilename(dirty[i], conf, user); - - return clean; + UserGroupInformation ugi; + if (user!=null) { + ugi = UgiFactory.getUgi(user); + } else { + ugi = UserGroupInformation.getLoginUser(); } - - public static String hadoopFsListAsString(String files, Configuration conf, - String user) - throws URISyntaxException, FileNotFoundException, IOException, - InterruptedException { - if (files == null || conf == null) { - return null; - } - return StringUtils.arrayToString(hadoopFsListAsArray(files, conf, user)); + final String finalFName = new String(fname); + + final FileSystem defaultFs = + ugi.doAs(new PrivilegedExceptionAction() { + public FileSystem run() + throws URISyntaxException, IOException, InterruptedException { + return FileSystem.get(new URI(finalFName), conf); + } + }); + + fname = addUserHomeDirectoryIfApplicable(fname, user, conf); + URI u = new URI(fname); + Path p = new Path(u).makeQualified(defaultFs); + + if (hadoopFsIsMissing(defaultFs, p)) + throw new FileNotFoundException("File " + fname + " does not exist."); + + return p; + } + + /** + * GET the given url. Returns the number of bytes received. + */ + public static int fetchUrl(URL url) + throws IOException { + URLConnection cnx = url.openConnection(); + InputStream in = cnx.getInputStream(); + + byte[] buf = new byte[8192]; + int total = 0; + int len = 0; + while ((len = in.read(buf)) >= 0) + total += len; + + return total; + } + + /** + * Set the environment variables to specify the hadoop user. + */ + public static Map hadoopUserEnv(String user, + String overrideClasspath) { + HashMap env = new HashMap(); + env.put("HADOOP_USER_NAME", user); + + if (overrideClasspath != null) { + env.put("HADOOP_USER_CLASSPATH_FIRST", "true"); + String cur = System.getenv("HADOOP_CLASSPATH"); + if (TempletonUtils.isset(cur)) + overrideClasspath = overrideClasspath + ":" + cur; + env.put("HADOOP_CLASSPATH", overrideClasspath); } - public static String hadoopFsFilename(String fname, Configuration conf, String user) - throws URISyntaxException, FileNotFoundException, IOException, - InterruptedException { - Path p = hadoopFsPath(fname, conf, user); - if (p == null) - return null; - else - return p.toString(); - } - - /** - * @return true iff we are sure the file is not there. - */ - public static boolean hadoopFsIsMissing(FileSystem fs, Path p) { - try { - return !fs.exists(p); - } catch (Throwable t) { - // Got an error, might be there anyway due to a - // permissions problem. 
- return false; - } - } - - public static String addUserHomeDirectoryIfApplicable(String origPathStr, String user, Configuration conf) throws IOException { - Path path = new Path(origPathStr); - String result = origPathStr; - - // shortcut for s3/asv - // If path contains scheme, user should mean an absolute path, - // However, path.isAbsolute tell us otherwise. - // So we skip conversion for non-hdfs. - if (!(path.getFileSystem(conf) instanceof DistributedFileSystem)&& - !(path.getFileSystem(conf) instanceof LocalFileSystem)) { - return result; - } - if (!path.isAbsolute()) { - result = "/user/" + user + "/" + origPathStr; - } - return result; - } - - public static Path hadoopFsPath(String fname, final Configuration conf, String user) - throws URISyntaxException, IOException, - InterruptedException { - if (fname == null || conf == null) { - return null; - } - - UserGroupInformation ugi; - if (user!=null) { - ugi = UgiFactory.getUgi(user); - } else { - ugi = UserGroupInformation.getLoginUser(); - } - final String finalFName = new String(fname); - - final FileSystem defaultFs = - ugi.doAs(new PrivilegedExceptionAction() { - public FileSystem run() - throws URISyntaxException, IOException, InterruptedException { - return FileSystem.get(new URI(finalFName), conf); - } - }); - - fname = addUserHomeDirectoryIfApplicable(fname, user, conf); - URI u = new URI(fname); - Path p = new Path(u).makeQualified(defaultFs); - - if (hadoopFsIsMissing(defaultFs, p)) - throw new FileNotFoundException("File " + fname + " does not exist."); - - return p; - } - - /** - * GET the given url. Returns the number of bytes received. - */ - public static int fetchUrl(URL url) - throws IOException { - URLConnection cnx = url.openConnection(); - InputStream in = cnx.getInputStream(); - - byte[] buf = new byte[8192]; - int total = 0; - int len = 0; - while ((len = in.read(buf)) >= 0) - total += len; - - return total; - } - - /** - * Set the environment variables to specify the hadoop user. - */ - public static Map hadoopUserEnv(String user, - String overrideClasspath) { - HashMap env = new HashMap(); - env.put("HADOOP_USER_NAME", user); - - if (overrideClasspath != null) { - env.put("HADOOP_USER_CLASSPATH_FIRST", "true"); - String cur = System.getenv("HADOOP_CLASSPATH"); - if (TempletonUtils.isset(cur)) - overrideClasspath = overrideClasspath + ":" + cur; - env.put("HADOOP_CLASSPATH", overrideClasspath); - } - - return env; - } + return env; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java index 45b2bdc..8a1c7b0 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java @@ -27,30 +27,30 @@ * execute a programs on the local box. */ public class TrivialExecService { - private static volatile TrivialExecService theSingleton; + private static volatile TrivialExecService theSingleton; - /** - * Retrieve the singleton. - */ - public static synchronized TrivialExecService getInstance() { - if (theSingleton == null) - theSingleton = new TrivialExecService(); - return theSingleton; - } + /** + * Retrieve the singleton. 
+ */ + public static synchronized TrivialExecService getInstance() { + if (theSingleton == null) + theSingleton = new TrivialExecService(); + return theSingleton; + } - public Process run(List cmd, List removeEnv, - Map environmentVariables) - throws IOException { - System.err.println("templeton: starting " + cmd); - System.err.print("With environment variables: "); - for (Map.Entry keyVal : environmentVariables.entrySet()) { - System.err.println(keyVal.getKey() + "=" + keyVal.getValue()); - } - ProcessBuilder pb = new ProcessBuilder(cmd); - for (String key : removeEnv) - pb.environment().remove(key); - pb.environment().putAll(environmentVariables); - return pb.start(); + public Process run(List cmd, List removeEnv, + Map environmentVariables) + throws IOException { + System.err.println("templeton: starting " + cmd); + System.err.print("With environment variables: "); + for (Map.Entry keyVal : environmentVariables.entrySet()) { + System.err.println(keyVal.getKey() + "=" + keyVal.getValue()); } + ProcessBuilder pb = new ProcessBuilder(cmd); + for (String key : removeEnv) + pb.environment().remove(key); + pb.environment().putAll(environmentVariables); + return pb.start(); + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java index 1452390..4f5e2d9 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java @@ -34,166 +34,166 @@ * This does periodic cleanup */ public class ZooKeeperCleanup extends Thread { - protected Configuration appConf; + protected Configuration appConf; - // The interval to wake up and check the queue - public static final String ZK_CLEANUP_INTERVAL = - "templeton.zookeeper.cleanup.interval"; // 12 hours + // The interval to wake up and check the queue + public static final String ZK_CLEANUP_INTERVAL = + "templeton.zookeeper.cleanup.interval"; // 12 hours - // The max age of a task allowed - public static final String ZK_CLEANUP_MAX_AGE = - "templeton.zookeeper.cleanup.maxage"; // ~ 1 week + // The max age of a task allowed + public static final String ZK_CLEANUP_MAX_AGE = + "templeton.zookeeper.cleanup.maxage"; // ~ 1 week - protected static long interval = 1000L * 60L * 60L * 12L; - protected static long maxage = 1000L * 60L * 60L * 24L * 7L; + protected static long interval = 1000L * 60L * 60L * 12L; + protected static long maxage = 1000L * 60L * 60L * 24L * 7L; - // The logger - private static final Log LOG = LogFactory.getLog(ZooKeeperCleanup.class); + // The logger + private static final Log LOG = LogFactory.getLog(ZooKeeperCleanup.class); - // Handle to cancel loop - private boolean stop = false; + // Handle to cancel loop + private boolean stop = false; - // The instance - private static ZooKeeperCleanup thisclass = null; + // The instance + private static ZooKeeperCleanup thisclass = null; - // Whether the cycle is running - private static boolean isRunning = false; + // Whether the cycle is running + private static boolean isRunning = false; - /** - * Create a cleanup object. We use the appConfig to configure JobState. 
- * @param appConf - */ - private ZooKeeperCleanup(Configuration appConf) { - this.appConf = appConf; - interval = appConf.getLong(ZK_CLEANUP_INTERVAL, interval); - maxage = appConf.getLong(ZK_CLEANUP_MAX_AGE, maxage); - } + /** + * Create a cleanup object. We use the appConfig to configure JobState. + * @param appConf + */ + private ZooKeeperCleanup(Configuration appConf) { + this.appConf = appConf; + interval = appConf.getLong(ZK_CLEANUP_INTERVAL, interval); + maxage = appConf.getLong(ZK_CLEANUP_MAX_AGE, maxage); + } - public static ZooKeeperCleanup getInstance(Configuration appConf) { - if (thisclass != null) { - return thisclass; - } - thisclass = new ZooKeeperCleanup(appConf); - return thisclass; + public static ZooKeeperCleanup getInstance(Configuration appConf) { + if (thisclass != null) { + return thisclass; } + thisclass = new ZooKeeperCleanup(appConf); + return thisclass; + } - public static void startInstance(Configuration appConf) throws IOException { - if (!isRunning) { - getInstance(appConf).start(); - } + public static void startInstance(Configuration appConf) throws IOException { + if (!isRunning) { + getInstance(appConf).start(); } + } + + /** + * Run the cleanup loop. + * + * @throws IOException + */ + public void run() { + ZooKeeper zk = null; + List nodes = null; + isRunning = true; + while (!stop) { + try { + // Put each check in a separate try/catch, so if that particular + // cycle fails, it'll try again on the next cycle. + try { + zk = ZooKeeperStorage.zkOpen(appConf); + + nodes = getChildList(zk); + + for (String node : nodes) { + boolean deleted = checkAndDelete(node, zk); + if (!deleted) { + break; + } + } - /** - * Run the cleanup loop. - * - * @throws IOException - */ - public void run() { - ZooKeeper zk = null; - List nodes = null; - isRunning = true; - while (!stop) { + zk.close(); + } catch (Exception e) { + LOG.error("Cleanup cycle failed: " + e.getMessage()); + } finally { + if (zk != null) { try { - // Put each check in a separate try/catch, so if that particular - // cycle fails, it'll try again on the next cycle. - try { - zk = ZooKeeperStorage.zkOpen(appConf); - - nodes = getChildList(zk); - - for (String node : nodes) { - boolean deleted = checkAndDelete(node, zk); - if (!deleted) { - break; - } - } - - zk.close(); - } catch (Exception e) { - LOG.error("Cleanup cycle failed: " + e.getMessage()); - } finally { - if (zk != null) { - try { - zk.close(); - } catch (InterruptedException e) { - // We're trying to exit anyway, just ignore. - } - } - } - - long sleepMillis = (long) (Math.random() * interval); - LOG.info("Next execution: " + new Date(new Date().getTime() - + sleepMillis)); - Thread.sleep(sleepMillis); - - } catch (Exception e) { - // If sleep fails, we should exit now before things get worse. - isRunning = false; - LOG.error("Cleanup failed: " + e.getMessage(), e); + zk.close(); + } catch (InterruptedException e) { + // We're trying to exit anyway, just ignore. } + } } + + long sleepMillis = (long) (Math.random() * interval); + LOG.info("Next execution: " + new Date(new Date().getTime() + + sleepMillis)); + Thread.sleep(sleepMillis); + + } catch (Exception e) { + // If sleep fails, we should exit now before things get worse. 
isRunning = false; + LOG.error("Cleanup failed: " + e.getMessage(), e); + } } - - /** - * Get the list of jobs from JobState - * - * @throws IOException - */ - public List getChildList(ZooKeeper zk) { - try { - List jobs = JobStateTracker.getTrackingJobs(appConf, zk); - Collections.sort(jobs); - return jobs; - } catch (IOException e) { - LOG.info("No jobs to check."); - } - return new ArrayList(); + isRunning = false; + } + + /** + * Get the list of jobs from JobState + * + * @throws IOException + */ + public List getChildList(ZooKeeper zk) { + try { + List jobs = JobStateTracker.getTrackingJobs(appConf, zk); + Collections.sort(jobs); + return jobs; + } catch (IOException e) { + LOG.info("No jobs to check."); } - - /** - * Check to see if a job is more than maxage old, and delete it if so. - */ - public boolean checkAndDelete(String node, ZooKeeper zk) { - JobState state = null; + return new ArrayList(); + } + + /** + * Check to see if a job is more than maxage old, and delete it if so. + */ + public boolean checkAndDelete(String node, ZooKeeper zk) { + JobState state = null; + try { + JobStateTracker tracker = new JobStateTracker(node, zk, true, + appConf.get(TempletonStorage.STORAGE_ROOT + + ZooKeeperStorage.TRACKINGDIR)); + long now = new Date().getTime(); + state = new JobState(tracker.getJobID(), appConf); + + // Set the default to 0 -- if the created date is null, there was + // an error in creation, and we want to delete it anyway. + long then = 0; + if (state.getCreated() != null) { + then = state.getCreated(); + } + if (now - then > maxage) { + LOG.info("Deleting " + tracker.getJobID()); + state.delete(); + tracker.delete(); + return true; + } + return false; + } catch (Exception e) { + LOG.info("checkAndDelete failed for " + node); + // We don't throw a new exception for this -- just keep going with the + // next one. + return true; + } finally { + if (state != null) { try { - JobStateTracker tracker = new JobStateTracker(node, zk, true, - appConf.get(TempletonStorage.STORAGE_ROOT + - ZooKeeperStorage.TRACKINGDIR)); - long now = new Date().getTime(); - state = new JobState(tracker.getJobID(), appConf); - - // Set the default to 0 -- if the created date is null, there was - // an error in creation, and we want to delete it anyway. - long then = 0; - if (state.getCreated() != null) { - then = state.getCreated(); - } - if (now - then > maxage) { - LOG.info("Deleting " + tracker.getJobID()); - state.delete(); - tracker.delete(); - return true; - } - return false; - } catch (Exception e) { - LOG.info("checkAndDelete failed for " + node); - // We don't throw a new exception for this -- just keep going with the - // next one. - return true; - } finally { - if (state != null) { - try { - state.close(); - } catch (IOException e) { - LOG.info("Couldn't close job state."); - } - } + state.close(); + } catch (IOException e) { + LOG.info("Couldn't close job state."); } + } } + } - // Handle to stop this process from the outside if needed. - public void exit() { - stop = true; - } + // Handle to stop this process from the outside if needed. 
+ public void exit() { + stop = true; + } } diff --git a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperStorage.java b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperStorage.java index 98c65ea..ecde598 100644 --- a/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperStorage.java +++ b/hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperStorage.java @@ -44,329 +44,329 @@ */ public class ZooKeeperStorage implements TempletonStorage { - public static final String TRACKINGDIR = "/created"; + public static final String TRACKINGDIR = "/created"; - // Locations for each of the storage types - public String storage_root = null; - public String job_path = null; - public String job_trackingpath = null; - public String overhead_path = null; + // Locations for each of the storage types + public String storage_root = null; + public String job_path = null; + public String job_trackingpath = null; + public String overhead_path = null; - public static final String ZK_HOSTS = "templeton.zookeeper.hosts"; - public static final String ZK_SESSION_TIMEOUT - = "templeton.zookeeper.session-timeout"; + public static final String ZK_HOSTS = "templeton.zookeeper.hosts"; + public static final String ZK_SESSION_TIMEOUT + = "templeton.zookeeper.session-timeout"; - public static final String ENCODING = "UTF-8"; + public static final String ENCODING = "UTF-8"; - private static final Log LOG = LogFactory.getLog(ZooKeeperStorage.class); + private static final Log LOG = LogFactory.getLog(ZooKeeperStorage.class); - private ZooKeeper zk; + private ZooKeeper zk; - /** - * Open a ZooKeeper connection for the JobState. - */ - public static ZooKeeper zkOpen(String zkHosts, int zkSessionTimeout) - throws IOException { - return new ZooKeeper(zkHosts, - zkSessionTimeout, - new Watcher() { - @Override - synchronized public void process(WatchedEvent event) { - } - }); - } + /** + * Open a ZooKeeper connection for the JobState. + */ + public static ZooKeeper zkOpen(String zkHosts, int zkSessionTimeout) + throws IOException { + return new ZooKeeper(zkHosts, + zkSessionTimeout, + new Watcher() { + @Override + synchronized public void process(WatchedEvent event) { + } + }); + } - /** - * Open a ZooKeeper connection for the JobState. - */ - public static ZooKeeper zkOpen(Configuration conf) - throws IOException { - return zkOpen(conf.get(ZK_HOSTS), - conf.getInt(ZK_SESSION_TIMEOUT, 30000)); - } + /** + * Open a ZooKeeper connection for the JobState. + */ + public static ZooKeeper zkOpen(Configuration conf) + throws IOException { + return zkOpen(conf.get(ZK_HOSTS), + conf.getInt(ZK_SESSION_TIMEOUT, 30000)); + } - public ZooKeeperStorage() { - // No-op -- this is needed to be able to instantiate the - // class from the name. + public ZooKeeperStorage() { + // No-op -- this is needed to be able to instantiate the + // class from the name. + } + + /** + * Close this ZK connection. + */ + public void close() + throws IOException { + if (zk != null) { + try { + zk.close(); + zk = null; + } catch (InterruptedException e) { + throw new IOException("Closing ZooKeeper connection", e); + } } + } - /** - * Close this ZK connection. 
- */ - public void close() - throws IOException { - if (zk != null) { - try { - zk.close(); - zk = null; - } catch (InterruptedException e) { - throw new IOException("Closing ZooKeeper connection", e); - } - } + public void startCleanup(Configuration config) { + try { + ZooKeeperCleanup.startInstance(config); + } catch (Exception e) { + LOG.warn("Cleanup instance didn't start."); } + } - public void startCleanup(Configuration config) { + /** + * Create a node in ZooKeeper + */ + public void create(Type type, String id) + throws IOException { + try { + String[] paths = getPaths(makeZnode(type, id)); + boolean wasCreated = false; + for (String znode : paths) { try { - ZooKeeperCleanup.startInstance(config); + zk.create(znode, new byte[0], + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + wasCreated = true; + } catch (KeeperException.NodeExistsException e) { + } + } + if (wasCreated) { + try { + // Really not sure if this should go here. Will have + // to see how the storage mechanism evolves. + if (type.equals(Type.JOB)) { + JobStateTracker jt = new JobStateTracker(id, zk, false, + job_trackingpath); + jt.create(); + } } catch (Exception e) { - LOG.warn("Cleanup instance didn't start."); + LOG.warn("Error tracking: " + e.getMessage()); + // If we couldn't create the tracker node, don't + // create the main node. + zk.delete(makeZnode(type, id), -1); } - } - - /** - * Create a node in ZooKeeper - */ - public void create(Type type, String id) - throws IOException { + } + if (zk.exists(makeZnode(type, id), false) == null) + throw new IOException("Unable to create " + makeZnode(type, id)); + if (wasCreated) { try { - String[] paths = getPaths(makeZnode(type, id)); - boolean wasCreated = false; - for (String znode : paths) { - try { - zk.create(znode, new byte[0], - Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - wasCreated = true; - } catch (KeeperException.NodeExistsException e) { - } - } - if (wasCreated) { - try { - // Really not sure if this should go here. Will have - // to see how the storage mechanism evolves. - if (type.equals(Type.JOB)) { - JobStateTracker jt = new JobStateTracker(id, zk, false, - job_trackingpath); - jt.create(); - } - } catch (Exception e) { - LOG.warn("Error tracking: " + e.getMessage()); - // If we couldn't create the tracker node, don't - // create the main node. - zk.delete(makeZnode(type, id), -1); - } - } - if (zk.exists(makeZnode(type, id), false) == null) - throw new IOException("Unable to create " + makeZnode(type, id)); - if (wasCreated) { - try { - saveField(type, id, "created", - Long.toString(System.currentTimeMillis())); - } catch (NotFoundException nfe) { - // Wow, something's really wrong. - throw new IOException("Couldn't write to node " + id, nfe); - } - } - } catch (KeeperException e) { - throw new IOException("Creating " + id, e); - } catch (InterruptedException e) { - throw new IOException("Creating " + id, e); + saveField(type, id, "created", + Long.toString(System.currentTimeMillis())); + } catch (NotFoundException nfe) { + // Wow, something's really wrong. + throw new IOException("Couldn't write to node " + id, nfe); } + } + } catch (KeeperException e) { + throw new IOException("Creating " + id, e); + } catch (InterruptedException e) { + throw new IOException("Creating " + id, e); } + } - /** - * Get the path based on the job type. 
- * - * @param type - */ - public String getPath(Type type) { - String typepath = overhead_path; - switch (type) { - case JOB: - typepath = job_path; - break; - case JOBTRACKING: - typepath = job_trackingpath; - break; - } - return typepath; + /** + * Get the path based on the job type. + * + * @param type + */ + public String getPath(Type type) { + String typepath = overhead_path; + switch (type) { + case JOB: + typepath = job_path; + break; + case JOBTRACKING: + typepath = job_trackingpath; + break; } + return typepath; + } - public static String[] getPaths(String fullpath) { - ArrayList paths = new ArrayList(); - if (fullpath.length() < 2) { - paths.add(fullpath); - } else { - int location = 0; - while ((location = fullpath.indexOf("/", location + 1)) > 0) { - paths.add(fullpath.substring(0, location)); - } - paths.add(fullpath); - } - String[] strings = new String[paths.size()]; - return paths.toArray(strings); + public static String[] getPaths(String fullpath) { + ArrayList paths = new ArrayList(); + if (fullpath.length() < 2) { + paths.add(fullpath); + } else { + int location = 0; + while ((location = fullpath.indexOf("/", location + 1)) > 0) { + paths.add(fullpath.substring(0, location)); + } + paths.add(fullpath); } + String[] strings = new String[paths.size()]; + return paths.toArray(strings); + } - /** - * A helper method that sets a field value. - * @param type - * @param id - * @param name - * @param val - * @throws KeeperException - * @throws UnsupportedEncodingException - * @throws InterruptedException - */ - private void setFieldData(Type type, String id, String name, String val) - throws KeeperException, UnsupportedEncodingException, InterruptedException { - try { - zk.create(makeFieldZnode(type, id, name), - val.getBytes(ENCODING), - Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - } catch (KeeperException.NodeExistsException e) { - zk.setData(makeFieldZnode(type, id, name), - val.getBytes(ENCODING), - -1); - } + /** + * A helper method that sets a field value. + * @param type + * @param id + * @param name + * @param val + * @throws KeeperException + * @throws UnsupportedEncodingException + * @throws InterruptedException + */ + private void setFieldData(Type type, String id, String name, String val) + throws KeeperException, UnsupportedEncodingException, InterruptedException { + try { + zk.create(makeFieldZnode(type, id, name), + val.getBytes(ENCODING), + Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + } catch (KeeperException.NodeExistsException e) { + zk.setData(makeFieldZnode(type, id, name), + val.getBytes(ENCODING), + -1); } + } - /** - * Make a ZK path to the named field. - */ - public String makeFieldZnode(Type type, String id, String name) { - return makeZnode(type, id) + "/" + name; - } + /** + * Make a ZK path to the named field. 
+ */ + public String makeFieldZnode(Type type, String id, String name) { + return makeZnode(type, id) + "/" + name; + } - /** - * Make a ZK path to job - */ - public String makeZnode(Type type, String id) { - return getPath(type) + "/" + id; - } + /** + * Make a ZK path to job + */ + public String makeZnode(Type type, String id) { + return getPath(type) + "/" + id; + } - @Override - public void saveField(Type type, String id, String key, String val) - throws NotFoundException { - try { - if (val != null) { - create(type, id); - setFieldData(type, id, key, val); - } - } catch (Exception e) { - throw new NotFoundException("Writing " + key + ": " + val + ", " - + e.getMessage()); - } + @Override + public void saveField(Type type, String id, String key, String val) + throws NotFoundException { + try { + if (val != null) { + create(type, id); + setFieldData(type, id, key, val); + } + } catch (Exception e) { + throw new NotFoundException("Writing " + key + ": " + val + ", " + + e.getMessage()); } + } - @Override - public String getField(Type type, String id, String key) { - try { - byte[] b = zk.getData(makeFieldZnode(type, id, key), false, null); - return new String(b, ENCODING); - } catch (Exception e) { - return null; - } + @Override + public String getField(Type type, String id, String key) { + try { + byte[] b = zk.getData(makeFieldZnode(type, id, key), false, null); + return new String(b, ENCODING); + } catch (Exception e) { + return null; } + } - @Override - public Map getFields(Type type, String id) { - HashMap map = new HashMap(); - try { - for (String node : zk.getChildren(makeZnode(type, id), false)) { - byte[] b = zk.getData(makeFieldZnode(type, id, node), - false, null); - map.put(node, new String(b, ENCODING)); - } - } catch (Exception e) { - return map; - } - return map; + @Override + public Map getFields(Type type, String id) { + HashMap map = new HashMap(); + try { + for (String node : zk.getChildren(makeZnode(type, id), false)) { + byte[] b = zk.getData(makeFieldZnode(type, id, node), + false, null); + map.put(node, new String(b, ENCODING)); + } + } catch (Exception e) { + return map; } + return map; + } - @Override - public boolean delete(Type type, String id) throws NotFoundException { + @Override + public boolean delete(Type type, String id) throws NotFoundException { + try { + for (String child : zk.getChildren(makeZnode(type, id), false)) { try { - for (String child : zk.getChildren(makeZnode(type, id), false)) { - try { - zk.delete(makeFieldZnode(type, id, child), -1); - } catch (Exception e) { - // Other nodes may be trying to delete this at the same time, - // so just log errors and skip them. - throw new NotFoundException("Couldn't delete " + - makeFieldZnode(type, id, child)); - } - } - try { - zk.delete(makeZnode(type, id), -1); - } catch (Exception e) { - // Same thing -- might be deleted by other nodes, so just go on. - throw new NotFoundException("Couldn't delete " + - makeZnode(type, id)); - } + zk.delete(makeFieldZnode(type, id, child), -1); } catch (Exception e) { - // Error getting children of node -- probably node has been deleted - throw new NotFoundException("Couldn't get children of " + - makeZnode(type, id)); + // Other nodes may be trying to delete this at the same time, + // so just log errors and skip them. + throw new NotFoundException("Couldn't delete " + + makeFieldZnode(type, id, child)); } - return true; + } + try { + zk.delete(makeZnode(type, id), -1); + } catch (Exception e) { + // Same thing -- might be deleted by other nodes, so just go on. 
+ throw new NotFoundException("Couldn't delete " + + makeZnode(type, id)); + } + } catch (Exception e) { + // Error getting children of node -- probably node has been deleted + throw new NotFoundException("Couldn't get children of " + + makeZnode(type, id)); } + return true; + } - @Override - public List getAll() { - ArrayList allNodes = new ArrayList(); - for (Type type : Type.values()) { - allNodes.addAll(getAllForType(type)); - } - return allNodes; + @Override + public List getAll() { + ArrayList allNodes = new ArrayList(); + for (Type type : Type.values()) { + allNodes.addAll(getAllForType(type)); } + return allNodes; + } - @Override - public List getAllForType(Type type) { - try { - return zk.getChildren(getPath(type), false); - } catch (Exception e) { - return new ArrayList(); - } + @Override + public List getAllForType(Type type) { + try { + return zk.getChildren(getPath(type), false); + } catch (Exception e) { + return new ArrayList(); } + } - @Override - public List getAllForKey(String key, String value) { - ArrayList allNodes = new ArrayList(); - try { - for (Type type : Type.values()) { - allNodes.addAll(getAllForTypeAndKey(type, key, value)); - } - } catch (Exception e) { - LOG.info("Couldn't find children."); - } - return allNodes; + @Override + public List getAllForKey(String key, String value) { + ArrayList allNodes = new ArrayList(); + try { + for (Type type : Type.values()) { + allNodes.addAll(getAllForTypeAndKey(type, key, value)); + } + } catch (Exception e) { + LOG.info("Couldn't find children."); } + return allNodes; + } - @Override - public List getAllForTypeAndKey(Type type, String key, String value) { - ArrayList allNodes = new ArrayList(); - try { - for (String id : zk.getChildren(getPath(type), false)) { - for (String field : zk.getChildren(id, false)) { - if (field.endsWith("/" + key)) { - byte[] b = zk.getData(field, false, null); - if (new String(b, ENCODING).equals(value)) { - allNodes.add(id); - } - } - } + @Override + public List getAllForTypeAndKey(Type type, String key, String value) { + ArrayList allNodes = new ArrayList(); + try { + for (String id : zk.getChildren(getPath(type), false)) { + for (String field : zk.getChildren(id, false)) { + if (field.endsWith("/" + key)) { + byte[] b = zk.getData(field, false, null); + if (new String(b, ENCODING).equals(value)) { + allNodes.add(id); } - } catch (Exception e) { - // Log and go to the next type -- this one might not exist - LOG.info("Couldn't find children of " + getPath(type)); + } } - return allNodes; + } + } catch (Exception e) { + // Log and go to the next type -- this one might not exist + LOG.info("Couldn't find children of " + getPath(type)); } + return allNodes; + } - @Override - public void openStorage(Configuration config) throws IOException { - storage_root = config.get(STORAGE_ROOT); - job_path = storage_root + "/jobs"; - job_trackingpath = storage_root + TRACKINGDIR; - overhead_path = storage_root + "/overhead"; + @Override + public void openStorage(Configuration config) throws IOException { + storage_root = config.get(STORAGE_ROOT); + job_path = storage_root + "/jobs"; + job_trackingpath = storage_root + TRACKINGDIR; + overhead_path = storage_root + "/overhead"; - if (zk == null) { - zk = zkOpen(config); - } + if (zk == null) { + zk = zkOpen(config); } + } - @Override - public void closeStorage() throws IOException { - close(); - } + @Override + public void closeStorage() throws IOException { + close(); + } } diff --git 
a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestDesc.java b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestDesc.java index 5e0463d..653cd04 100644 --- a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestDesc.java +++ b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestDesc.java @@ -31,124 +31,124 @@ * and from json. This also sets every field of the TableDesc object. */ public class TestDesc extends TestCase { - public void testTableDesc() - throws Exception - { - TableDesc td = buildTableDesc(); - assertNotNull(td); - - String json = toJson(td); - assertTrue(json.length() > 100); - - TableDesc tdCopy = (TableDesc) fromJson(json, TableDesc.class); - assertEquals(td, tdCopy); - } - - private TableDesc buildTableDesc() { - TableDesc x = new TableDesc(); - x.group = "staff"; - x.permissions = "755"; - x.external = true; - x.ifNotExists = true; - x.table = "a_table"; - x.comment = "a comment"; - x.columns = buildColumns(); - x.partitionedBy = buildPartitionedBy(); - x.clusteredBy = buildClusterBy(); - x.format = buildStorageFormat(); - x.location = "hdfs://localhost:9000/user/me/a_table"; - x.tableProperties = buildGenericProperties(); - return x; - } - - public List buildColumns() { - ArrayList x = new ArrayList(); - x.add(new ColumnDesc("id", "bigint", null)); - x.add(new ColumnDesc("price", "float", "The unit price")); - x.add(new ColumnDesc("name", "string", "The item name")); - return x; - } - - public List buildPartitionedBy() { - ArrayList x = new ArrayList(); - x.add(new ColumnDesc("country", "string", "The country of origin")); - return x; - } - - public TableDesc.ClusteredByDesc buildClusterBy() { - TableDesc.ClusteredByDesc x = new TableDesc.ClusteredByDesc(); - x.columnNames = new ArrayList(); - x.columnNames.add("id"); - x.sortedBy = buildSortedBy(); - x.numberOfBuckets = 16; - return x; - } - - public List buildSortedBy() { - ArrayList x - = new ArrayList(); - x.add(new TableDesc.ClusterSortOrderDesc("id", TableDesc.SortDirectionDesc.ASC)); - return x; - } - - public TableDesc.StorageFormatDesc buildStorageFormat() { - TableDesc.StorageFormatDesc x = new TableDesc.StorageFormatDesc(); - x.rowFormat = buildRowFormat(); - x.storedAs = "rcfile"; - x.storedBy = buildStoredBy(); - return x; - } - - public TableDesc.RowFormatDesc buildRowFormat() { - TableDesc.RowFormatDesc x = new TableDesc.RowFormatDesc(); - x.fieldsTerminatedBy = "\u0001"; - x.collectionItemsTerminatedBy = "\u0002"; - x.mapKeysTerminatedBy = "\u0003"; - x.linesTerminatedBy = "\u0004"; - x.serde = buildSerde(); - return x; - } - - public TableDesc.SerdeDesc buildSerde() { - TableDesc.SerdeDesc x = new TableDesc.SerdeDesc(); - x.name = "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"; - x.properties = new HashMap(); - x.properties.put("field.delim", ","); - return x; - } - - public TableDesc.StoredByDesc buildStoredBy() { - TableDesc.StoredByDesc x = new TableDesc.StoredByDesc(); - x.className = "org.apache.hadoop.hive.hbase.HBaseStorageHandler"; - x.properties = new HashMap(); - x.properties.put("hbase.columns.mapping", "cf:string"); - x.properties.put("hbase.table.name", "hbase_table_0"); - return x; - } - - public Map buildGenericProperties() { - HashMap x = new HashMap(); - x.put("carmas", "evil"); - x.put("rachel", "better"); - x.put("ctdean", "angelic"); - x.put("paul", "dangerously unbalanced"); - x.put("dra", "organic"); - return x; - } - - private String toJson(Object obj) - throws Exception - { - 
ObjectMapper mapper = new ObjectMapper(); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - mapper.writeValue(out, obj); - return out.toString(); - } - - private Object fromJson(String json, Class klass) - throws Exception - { - ObjectMapper mapper = new ObjectMapper(); - return mapper.readValue(json, klass); - } + public void testTableDesc() + throws Exception + { + TableDesc td = buildTableDesc(); + assertNotNull(td); + + String json = toJson(td); + assertTrue(json.length() > 100); + + TableDesc tdCopy = (TableDesc) fromJson(json, TableDesc.class); + assertEquals(td, tdCopy); + } + + private TableDesc buildTableDesc() { + TableDesc x = new TableDesc(); + x.group = "staff"; + x.permissions = "755"; + x.external = true; + x.ifNotExists = true; + x.table = "a_table"; + x.comment = "a comment"; + x.columns = buildColumns(); + x.partitionedBy = buildPartitionedBy(); + x.clusteredBy = buildClusterBy(); + x.format = buildStorageFormat(); + x.location = "hdfs://localhost:9000/user/me/a_table"; + x.tableProperties = buildGenericProperties(); + return x; + } + + public List buildColumns() { + ArrayList x = new ArrayList(); + x.add(new ColumnDesc("id", "bigint", null)); + x.add(new ColumnDesc("price", "float", "The unit price")); + x.add(new ColumnDesc("name", "string", "The item name")); + return x; + } + + public List buildPartitionedBy() { + ArrayList x = new ArrayList(); + x.add(new ColumnDesc("country", "string", "The country of origin")); + return x; + } + + public TableDesc.ClusteredByDesc buildClusterBy() { + TableDesc.ClusteredByDesc x = new TableDesc.ClusteredByDesc(); + x.columnNames = new ArrayList(); + x.columnNames.add("id"); + x.sortedBy = buildSortedBy(); + x.numberOfBuckets = 16; + return x; + } + + public List buildSortedBy() { + ArrayList x + = new ArrayList(); + x.add(new TableDesc.ClusterSortOrderDesc("id", TableDesc.SortDirectionDesc.ASC)); + return x; + } + + public TableDesc.StorageFormatDesc buildStorageFormat() { + TableDesc.StorageFormatDesc x = new TableDesc.StorageFormatDesc(); + x.rowFormat = buildRowFormat(); + x.storedAs = "rcfile"; + x.storedBy = buildStoredBy(); + return x; + } + + public TableDesc.RowFormatDesc buildRowFormat() { + TableDesc.RowFormatDesc x = new TableDesc.RowFormatDesc(); + x.fieldsTerminatedBy = "\u0001"; + x.collectionItemsTerminatedBy = "\u0002"; + x.mapKeysTerminatedBy = "\u0003"; + x.linesTerminatedBy = "\u0004"; + x.serde = buildSerde(); + return x; + } + + public TableDesc.SerdeDesc buildSerde() { + TableDesc.SerdeDesc x = new TableDesc.SerdeDesc(); + x.name = "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"; + x.properties = new HashMap(); + x.properties.put("field.delim", ","); + return x; + } + + public TableDesc.StoredByDesc buildStoredBy() { + TableDesc.StoredByDesc x = new TableDesc.StoredByDesc(); + x.className = "org.apache.hadoop.hive.hbase.HBaseStorageHandler"; + x.properties = new HashMap(); + x.properties.put("hbase.columns.mapping", "cf:string"); + x.properties.put("hbase.table.name", "hbase_table_0"); + return x; + } + + public Map buildGenericProperties() { + HashMap x = new HashMap(); + x.put("carmas", "evil"); + x.put("rachel", "better"); + x.put("ctdean", "angelic"); + x.put("paul", "dangerously unbalanced"); + x.put("dra", "organic"); + return x; + } + + private String toJson(Object obj) + throws Exception + { + ObjectMapper mapper = new ObjectMapper(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + mapper.writeValue(out, obj); + return out.toString(); + } + + private Object fromJson(String 
json, Class klass) + throws Exception + { + ObjectMapper mapper = new ObjectMapper(); + return mapper.readValue(json, klass); + } } diff --git a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestServer.java b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestServer.java index 7cf5c35..2f89ad4 100644 --- a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestServer.java +++ b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestServer.java @@ -28,27 +28,27 @@ */ public class TestServer extends TestCase { - MockServer server; - - public void setUp() { - new Main(null); // Initialize the config - server = new MockServer(); - } - - public void testServer() { - assertNotNull(server); - } - - public void testStatus() { - assertEquals(server.status().get("status"), "ok"); - } - - public void testVersions() { - assertEquals(server.version().get("version"), "v1"); - } - - public void testFormats() { - assertEquals(1, server.requestFormats().size()); - assertEquals( ((List)server.requestFormats().get("responseTypes")).get(0), "application/json"); - } + MockServer server; + + public void setUp() { + new Main(null); // Initialize the config + server = new MockServer(); + } + + public void testServer() { + assertNotNull(server); + } + + public void testStatus() { + assertEquals(server.status().get("status"), "ok"); + } + + public void testVersions() { + assertEquals(server.version().get("version"), "v1"); + } + + public void testFormats() { + assertEquals(1, server.requestFormats().size()); + assertEquals( ((List)server.requestFormats().get("responseTypes")).get(0), "application/json"); + } } diff --git a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestWebHCatE2e.java b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestWebHCatE2e.java index 4897ae2..9df4421 100644 --- a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestWebHCatE2e.java +++ b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestWebHCatE2e.java @@ -55,226 +55,226 @@ * It may be possible to extend this to more than just DDL later. 
*/ public class TestWebHCatE2e { - private static final Logger LOG = - LoggerFactory.getLogger(TestWebHCatE2e.class); - private static final String templetonBaseUrl = - "http://localhost:50111/templeton/v1"; - private static final String username= "johndoe"; - private static final String ERROR_CODE = "errorCode"; - private static Main templetonServer; - private static final String charSet = "UTF-8"; - @BeforeClass - public static void startHebHcatInMem() { - templetonServer = new Main(new String[] {"-D" + AppConfig.UNIT_TEST_MODE + "=true"}); - LOG.info("Starting Main"); - templetonServer.run(); - LOG.info("Main started"); + private static final Logger LOG = + LoggerFactory.getLogger(TestWebHCatE2e.class); + private static final String templetonBaseUrl = + "http://localhost:50111/templeton/v1"; + private static final String username= "johndoe"; + private static final String ERROR_CODE = "errorCode"; + private static Main templetonServer; + private static final String charSet = "UTF-8"; + @BeforeClass + public static void startHebHcatInMem() { + templetonServer = new Main(new String[] {"-D" + AppConfig.UNIT_TEST_MODE + "=true"}); + LOG.info("Starting Main"); + templetonServer.run(); + LOG.info("Main started"); + } + @AfterClass + public static void stopWebHcatInMem() { + if(templetonServer != null) { + LOG.info("Stopping Main"); + templetonServer.stop(); + LOG.info("Main stopped"); } - @AfterClass - public static void stopWebHcatInMem() { - if(templetonServer != null) { - LOG.info("Stopping Main"); - templetonServer.stop(); - LOG.info("Main stopped"); - } - } - @Test - public void getStatus() throws IOException { - LOG.debug("+getStatus()"); - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/status", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), "{\"status\":\"ok\",\"version\":\"v1\"}", p.responseBody); - LOG.debug("-getStatus()"); - } - @Ignore("not ready due to HIVE-4824") - @Test - public void listDataBases() throws IOException { - LOG.debug("+listDataBases()"); - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), "{\"databases\":[\"default\"]}", p.responseBody); - LOG.debug("-listDataBases()"); - } - /** - * Check that we return correct status code when the URL doesn't map to any method - * in {@link Server} - */ - @Test - public void invalidPath() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/no_such_mapping/database", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); - } - /** - * tries to drop table in a DB that doesn't exist - */ - @Ignore("not ready due to HIVE-4824") - @Test - public void dropTableNoSuchDB() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + - "/ddl/database/no_such_db/table/t1", HTTP_METHOD_TYPE.DELETE); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), - ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), - getErrorCode(p.responseBody)); - } - /** - * tries to drop table in a DB that doesn't exist - */ - @Ignore("not ready due to HIVE-4824") - @Test - public void dropTableNoSuchDbIfExists() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/no_such_db/table/t1", - HTTP_METHOD_TYPE.DELETE, null, 
new NameValuePair[] - {new NameValuePair("ifExists", "true")}); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), getErrorCode(p.responseBody)); + } + @Test + public void getStatus() throws IOException { + LOG.debug("+getStatus()"); + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/status", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), "{\"status\":\"ok\",\"version\":\"v1\"}", p.responseBody); + LOG.debug("-getStatus()"); + } + @Ignore("not ready due to HIVE-4824") + @Test + public void listDataBases() throws IOException { + LOG.debug("+listDataBases()"); + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), "{\"databases\":[\"default\"]}", p.responseBody); + LOG.debug("-listDataBases()"); + } + /** + * Check that we return correct status code when the URL doesn't map to any method + * in {@link Server} + */ + @Test + public void invalidPath() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/no_such_mapping/database", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); + } + /** + * tries to drop table in a DB that doesn't exist + */ + @Ignore("not ready due to HIVE-4824") + @Test + public void dropTableNoSuchDB() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + + "/ddl/database/no_such_db/table/t1", HTTP_METHOD_TYPE.DELETE); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), + ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), + getErrorCode(p.responseBody)); + } + /** + * tries to drop table in a DB that doesn't exist + */ + @Ignore("not ready due to HIVE-4824") + @Test + public void dropTableNoSuchDbIfExists() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/no_such_db/table/t1", + HTTP_METHOD_TYPE.DELETE, null, new NameValuePair[] + {new NameValuePair("ifExists", "true")}); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), getErrorCode(p.responseBody)); + } + /** + * tries to drop table that doesn't exist (with ifExists=true) + */ + @Ignore("not ready due to HIVE-4824") + @Test + public void dropTableIfExists() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/no_such_table", + HTTP_METHOD_TYPE.DELETE, null, new NameValuePair[] + {new NameValuePair("ifExists", "true")}); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); + } + @Ignore("not ready due to HIVE-4824") + @Test + public void createDataBase() throws IOException { + Map props = new HashMap(); + props.put("comment", "Hello, there"); + props.put("location", "file://" + System.getProperty("hive.metastore.warehouse.dir")); + Map props2 = new HashMap(); + props2.put("prop", "val"); + props.put("properties", props2); + //{ "comment":"Hello there", "location":"file:///tmp/warehouse", "properties":{"a":"b"}} + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/newdb", HTTP_METHOD_TYPE.PUT, props, null); + Assert.assertEquals(p.getAssertMsg(), 
HttpStatus.OK_200, p.httpStatusCode); + } + @Ignore("not ready due to HIVE-4824") + @Test + public void createTable() throws IOException { + //{ "comment":"test", "columns": [ { "name": "col1", "type": "string" } ], "format": { "storedAs": "rcfile" } } + Map props = new HashMap(); + props.put("comment", "Table in default db"); + Map col = new HashMap(); + col.put("name", "col1"); + col.put("type", "string"); + List> colList = new ArrayList>(1); + colList.add(col); + props.put("columns", colList); + Map format = new HashMap(); + format.put("storedAs", "rcfile"); + props.put("format", format); + MethodCallRetVal createTbl = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/test_table", HTTP_METHOD_TYPE.PUT, props, null); + Assert.assertEquals(createTbl.getAssertMsg(), HttpStatus.OK_200, createTbl.httpStatusCode); + LOG.info("createTable() resp: " + createTbl.responseBody); + + MethodCallRetVal descTbl = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/test_table", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(descTbl.getAssertMsg(), HttpStatus.OK_200, descTbl.httpStatusCode); + } + @Ignore("not ready due to HIVE-4824") + @Test + public void describeNoSuchTable() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + + "/ddl/database/default/table/no_such_table", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, + p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), + ErrorMsg.INVALID_TABLE.getErrorCode(), + getErrorCode(p.responseBody)); + } + /** + * It's expected that Templeton returns a properly formatted JSON object when it + * encounters an error. It should have {@code ERROR_CODE} element in it which + * should be the Hive canonical error msg code. + * @return the code or -1 if it cannot be found + */ + private static int getErrorCode(String jsonErrorObject) throws IOException { + @SuppressWarnings("unchecked")//JSON key is always a String + Map retProps = JsonBuilder.jsonToMap(jsonErrorObject + "blah blah"); + int hiveRetCode = -1; + if(retProps.get(ERROR_CODE) !=null) { + hiveRetCode = Integer.parseInt(retProps.get(ERROR_CODE).toString()); } - /** - * tries to drop table that doesn't exist (with ifExists=true) - */ - @Ignore("not ready due to HIVE-4824") - @Test - public void dropTableIfExists() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/no_such_table", - HTTP_METHOD_TYPE.DELETE, null, new NameValuePair[] - {new NameValuePair("ifExists", "true")}); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); + return hiveRetCode; + } + /** + * Encapsulates information from HTTP method call + */ + private static class MethodCallRetVal { + private final int httpStatusCode; + private final String responseBody; + private final String submittedURL; + private final String methodName; + private MethodCallRetVal(int httpStatusCode, String responseBody, String submittedURL, String methodName) { + this.httpStatusCode = httpStatusCode; + this.responseBody = responseBody; + this.submittedURL = submittedURL; + this.methodName = methodName; } - @Ignore("not ready due to HIVE-4824") - @Test - public void createDataBase() throws IOException { - Map props = new HashMap(); - props.put("comment", "Hello, there"); - props.put("location", "file://" + System.getProperty("hive.metastore.warehouse.dir")); - Map props2 = new HashMap(); - props2.put("prop", "val"); - props.put("properties", props2); - //{ "comment":"Hello there", 
"location":"file:///tmp/warehouse", "properties":{"a":"b"}} - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/newdb", HTTP_METHOD_TYPE.PUT, props, null); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); + String getAssertMsg() { + return methodName + " " + submittedURL + " " + responseBody; } - @Ignore("not ready due to HIVE-4824") - @Test - public void createTable() throws IOException { - //{ "comment":"test", "columns": [ { "name": "col1", "type": "string" } ], "format": { "storedAs": "rcfile" } } - Map props = new HashMap(); - props.put("comment", "Table in default db"); - Map col = new HashMap(); - col.put("name", "col1"); - col.put("type", "string"); - List> colList = new ArrayList>(1); - colList.add(col); - props.put("columns", colList); - Map format = new HashMap(); - format.put("storedAs", "rcfile"); - props.put("format", format); - MethodCallRetVal createTbl = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/test_table", HTTP_METHOD_TYPE.PUT, props, null); - Assert.assertEquals(createTbl.getAssertMsg(), HttpStatus.OK_200, createTbl.httpStatusCode); - LOG.info("createTable() resp: " + createTbl.responseBody); - - MethodCallRetVal descTbl = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/test_table", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(descTbl.getAssertMsg(), HttpStatus.OK_200, descTbl.httpStatusCode); + } + private static enum HTTP_METHOD_TYPE {GET, POST, DELETE, PUT} + private static MethodCallRetVal doHttpCall(String uri, HTTP_METHOD_TYPE type) throws IOException { + return doHttpCall(uri, type, null, null); + } + /** + * Does a basic HTTP GET and returns Http Status code + response body + * Will add the dummy user query string + */ + private static MethodCallRetVal doHttpCall(String uri, HTTP_METHOD_TYPE type, Map data, NameValuePair[] params) throws IOException { + HttpClient client = new HttpClient(); + HttpMethod method; + switch (type) { + case GET: + method = new GetMethod(uri); + break; + case DELETE: + method = new DeleteMethod(uri); + break; + case PUT: + method = new PutMethod(uri); + if(data == null) { + break; + } + String msgBody = JsonBuilder.mapToJson(data); + LOG.info("Msg Body: " + msgBody); + StringRequestEntity sre = new StringRequestEntity(msgBody, "application/json", charSet); + ((PutMethod)method).setRequestEntity(sre); + break; + default: + throw new IllegalArgumentException("Unsupported method type: " + type); } - @Ignore("not ready due to HIVE-4824") - @Test - public void describeNoSuchTable() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + - "/ddl/database/default/table/no_such_table", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, - p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), - ErrorMsg.INVALID_TABLE.getErrorCode(), - getErrorCode(p.responseBody)); + if(params == null) { + method.setQueryString(new NameValuePair[] {new NameValuePair("user.name", username)}); } - /** - * It's expected that Templeton returns a properly formatted JSON object when it - * encounters an error. It should have {@code ERROR_CODE} element in it which - * should be the Hive canonical error msg code. 
- * @return the code or -1 if it cannot be found - */ - private static int getErrorCode(String jsonErrorObject) throws IOException { - @SuppressWarnings("unchecked")//JSON key is always a String - Map retProps = JsonBuilder.jsonToMap(jsonErrorObject + "blah blah"); - int hiveRetCode = -1; - if(retProps.get(ERROR_CODE) !=null) { - hiveRetCode = Integer.parseInt(retProps.get(ERROR_CODE).toString()); - } - return hiveRetCode; + else { + NameValuePair[] newParams = new NameValuePair[params.length + 1]; + System.arraycopy(params, 0, newParams, 1, params.length); + newParams[0] = new NameValuePair("user.name", username); + method.setQueryString(newParams); } - /** - * Encapsulates information from HTTP method call - */ - private static class MethodCallRetVal { - private final int httpStatusCode; - private final String responseBody; - private final String submittedURL; - private final String methodName; - private MethodCallRetVal(int httpStatusCode, String responseBody, String submittedURL, String methodName) { - this.httpStatusCode = httpStatusCode; - this.responseBody = responseBody; - this.submittedURL = submittedURL; - this.methodName = methodName; - } - String getAssertMsg() { - return methodName + " " + submittedURL + " " + responseBody; - } + String actualUri = "no URI"; + try { + actualUri = method.getURI().toString();//should this be escaped string? + LOG.debug(type + ": " + method.getURI().getEscapedURI()); + int httpStatus = client.executeMethod(method); + LOG.debug("Http Status Code=" + httpStatus); + String resp = method.getResponseBodyAsString(); + LOG.debug("response: " + resp); + return new MethodCallRetVal(httpStatus, resp, actualUri, method.getName()); } - private static enum HTTP_METHOD_TYPE {GET, POST, DELETE, PUT} - private static MethodCallRetVal doHttpCall(String uri, HTTP_METHOD_TYPE type) throws IOException { - return doHttpCall(uri, type, null, null); + catch (IOException ex) { + LOG.error("doHttpCall() failed", ex); } - /** - * Does a basic HTTP GET and returns Http Status code + response body - * Will add the dummy user query string - */ - private static MethodCallRetVal doHttpCall(String uri, HTTP_METHOD_TYPE type, Map data, NameValuePair[] params) throws IOException { - HttpClient client = new HttpClient(); - HttpMethod method; - switch (type) { - case GET: - method = new GetMethod(uri); - break; - case DELETE: - method = new DeleteMethod(uri); - break; - case PUT: - method = new PutMethod(uri); - if(data == null) { - break; - } - String msgBody = JsonBuilder.mapToJson(data); - LOG.info("Msg Body: " + msgBody); - StringRequestEntity sre = new StringRequestEntity(msgBody, "application/json", charSet); - ((PutMethod)method).setRequestEntity(sre); - break; - default: - throw new IllegalArgumentException("Unsupported method type: " + type); - } - if(params == null) { - method.setQueryString(new NameValuePair[] {new NameValuePair("user.name", username)}); - } - else { - NameValuePair[] newParams = new NameValuePair[params.length + 1]; - System.arraycopy(params, 0, newParams, 1, params.length); - newParams[0] = new NameValuePair("user.name", username); - method.setQueryString(newParams); - } - String actualUri = "no URI"; - try { - actualUri = method.getURI().toString();//should this be escaped string? 
- LOG.debug(type + ": " + method.getURI().getEscapedURI()); - int httpStatus = client.executeMethod(method); - LOG.debug("Http Status Code=" + httpStatus); - String resp = method.getResponseBodyAsString(); - LOG.debug("response: " + resp); - return new MethodCallRetVal(httpStatus, resp, actualUri, method.getName()); - } - catch (IOException ex) { - LOG.error("doHttpCall() failed", ex); - } - finally { - method.releaseConnection(); - } - return new MethodCallRetVal(-1, "Http " + type + " failed; see log file for details", actualUri, method.getName()); + finally { + method.releaseConnection(); } + return new MethodCallRetVal(-1, "Http " + type + " failed; see log file for details", actualUri, method.getName()); + } } diff --git a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockExecService.java b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockExecService.java index 90a9551..0e68f63 100644 --- a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockExecService.java +++ b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockExecService.java @@ -29,21 +29,21 @@ public class MockExecService implements ExecService { - public ExecBean run(String program, List args, - Map env) { - ExecBean bean = new ExecBean(); - bean.stdout = program; - bean.stderr = args.toString(); - return bean; - } + public ExecBean run(String program, List args, + Map env) { + ExecBean bean = new ExecBean(); + bean.stdout = program; + bean.stderr = args.toString(); + return bean; + } - @Override - public ExecBean runUnlimited(String program, - List args, Map env) - throws NotAuthorizedException, ExecuteException, IOException { - ExecBean bean = new ExecBean(); - bean.stdout = program; - bean.stderr = args.toString(); - return null; - } + @Override + public ExecBean runUnlimited(String program, + List args, Map env) + throws NotAuthorizedException, ExecuteException, IOException { + ExecBean bean = new ExecBean(); + bean.stdout = program; + bean.stderr = args.toString(); + return null; + } } diff --git a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockServer.java b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockServer.java index 0bee30a..4b2099c 100644 --- a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockServer.java +++ b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockServer.java @@ -24,18 +24,18 @@ * Test that the server code exists. 
*/ public class MockServer extends Server { - public String user; + public String user; - public MockServer() { - execService = new MockExecService(); - resetUser(); - } + public MockServer() { + execService = new MockExecService(); + resetUser(); + } - public void resetUser() { - user = System.getenv("USER"); - } + public void resetUser() { + user = System.getenv("USER"); + } - public String getUser() { - return user; - } + public String getUser() { + return user; + } } diff --git a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockUriInfo.java b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockUriInfo.java index 4ef71b6..d69a733 100644 --- a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockUriInfo.java +++ b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockUriInfo.java @@ -30,110 +30,110 @@ public class MockUriInfo implements UriInfo { - @Override - public URI getAbsolutePath() { - // TODO Auto-generated method stub - return null; - } - - @Override - public UriBuilder getAbsolutePathBuilder() { - // TODO Auto-generated method stub - return null; - } - - @Override - public URI getBaseUri() { - try { - return new URI("http://fakeuri/templeton"); - } catch (URISyntaxException e) { - e.printStackTrace(); - } - return null; - } - - @Override - public UriBuilder getBaseUriBuilder() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getMatchedResources() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getMatchedURIs() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getMatchedURIs(boolean arg0) { - // TODO Auto-generated method stub - return null; - } - - @Override - public String getPath() { - // TODO Auto-generated method stub - return null; - } - - @Override - public String getPath(boolean arg0) { - // TODO Auto-generated method stub - return null; - } - - @Override - public MultivaluedMap getPathParameters() { - // TODO Auto-generated method stub - return null; - } - - @Override - public MultivaluedMap getPathParameters(boolean arg0) { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getPathSegments() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getPathSegments(boolean arg0) { - // TODO Auto-generated method stub - return null; - } - - @Override - public MultivaluedMap getQueryParameters() { - // TODO Auto-generated method stub - return null; - } - - @Override - public MultivaluedMap getQueryParameters(boolean arg0) { - // TODO Auto-generated method stub - return null; - } - - @Override - public URI getRequestUri() { - // TODO Auto-generated method stub - return null; - } - - @Override - public UriBuilder getRequestUriBuilder() { - // TODO Auto-generated method stub - return null; - } + @Override + public URI getAbsolutePath() { + // TODO Auto-generated method stub + return null; + } + + @Override + public UriBuilder getAbsolutePathBuilder() { + // TODO Auto-generated method stub + return null; + } + + @Override + public URI getBaseUri() { + try { + return new URI("http://fakeuri/templeton"); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + return null; + } + + @Override + public UriBuilder getBaseUriBuilder() { + // TODO Auto-generated method stub + return null; + } + + @Override + public List getMatchedResources() { + // TODO Auto-generated method stub + return null; + } + + 
@Override + public List getMatchedURIs() { + // TODO Auto-generated method stub + return null; + } + + @Override + public List getMatchedURIs(boolean arg0) { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getPath() { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getPath(boolean arg0) { + // TODO Auto-generated method stub + return null; + } + + @Override + public MultivaluedMap getPathParameters() { + // TODO Auto-generated method stub + return null; + } + + @Override + public MultivaluedMap getPathParameters(boolean arg0) { + // TODO Auto-generated method stub + return null; + } + + @Override + public List getPathSegments() { + // TODO Auto-generated method stub + return null; + } + + @Override + public List getPathSegments(boolean arg0) { + // TODO Auto-generated method stub + return null; + } + + @Override + public MultivaluedMap getQueryParameters() { + // TODO Auto-generated method stub + return null; + } + + @Override + public MultivaluedMap getQueryParameters(boolean arg0) { + // TODO Auto-generated method stub + return null; + } + + @Override + public URI getRequestUri() { + // TODO Auto-generated method stub + return null; + } + + @Override + public UriBuilder getRequestUriBuilder() { + // TODO Auto-generated method stub + return null; + } } diff --git a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java index 9e4f87a..f48c548 100644 --- a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java +++ b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java @@ -30,219 +30,219 @@ import org.junit.Test; public class TestTempletonUtils { - public static final String[] CONTROLLER_LINES = { - "2011-12-15 18:12:21,758 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - More information at: http://localhost:50030/jobdetails.jsp?jobid=job_201112140012_0047", - "2011-12-15 18:12:46,907 [main] INFO org.apache.pig.tools.pigstats.SimplePigStats - Script Statistics: " - }; - public static final String testDataDir = System.getProperty("test.data.dir"); - File tmpFile; - File usrFile; - - @Before - public void setup() { - try { - tmpFile = new File(testDataDir, "tmp"); - tmpFile.createNewFile(); - usrFile = new File(testDataDir, "usr"); - usrFile.createNewFile(); - } catch (IOException ex) { - Assert.fail(ex.getMessage()); - } + public static final String[] CONTROLLER_LINES = { + "2011-12-15 18:12:21,758 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - More information at: http://localhost:50030/jobdetails.jsp?jobid=job_201112140012_0047", + "2011-12-15 18:12:46,907 [main] INFO org.apache.pig.tools.pigstats.SimplePigStats - Script Statistics: " + }; + public static final String testDataDir = System.getProperty("test.data.dir"); + File tmpFile; + File usrFile; + + @Before + public void setup() { + try { + tmpFile = new File(testDataDir, "tmp"); + tmpFile.createNewFile(); + usrFile = new File(testDataDir, "usr"); + usrFile.createNewFile(); + } catch (IOException ex) { + Assert.fail(ex.getMessage()); } - - @After - public void tearDown() { - tmpFile.delete(); - usrFile.delete(); + } + + @After + public void tearDown() { + tmpFile.delete(); + usrFile.delete(); + } + + @Test + public void testIssetString() { + 
Assert.assertFalse(TempletonUtils.isset((String)null)); + Assert.assertFalse(TempletonUtils.isset("")); + Assert.assertTrue(TempletonUtils.isset("hello")); + } + + @Test + public void testIssetTArray() { + Assert.assertFalse(TempletonUtils.isset((Long[]) null)); + Assert.assertFalse(TempletonUtils.isset(new String[0])); + String[] parts = new String("hello.world").split("\\."); + Assert.assertTrue(TempletonUtils.isset(parts)); + } + + @Test + public void testPrintTaggedJobID() { + //JobID job = new JobID(); + // TODO -- capture System.out? + } + + + @Test + public void testExtractPercentComplete() { + Assert.assertNull(TempletonUtils.extractPercentComplete("fred")); + for (String line : CONTROLLER_LINES) { + Assert.assertNull(TempletonUtils.extractPercentComplete(line)); } - @Test - public void testIssetString() { - Assert.assertFalse(TempletonUtils.isset((String)null)); - Assert.assertFalse(TempletonUtils.isset("")); - Assert.assertTrue(TempletonUtils.isset("hello")); + String fifty = "2011-12-15 18:12:36,333 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 50% complete"; + Assert.assertEquals("50% complete", TempletonUtils.extractPercentComplete(fifty)); + } + + @Test + public void testEncodeArray() { + Assert.assertEquals(null, TempletonUtils.encodeArray((String []) null)); + String[] tmp = new String[0]; + Assert.assertTrue(TempletonUtils.encodeArray(new String[0]).length() == 0); + tmp = new String[3]; + tmp[0] = "fred"; + tmp[1] = null; + tmp[2] = "peter,lisa,, barney"; + Assert.assertEquals("fred,,peter" + + StringUtils.ESCAPE_CHAR + ",lisa" + StringUtils.ESCAPE_CHAR + "," + + StringUtils.ESCAPE_CHAR + ", barney", + TempletonUtils.encodeArray(tmp)); + } + + @Test + public void testDecodeArray() { + Assert.assertTrue(TempletonUtils.encodeArray((String[]) null) == null); + String[] tmp = new String[3]; + tmp[0] = "fred"; + tmp[1] = null; + tmp[2] = "peter,lisa,, barney"; + String[] tmp2 = TempletonUtils.decodeArray(TempletonUtils.encodeArray(tmp)); + try { + for (int i=0; i< tmp.length; i++) { + Assert.assertEquals((String) tmp[i], (String)tmp2[i]); + } + } catch (Exception e) { + Assert.fail("Arrays were not equal" + e.getMessage()); } - - @Test - public void testIssetTArray() { - Assert.assertFalse(TempletonUtils.isset((Long[]) null)); - Assert.assertFalse(TempletonUtils.isset(new String[0])); - String[] parts = new String("hello.world").split("\\."); - Assert.assertTrue(TempletonUtils.isset(parts)); + } + + @Test + public void testHadoopFsPath() { + try { + TempletonUtils.hadoopFsPath(null, null, null); + TempletonUtils.hadoopFsPath(tmpFile.toURI().toString(), null, null); + TempletonUtils.hadoopFsPath(tmpFile.toURI().toString(), new Configuration(), null); + } catch (FileNotFoundException e) { + Assert.fail("Couldn't find " + tmpFile.toURI().toString()); + } catch (Exception e) { + // This is our problem -- it means the configuration was wrong. + e.printStackTrace(); } - - @Test - public void testPrintTaggedJobID() { - //JobID job = new JobID(); - // TODO -- capture System.out? + try { + TempletonUtils.hadoopFsPath("/scoobydoo/teddybear", + new Configuration(), null); + Assert.fail("Should not have found /scoobydoo/teddybear"); + } catch (FileNotFoundException e) { + // Should go here. + } catch (Exception e) { + // This is our problem -- it means the configuration was wrong. 
+ e.printStackTrace(); } - - - @Test - public void testExtractPercentComplete() { - Assert.assertNull(TempletonUtils.extractPercentComplete("fred")); - for (String line : CONTROLLER_LINES) { - Assert.assertNull(TempletonUtils.extractPercentComplete(line)); - } - - String fifty = "2011-12-15 18:12:36,333 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 50% complete"; - Assert.assertEquals("50% complete", TempletonUtils.extractPercentComplete(fifty)); + try { + TempletonUtils.hadoopFsPath("a", new Configuration(), "teddybear"); + Assert.fail("Should not have found /user/teddybear/a"); + } catch (FileNotFoundException e) { + Assert.assertTrue(e.getMessage().contains("/user/teddybear/a")); + } catch (Exception e) { + // This is our problem -- it means the configuration was wrong. + e.printStackTrace(); + Assert.fail("Get wrong exception: " + e.getMessage()); } - - @Test - public void testEncodeArray() { - Assert.assertEquals(null, TempletonUtils.encodeArray((String []) null)); - String[] tmp = new String[0]; - Assert.assertTrue(TempletonUtils.encodeArray(new String[0]).length() == 0); - tmp = new String[3]; - tmp[0] = "fred"; - tmp[1] = null; - tmp[2] = "peter,lisa,, barney"; - Assert.assertEquals("fred,,peter" + - StringUtils.ESCAPE_CHAR + ",lisa" + StringUtils.ESCAPE_CHAR + "," + - StringUtils.ESCAPE_CHAR + ", barney", - TempletonUtils.encodeArray(tmp)); + } + + @Test + public void testHadoopFsFilename() { + try { + Assert.assertEquals(null, TempletonUtils.hadoopFsFilename(null, null, null)); + Assert.assertEquals(null, + TempletonUtils.hadoopFsFilename(tmpFile.toURI().toString(), null, null)); + Assert.assertEquals(tmpFile.toURI().toString(), + TempletonUtils.hadoopFsFilename(tmpFile.toURI().toString(), + new Configuration(), + null)); + } catch (FileNotFoundException e) { + Assert.fail("Couldn't find name for /tmp"); + Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); + } catch (Exception e) { + // Something else is wrong + e.printStackTrace(); } - - @Test - public void testDecodeArray() { - Assert.assertTrue(TempletonUtils.encodeArray((String[]) null) == null); - String[] tmp = new String[3]; - tmp[0] = "fred"; - tmp[1] = null; - tmp[2] = "peter,lisa,, barney"; - String[] tmp2 = TempletonUtils.decodeArray(TempletonUtils.encodeArray(tmp)); - try { - for (int i=0; i< tmp.length; i++) { - Assert.assertEquals((String) tmp[i], (String)tmp2[i]); - } - } catch (Exception e) { - Assert.fail("Arrays were not equal" + e.getMessage()); - } + try { + TempletonUtils.hadoopFsFilename("/scoobydoo/teddybear", + new Configuration(), null); + Assert.fail("Should not have found /scoobydoo/teddybear"); + } catch (FileNotFoundException e) { + // Should go here. + } catch (Exception e) { + // Something else is wrong. + e.printStackTrace(); } - - @Test - public void testHadoopFsPath() { - try { - TempletonUtils.hadoopFsPath(null, null, null); - TempletonUtils.hadoopFsPath(tmpFile.toURI().toString(), null, null); - TempletonUtils.hadoopFsPath(tmpFile.toURI().toString(), new Configuration(), null); - } catch (FileNotFoundException e) { - Assert.fail("Couldn't find " + tmpFile.toURI().toString()); - } catch (Exception e) { - // This is our problem -- it means the configuration was wrong. - e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsPath("/scoobydoo/teddybear", - new Configuration(), null); - Assert.fail("Should not have found /scoobydoo/teddybear"); - } catch (FileNotFoundException e) { - // Should go here. 
- } catch (Exception e) { - // This is our problem -- it means the configuration was wrong. - e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsPath("a", new Configuration(), "teddybear"); - Assert.fail("Should not have found /user/teddybear/a"); - } catch (FileNotFoundException e) { - Assert.assertTrue(e.getMessage().contains("/user/teddybear/a")); - } catch (Exception e) { - // This is our problem -- it means the configuration was wrong. - e.printStackTrace(); - Assert.fail("Get wrong exception: " + e.getMessage()); - } + } + + @Test + public void testHadoopFsListAsArray() { + try { + Assert.assertTrue(TempletonUtils.hadoopFsListAsArray(null, null, null) == null); + Assert.assertTrue(TempletonUtils.hadoopFsListAsArray( + tmpFile.toURI().toString() + "," + usrFile.toString(), null, null) == null); + String[] tmp2 = TempletonUtils.hadoopFsListAsArray( + tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), + new Configuration(), null); + Assert.assertEquals(tmpFile.toURI().toString(), tmp2[0]); + Assert.assertEquals(usrFile.toURI().toString(), tmp2[1]); + } catch (FileNotFoundException e) { + Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); + } catch (Exception e) { + // Something else is wrong + e.printStackTrace(); } - - @Test - public void testHadoopFsFilename() { - try { - Assert.assertEquals(null, TempletonUtils.hadoopFsFilename(null, null, null)); - Assert.assertEquals(null, - TempletonUtils.hadoopFsFilename(tmpFile.toURI().toString(), null, null)); - Assert.assertEquals(tmpFile.toURI().toString(), - TempletonUtils.hadoopFsFilename(tmpFile.toURI().toString(), - new Configuration(), - null)); - } catch (FileNotFoundException e) { - Assert.fail("Couldn't find name for /tmp"); - Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); - } catch (Exception e) { - // Something else is wrong - e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsFilename("/scoobydoo/teddybear", - new Configuration(), null); - Assert.fail("Should not have found /scoobydoo/teddybear"); - } catch (FileNotFoundException e) { - // Should go here. - } catch (Exception e) { - // Something else is wrong. - e.printStackTrace(); - } + try { + TempletonUtils.hadoopFsListAsArray("/scoobydoo/teddybear,joe", + new Configuration(), + null); + Assert.fail("Should not have found /scoobydoo/teddybear"); + } catch (FileNotFoundException e) { + // Should go here. + } catch (Exception e) { + // Something else is wrong. + e.printStackTrace(); } - - @Test - public void testHadoopFsListAsArray() { - try { - Assert.assertTrue(TempletonUtils.hadoopFsListAsArray(null, null, null) == null); - Assert.assertTrue(TempletonUtils.hadoopFsListAsArray( - tmpFile.toURI().toString() + "," + usrFile.toString(), null, null) == null); - String[] tmp2 = TempletonUtils.hadoopFsListAsArray( - tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), - new Configuration(), null); - Assert.assertEquals(tmpFile.toURI().toString(), tmp2[0]); - Assert.assertEquals(usrFile.toURI().toString(), tmp2[1]); - } catch (FileNotFoundException e) { - Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); - } catch (Exception e) { - // Something else is wrong - e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsListAsArray("/scoobydoo/teddybear,joe", - new Configuration(), - null); - Assert.fail("Should not have found /scoobydoo/teddybear"); - } catch (FileNotFoundException e) { - // Should go here. - } catch (Exception e) { - // Something else is wrong. 
- e.printStackTrace(); - } + } + + @Test + public void testHadoopFsListAsString() { + try { + Assert.assertTrue(TempletonUtils.hadoopFsListAsString(null, null, null) == null); + Assert.assertTrue(TempletonUtils.hadoopFsListAsString( + tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), + null, null) == null); + Assert.assertEquals( + tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), + TempletonUtils.hadoopFsListAsString( + tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), + new Configuration(), null)); + } catch (FileNotFoundException e) { + Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); + } catch (Exception e) { + // Something else is wrong + e.printStackTrace(); } - - @Test - public void testHadoopFsListAsString() { - try { - Assert.assertTrue(TempletonUtils.hadoopFsListAsString(null, null, null) == null); - Assert.assertTrue(TempletonUtils.hadoopFsListAsString( - tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), - null, null) == null); - Assert.assertEquals( - tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), - TempletonUtils.hadoopFsListAsString( - tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), - new Configuration(), null)); - } catch (FileNotFoundException e) { - Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); - } catch (Exception e) { - // Something else is wrong - e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsListAsString("/scoobydoo/teddybear,joe", - new Configuration(), - null); - Assert.fail("Should not have found /scoobydoo/teddybear"); - } catch (FileNotFoundException e) { - // Should go here. - } catch (Exception e) { - // Something else is wrong. - e.printStackTrace(); - } + try { + TempletonUtils.hadoopFsListAsString("/scoobydoo/teddybear,joe", + new Configuration(), + null); + Assert.fail("Should not have found /scoobydoo/teddybear"); + } catch (FileNotFoundException e) { + // Should go here. + } catch (Exception e) { + // Something else is wrong. + e.printStackTrace(); } + } } diff --git a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTrivialExecService.java b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTrivialExecService.java index a9b151c..a873a96 100644 --- a/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTrivialExecService.java +++ b/hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTrivialExecService.java @@ -27,43 +27,43 @@ import org.junit.Test; public class TestTrivialExecService { - @Test - public void test() { - ArrayList list = new ArrayList(); - list.add("echo"); - list.add("success"); - BufferedReader out = null; - BufferedReader err = null; - try { - Process process = TrivialExecService.getInstance() - .run(list, - new ArrayList(), - new HashMap()); - out = new BufferedReader(new InputStreamReader( - process.getInputStream())); - err = new BufferedReader(new InputStreamReader( - process.getErrorStream())); - Assert.assertEquals("success", out.readLine()); - out.close(); - String line; - while ((line = err.readLine()) != null) { - Assert.fail(line); - } - process.waitFor(); - } catch (Exception e) { - e.printStackTrace(); - Assert.fail("Process caused exception."); - } finally { - try { - out.close(); - } catch (Exception ex) { - // Whatever. 
- } - try { - err.close(); - } catch (Exception ex) { - // Whatever - } - } + @Test + public void test() { + ArrayList list = new ArrayList(); + list.add("echo"); + list.add("success"); + BufferedReader out = null; + BufferedReader err = null; + try { + Process process = TrivialExecService.getInstance() + .run(list, + new ArrayList(), + new HashMap()); + out = new BufferedReader(new InputStreamReader( + process.getInputStream())); + err = new BufferedReader(new InputStreamReader( + process.getErrorStream())); + Assert.assertEquals("success", out.readLine()); + out.close(); + String line; + while ((line = err.readLine()) != null) { + Assert.fail(line); + } + process.waitFor(); + } catch (Exception e) { + e.printStackTrace(); + Assert.fail("Process caused exception."); + } finally { + try { + out.close(); + } catch (Exception ex) { + // Whatever. + } + try { + err.close(); + } catch (Exception ex) { + // Whatever + } } + } }
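
A side note on the TestTrivialExecService hunk above: the manual finally/close pattern there (two readers closed in a finally block with empty catch clauses) can be written with try-with-resources, which closes both streams even when an assertion fails. The sketch below is an illustration only, not part of the patch; it assumes a Java 7+ build, reuses the TrivialExecService.getInstance().run(...) call exactly as it appears in the diff, and guesses the generic type parameters, since the angle brackets were stripped from the diff text above.

    package org.apache.hive.hcatalog.templeton.tool;

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;

    import org.junit.Assert;
    import org.junit.Test;

    public class TestTrivialExecServiceSketch {
      @Test
      public void echoesSuccess() throws Exception {
        List<String> cmd = new ArrayList<String>();
        cmd.add("echo");
        cmd.add("success");
        // Same three-argument run() call as in the patch; the generic parameters
        // on the extra args are assumptions made for this sketch.
        Process process = TrivialExecService.getInstance()
            .run(cmd, new ArrayList<String>(), new HashMap<String, String>());
        // try-with-resources (Java 7+) closes both readers on every exit path,
        // replacing the finally block and its empty catch clauses in the test above.
        try (BufferedReader out = new BufferedReader(new InputStreamReader(process.getInputStream()));
             BufferedReader err = new BufferedReader(new InputStreamReader(process.getErrorStream()))) {
          Assert.assertEquals("success", out.readLine());
          String line;
          while ((line = err.readLine()) != null) {
            Assert.fail(line);
          }
          process.waitFor();
        }
      }
    }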