Index: storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputStorageDriver.java.broken =================================================================== --- storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputStorageDriver.java.broken (revision 1295948) +++ storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputStorageDriver.java.broken (working copy) @@ -1,458 +0,0 @@ -package org.apache.hcatalog.hbase; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.TableSnapshot; -import org.apache.hcatalog.hbase.snapshot.Transaction; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -import org.junit.Test; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Map; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -/** - * Tests components of HBaseBulkOutputStorageDriver using ManyMiniCluster. 
- * Including ImprtSequenceFile, HBaseOutputStorageDrivers and HBaseBulkOutputFormat - */ -public class TestHBaseBulkOutputStorageDriver extends SkeletonHBaseTest { - private final static Log LOG = LogFactory.getLog(TestHBaseBulkOutputStorageDriver.class); - - private final HiveConf allConf; - private final HCatDriver hcatDriver; - - public TestHBaseBulkOutputStorageDriver() { - allConf = getHiveConf(); - allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); - allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(),"warehouse").toString()); - - //Add hbase properties - for (Map.Entry el : getHbaseConf()) - allConf.set(el.getKey(), el.getValue()); - for (Map.Entry el : getJobConf()) - allConf.set(el.getKey(), el.getValue()); - - SessionState.start(new CliSessionState(allConf)); - hcatDriver = new HCatDriver(); - } - - public static class MapWrite extends Mapper { - - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String vals[] = value.toString().split(","); - Put put = new Put(Bytes.toBytes(vals[0])); - for(int i=1;i { - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = (OutputJobInfo)HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatRecord record = new DefaultHCatRecord(3); - HCatSchema schema = jobInfo.getOutputSchema(); - String vals[] = value.toString().split(","); - record.setInteger("key",schema,Integer.parseInt(vals[0])); - for(int i=1;i el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - - SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - - } - - @Test - public void TestSnapshotConversion() throws Exception{ - Initialize(); - String tableName = newTableName("mytableOne"); - String databaseName = newTableName("mydatabase"); - String fullyQualTableName = databaseName + "." 
+ tableName; - String db_dir = getTestDir() + "/hbasedb"; - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" - + db_dir + "'"; - String tableQuery = "CREATE TABLE " + fullyQualTableName - + "(key string, value1 string, value2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf2:q2')" ; - - CommandProcessorResponse cmdResponse = hcatDriver.run(dbquery); - assertEquals(0, cmdResponse.getResponseCode()); - cmdResponse = hcatDriver.run(tableQuery); - assertEquals(0, cmdResponse.getResponseCode()); - - InputJobInfo inputInfo = InputJobInfo.create(databaseName, tableName, null, null, null); - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - Job job = new Job(conf); - InitializeInput.setInput(job, inputInfo); - String modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); - inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); - - Map revMap = new HashMap(); - revMap.put("cf1", 3L); - revMap.put("cf2", 5L); - TableSnapshot hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap,-1); - HCatTableSnapshot hcatSnapshot = HBaseInputStorageDriver.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); - - assertEquals(hcatSnapshot.getRevision("value1"), 3); - assertEquals(hcatSnapshot.getRevision("value2"), 5); - - String dropTable = "DROP TABLE " + fullyQualTableName; - cmdResponse = hcatDriver.run(dropTable); - assertEquals(0, cmdResponse.getResponseCode()); - - tableName = newTableName("mytableTwo"); - fullyQualTableName = databaseName + "." + tableName; - tableQuery = "CREATE TABLE " + fullyQualTableName - + "(key string, value1 string, value2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf1:q2')" ; - cmdResponse = hcatDriver.run(tableQuery); - assertEquals(0, cmdResponse.getResponseCode()); - revMap.clear(); - revMap.put("cf1", 3L); - hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1); - inputInfo = InputJobInfo.create(databaseName, tableName, null, null, null); - InitializeInput.setInput(job, inputInfo); - modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); - inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); - hcatSnapshot = HBaseInputStorageDriver.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); - assertEquals(hcatSnapshot.getRevision("value1"), 3); - assertEquals(hcatSnapshot.getRevision("value2"), 3); - - dropTable = "DROP TABLE " + fullyQualTableName; - cmdResponse = hcatDriver.run(dropTable); - assertEquals(0, cmdResponse.getResponseCode()); - - String dropDatabase = "DROP DATABASE IF EXISTS " + databaseName + "CASCADE"; - cmdResponse = hcatDriver.run(dropDatabase); - assertEquals(0, cmdResponse.getResponseCode()); - } - -} Index: storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputStorageDriver.java.broken =================================================================== --- storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputStorageDriver.java.broken (revision 1295948) +++ storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputStorageDriver.java.broken (working copy) @@ -1,310 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hcatalog.hbase; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.client.HBaseAdmin; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.Transaction; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.junit.Test; - -public class TestHBaseInputStorageDriver extends SkeletonHBaseTest { - - private static HiveConf hcatConf; - private static HCatDriver hcatDriver; - private final byte[] FAMILY = Bytes.toBytes("testFamily"); - private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1"); - private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2"); - - private List generatePuts(int num, String tableName) throws IOException { - - List columnFamilies = Arrays.asList("testFamily"); - RevisionManager rm = null; - List myPuts; - try { - rm = HBaseHCatStorageHandler - .getOpenedRevisionManager(getHbaseConf()); - rm.open(); - myPuts = new ArrayList(); - for (int i = 1; i <= num; i++) { - Put put = new Put(Bytes.toBytes("testRow")); - put.add(FAMILY, QUALIFIER1, i, Bytes.toBytes("textValue-" + i)); - put.add(FAMILY, QUALIFIER2, i, Bytes.toBytes("textValue-" + i)); - 
myPuts.add(put); - Transaction tsx = rm.beginWriteTransaction(tableName, - columnFamilies); - rm.commitWriteTransaction(tsx); - } - } finally { - if (rm != null) - rm.close(); - } - - return myPuts; - } - - private void Initialize() throws Exception { - hcatConf = getHiveConf(); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - URI fsuri = getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), - getTestDir()); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - - //Add hbase properties - - for (Map.Entry el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - - SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - - } - - private void populateHBaseTable(String tName, int revisions) throws IOException { - List myPuts = generatePuts(revisions, tName); - HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); - table.put(myPuts); - } - - @Test - public void TestHBaseTableReadMR() throws Exception { - Initialize(); - String tableName = newTableName("mytable"); - String databaseName = newTableName("mydatabase"); - String db_dir = getTestDir() + "/hbasedb"; - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" - + db_dir + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')" ; - - CommandProcessorResponse responseOne = hcatDriver.run(dbquery); - assertEquals(0, responseOne.getResponseCode()); - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - String hbaseTableName = databaseName + "." 
+ tableName; - boolean doesTableExist = hAdmin.tableExists(hbaseTableName); - assertTrue(doesTableExist); - - populateHBaseTable(hbaseTableName, 5); - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - // output settings - Path outputDir = new Path(getTestDir(), "mapred/testHbaseTableMRRead"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - // create job - Job job = new Job(conf, "hbase-mr-read-test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadHTable.class); - - job.setInputFormatClass(HCatInputFormat.class); - InputJobInfo inputJobInfo = InputJobInfo.create(databaseName, tableName, - null, null, null); - HCatInputFormat.setInput(job, inputJobInfo); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - assertFalse(MapReadHTable.error); - assertEquals(MapReadHTable.count, 1); - - String dropTableQuery = "DROP TABLE " + hbaseTableName ; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); - assertFalse(isHbaseTableThere); - - String dropDB = "DROP DATABASE " + databaseName; - CommandProcessorResponse responseFour = hcatDriver.run(dropDB); - assertEquals(0, responseFour.getResponseCode()); - } - - @Test - public void TestHBaseTableProjectionReadMR() throws Exception { - - Initialize(); - String tableName = newTableName("mytable"); - String tableQuery = "CREATE TABLE " + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + - "testFamily:testQualifier1,testFamily:testQualifier2')" ; - - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(tableName); - assertTrue(doesTableExist); - - populateHBaseTable(tableName, 5); - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - // output settings - Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - // create job - Job job = new Job(conf, "hbase-column-projection"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadProjHTable.class); - job.setInputFormatClass(HCatInputFormat.class); - InputJobInfo inputJobInfo = InputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, null, null, - null); - HCatInputFormat.setOutputSchema(job, getProjectionSchema()); - HCatInputFormat.setInput(job, inputJobInfo); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - 
job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - assertFalse(MapReadProjHTable.error); - assertEquals(MapReadProjHTable.count, 1); - - String dropTableQuery = "DROP TABLE " + tableName ; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(tableName); - assertFalse(isHbaseTableThere); - } - - - static class MapReadHTable - extends - Mapper { - - static boolean error = false; - static int count = 0; - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - boolean correctValues = (value.size() == 3) - && (value.get(0).toString()).equalsIgnoreCase("testRow") - && (value.get(1).toString()).equalsIgnoreCase("textValue-5") - && (value.get(2).toString()).equalsIgnoreCase("textValue-5"); - - if (correctValues == false) { - error = true; - } - count++; - } - } - - static class MapReadProjHTable - extends - Mapper { - - static boolean error = false; - static int count = 0; - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - boolean correctValues = (value.size() == 2) - && (value.get(0).toString()).equalsIgnoreCase("testRow") - && (value.get(1).toString()).equalsIgnoreCase("textValue-5"); - - if (correctValues == false) { - error = true; - } - count++; - } - } - - private HCatSchema getProjectionSchema() throws HCatException { - - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("key", HCatFieldSchema.Type.STRING, - "")); - schema.append(new HCatFieldSchema("testqualifier1", - HCatFieldSchema.Type.STRING, "")); - return schema; - } - - -} Index: storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java =================================================================== --- storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java (revision 0) +++ storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java (revision 0) @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hcatalog.hbase; + +import static org.junit.Assert.assertEquals; + +import java.net.URI; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hcatalog.cli.HCatDriver; +import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.hbase.snapshot.TableSnapshot; +import org.apache.hcatalog.mapreduce.InitializeInput; +import org.apache.hcatalog.mapreduce.InputJobInfo; +import org.junit.Test; + +public class TestSnapshots extends SkeletonHBaseTest { + private static HiveConf hcatConf; + private static HCatDriver hcatDriver; + + public void Initialize() throws Exception { + hcatConf = getHiveConf(); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + URI fsuri = getFileSystem().getUri(); + Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), + getTestDir()); + hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); + hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); + + //Add hbase properties + + for (Map.Entry el : getHbaseConf()) { + if (el.getKey().startsWith("hbase.")) { + hcatConf.set(el.getKey(), el.getValue()); + } + } + + SessionState.start(new CliSessionState(hcatConf)); + hcatDriver = new HCatDriver(); + + } + + @Test + public void TestSnapshotConversion() throws Exception{ + Initialize(); + String tableName = newTableName("mytableOne"); + String databaseName = newTableName("mydatabase"); + String fullyQualTableName = databaseName + "." 
+ tableName; + String db_dir = getTestDir() + "/hbasedb"; + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + + db_dir + "'"; + String tableQuery = "CREATE TABLE " + fullyQualTableName + + "(key string, value1 string, value2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf2:q2')" ; + + CommandProcessorResponse cmdResponse = hcatDriver.run(dbquery); + assertEquals(0, cmdResponse.getResponseCode()); + cmdResponse = hcatDriver.run(tableQuery); + assertEquals(0, cmdResponse.getResponseCode()); + + InputJobInfo inputInfo = InputJobInfo.create(databaseName, tableName, null, null, null); + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + Job job = new Job(conf); + inputInfo.getProperties().setProperty(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, "dummysnapshot"); + InitializeInput.setInput(job, inputInfo); + String modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); + inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); + + Map revMap = new HashMap(); + revMap.put("cf1", 3L); + revMap.put("cf2", 5L); + TableSnapshot hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap,-1); + HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); + + assertEquals(hcatSnapshot.getRevision("value1"), 3); + assertEquals(hcatSnapshot.getRevision("value2"), 5); + + String dropTable = "DROP TABLE " + fullyQualTableName; + cmdResponse = hcatDriver.run(dropTable); + assertEquals(0, cmdResponse.getResponseCode()); + + tableName = newTableName("mytableTwo"); + fullyQualTableName = databaseName + "." 
+                + tableName;
+        tableQuery = "CREATE TABLE " + fullyQualTableName
+            + "(key string, value1 string, value2 string) STORED BY " +
+            "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'"
+            + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf1:q2')" ;
+        cmdResponse = hcatDriver.run(tableQuery);
+        assertEquals(0, cmdResponse.getResponseCode());
+        revMap.clear();
+        revMap.put("cf1", 3L);
+        hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1);
+        inputInfo = InputJobInfo.create(databaseName, tableName, null, null, null);
+        inputInfo.getProperties().setProperty(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, "dummysnapshot");
+        InitializeInput.setInput(job, inputInfo);
+        modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO);
+        inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo);
+        hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo());
+        assertEquals(hcatSnapshot.getRevision("value1"), 3);
+        assertEquals(hcatSnapshot.getRevision("value2"), 3);
+
+        dropTable = "DROP TABLE " + fullyQualTableName;
+        cmdResponse = hcatDriver.run(dropTable);
+        assertEquals(0, cmdResponse.getResponseCode());
+
+        String dropDatabase = "DROP DATABASE IF EXISTS " + databaseName + " CASCADE";
+        cmdResponse = hcatDriver.run(dropDatabase);
+        assertEquals(0, cmdResponse.getResponseCode());
+    }
+
+}
Index: storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java
===================================================================
--- storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java (revision 0)
+++ storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java (revision 0)
@@ -0,0 +1,480 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hcatalog.hbase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.mapred.TableOutputFormat; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.RunningJob; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hcatalog.cli.HCatDriver; +import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.DefaultHCatRecord; +import org.apache.hcatalog.data.HCatRecord; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.hbase.snapshot.FamilyRevision; +import org.apache.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hcatalog.hbase.snapshot.TableSnapshot; +import org.apache.hcatalog.hbase.snapshot.Transaction; +import org.apache.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hcatalog.mapreduce.InputJobInfo; +import org.apache.hcatalog.mapreduce.OutputJobInfo; +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Test HBaseDirectOUtputFormat and HBaseHCatStorageHandler using a MiniCluster + */ +public class TestHBaseDirectOutputFormat extends SkeletonHBaseTest { + + private final HiveConf allConf; + private final HCatDriver hcatDriver; + + public TestHBaseDirectOutputFormat() { + allConf = getHiveConf(); + allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); + allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(),"warehouse").toString()); + + //Add hbase properties + for (Map.Entry el : getHbaseConf()) + allConf.set(el.getKey(), el.getValue()); + for (Map.Entry el : getJobConf()) + allConf.set(el.getKey(), el.getValue()); + + SessionState.start(new CliSessionState(allConf)); + hcatDriver = new HCatDriver(); + } + + @Test + public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { + String testName = "directOutputFormatTest"; + Path methodTestDir = new Path(getTestDir(),testName); 
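+        // Overview of this test (informal reading): rows are written straight to HBase
+        // through HBaseDirectOutputFormat inside a write transaction that is created by
+        // hand from the RevisionManager and serialized into the job's OutputJobInfo under
+        // HBaseConstants.PROPERTY_WRITE_TXN_KEY; the table is then scanned to verify that
+        // the expected cells were stored.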
+ + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + //create table + createTable(tableName,new String[]{familyName}); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:ONE,spanish:DOS", + "3,english:ONE,spanish:TRES"}; + + + + // input/output settings + Path inputPath = new Path(methodTestDir,"mr_input"); + getFileSystem().mkdirs(inputPath); + FSDataOutputStream os = getFileSystem().create(new Path(inputPath,"inputFile.txt")); + for(String line: data) + os.write(Bytes.toBytes(line + "\n")); + os.close(); + + //create job + JobConf job = new JobConf(conf); + job.setJobName(testName); + job.setWorkingDirectory(new Path(methodTestDir,"mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapWrite.class); + + job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); + org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); + + job.setOutputFormat(HBaseDirectOutputFormat.class); + job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); + + //manually create transaction + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); + Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); + outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, + HCatUtil.serialize(txn)); + job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, + HCatUtil.serialize(outputJobInfo)); + } finally { + rm.close(); + } + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + job.setNumReduceTasks(0); + + RunningJob runJob = JobClient.runJob(job); + runJob.waitForCompletion(); + assertTrue(runJob.isSuccessful()); + + //verify + HTable table = new HTable(conf, tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index=0; + for(Result result: scanner) { + String vals[] = data[index].toString().split(","); + for(int i=1;i abortedWriteTransactions = rm.getAbortedWriteTransactions( + databaseName + "." + tableName, family); + assertEquals(1, abortedWriteTransactions.size()); + assertEquals(1, abortedWriteTransactions.get(0).getRevision()); + } + } finally { + rm.close(); + } + + // verify that hbase has the records of the successful maps. + HTable table = new HTable(conf, databaseName + "." + tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], + Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0) + .getTimestamp()); + } + index++; + } + assertEquals(data.length - 1, index); + + // verify that the inputformat returns empty results. 
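+        // Reasoning for the check below (informal): every cell written by the successful
+        // map tasks carries revision 1, the revision of the aborted transaction, so a
+        // snapshot-based read through HCatInputFormat is expected to surface no records;
+        // MapReadAbortedTransaction fails the job if it receives any input.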
+ Path outputDir = new Path(getTestDir(), + "mapred/testHBaseTableIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + job = new Job(conf, "hbase-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadAbortedTransaction.class); + job.setInputFormatClass(HCatInputFormat.class); + InputJobInfo inputJobInfo = InputJobInfo.create(databaseName, + tableName, null, null, null); + HCatInputFormat.setInput(job, inputJobInfo); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + } + + private Job configureJob(String jobName, Configuration conf, + Path workingDir, Class mapperClass, + OutputJobInfo outputJobInfo, Path inputPath) throws IOException { + Job job = new Job(conf, jobName); + job.setWorkingDirectory(workingDir); + job.setJarByClass(this.getClass()); + job.setMapperClass(mapperClass); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + return job; + } + + public static class MapHCatWrite extends Mapper { + + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + OutputJobInfo jobInfo = (OutputJobInfo)HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatRecord record = new DefaultHCatRecord(3); + HCatSchema schema = jobInfo.getOutputSchema(); + String vals[] = value.toString().split(","); + record.setInteger("key",schema,Integer.parseInt(vals[0])); + for(int i=1;i { + + @Override + public void configure(JobConf job) { + } + + @Override + public void close() throws IOException { + } + + @Override + public void map(LongWritable key, Text value, + OutputCollector output, Reporter reporter) + throws IOException { + String vals[] = value.toString().split(","); + Put put = new Put(Bytes.toBytes(vals[0])); + for(int i=1;i { + + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + OutputJobInfo jobInfo = (OutputJobInfo)HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatRecord record = new DefaultHCatRecord(3); + HCatSchema schema = jobInfo.getOutputSchema(); + String vals[] = value.toString().split(","); + record.setInteger("key", schema, Integer.parseInt(vals[0])); + if (vals[0].equals("3")) { + throw new IOException("Failing map to test abort"); + } + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + record.set(pair[0], schema, pair[1]); + } + context.write(null, record); + } + + } + + static class MapReadAbortedTransaction + extends + Mapper, Text> { + + @Override + public void run(Context context) throws IOException, + InterruptedException { + setup(context); + if (context.nextKeyValue()) { + map(context.getCurrentKey(), context.getCurrentValue(), context); + while 
(context.nextKeyValue()) { + map(context.getCurrentKey(), context.getCurrentValue(), + context); + } + throw new IOException("There should have been no records"); + } + cleanup(context); + } + + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + } + } +} Index: storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java =================================================================== --- storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java (revision 0) +++ storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java (revision 0) @@ -0,0 +1,609 @@ +package org.apache.hcatalog.hbase; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.RunningJob; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hcatalog.cli.HCatDriver; +import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.DefaultHCatRecord; +import org.apache.hcatalog.data.HCatRecord; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.MapReadAbortedTransaction; +import org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.MapWriteAbortTransaction; +import org.apache.hcatalog.hbase.snapshot.FamilyRevision; +import org.apache.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hcatalog.hbase.snapshot.TableSnapshot; +import org.apache.hcatalog.hbase.snapshot.Transaction; +import org.apache.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hcatalog.mapreduce.InputJobInfo; +import org.apache.hcatalog.mapreduce.OutputJobInfo; + +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Tests components of HBaseHCatStorageHandler using ManyMiniCluster. 
+ * Including ImprtSequenceFile and HBaseBulkOutputFormat + */ +public class TestHBaseBulkOutputFormat extends SkeletonHBaseTest { + private final static Log LOG = LogFactory.getLog(TestHBaseBulkOutputFormat.class); + + private final HiveConf allConf; + private final HCatDriver hcatDriver; + + public TestHBaseBulkOutputFormat() { + allConf = getHiveConf(); + allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); + allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(),"warehouse").toString()); + + //Add hbase properties + for (Map.Entry el : getHbaseConf()) + allConf.set(el.getKey(), el.getValue()); + for (Map.Entry el : getJobConf()) + allConf.set(el.getKey(), el.getValue()); + + SessionState.start(new CliSessionState(allConf)); + hcatDriver = new HCatDriver(); + } + + public static class MapWriteOldMapper implements org.apache.hadoop.mapred.Mapper { + + @Override + public void close() throws IOException { + } + + @Override + public void configure(JobConf job) { + } + + @Override + public void map(LongWritable key, Text value, + OutputCollector output, + Reporter reporter) throws IOException { + String vals[] = value.toString().split(","); + Put put = new Put(Bytes.toBytes(vals[0])); + for(int i=1;i { + + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + String vals[] = value.toString().split(","); + Put put = new Put(Bytes.toBytes(vals[0])); + for(int i=1;i { + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + OutputJobInfo jobInfo = (OutputJobInfo)HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatRecord record = new DefaultHCatRecord(3); + HCatSchema schema = jobInfo.getOutputSchema(); + String vals[] = value.toString().split(","); + record.setInteger("key",schema,Integer.parseInt(vals[0])); + for(int i=1;i abortedWriteTransactions = rm.getAbortedWriteTransactions( + databaseName + "." + tableName, family); + assertEquals(1, abortedWriteTransactions.size()); + assertEquals(1, abortedWriteTransactions.get(0).getRevision()); + } + } finally { + rm.close(); + } + + //verify that hbase does not have any of the records. + //Since records are only written during commitJob, + //hbase should not have any records. + HTable table = new HTable(conf, databaseName + "." + tableName); + Scan scan = new Scan(); + scan.addFamily(Bytes.toBytes(familyName)); + ResultScanner scanner = table.getScanner(scan); + assertFalse(scanner.iterator().hasNext()); + + // verify that the input storage driver returns empty results. 
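+        // In the bulk path nothing reaches HBase until commitJob runs, so the aborted job
+        // leaves the table empty (asserted via the scanner just above); the HCatInputFormat
+        // read below should likewise produce no records for MapReadAbortedTransaction.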
+ Path outputDir = new Path(getTestDir(), + "mapred/testHBaseTableBulkIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + job = new Job(conf, "hbase-bulk-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadAbortedTransaction.class); + job.setInputFormatClass(HCatInputFormat.class); + InputJobInfo inputJobInfo = InputJobInfo.create(databaseName, + tableName, null, null, null); + HCatInputFormat.setInput(job, inputJobInfo); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + } + + private Job configureJob(String jobName, Configuration conf, + Path workingDir, Class mapperClass, + OutputJobInfo outputJobInfo, Path inputPath) throws IOException { + Job job = new Job(conf, jobName); + job.setWorkingDirectory(workingDir); + job.setJarByClass(this.getClass()); + job.setMapperClass(mapperClass); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + return job; + } + +} + Index: storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java =================================================================== --- storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java (revision 0) +++ storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java (revision 0) @@ -0,0 +1,616 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hcatalog.hbase; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.mapreduce.TableInputFormat; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.RunningJob; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hcatalog.cli.HCatDriver; +import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatException; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.HCatRecord; +import org.apache.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hcatalog.hbase.snapshot.Transaction; +import org.apache.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hcatalog.mapreduce.InputJobInfo; +import org.apache.hcatalog.mapreduce.PartInfo; +import org.junit.Test; + +public class TestHBaseInputFormat extends SkeletonHBaseTest { + + private static HiveConf hcatConf; + private static HCatDriver hcatDriver; + private final byte[] FAMILY = Bytes.toBytes("testFamily"); + private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1"); + private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2"); + + private List generatePuts(int num, String tableName) throws IOException { + + List columnFamilies = Arrays.asList("testFamily"); + RevisionManager rm = null; + List myPuts; + try { + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(getHbaseConf()); + rm.open(); + myPuts = new ArrayList(); + for (int i = 1; i <= num; i++) { + Put put = new Put(Bytes.toBytes("testRow")); + put.add(FAMILY, QUALIFIER1, i, Bytes.toBytes("textValue-" + i)); + put.add(FAMILY, QUALIFIER2, i, Bytes.toBytes("textValue-" + i)); + myPuts.add(put); + Transaction tsx = rm.beginWriteTransaction(tableName, + columnFamilies); + rm.commitWriteTransaction(tsx); + } + } finally { + if (rm != null) + rm.close(); + } + + return myPuts; + } + + private void Initialize() throws Exception { + hcatConf = 
getHiveConf(); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + URI fsuri = getFileSystem().getUri(); + Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), + getTestDir()); + hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); + hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); + + //Add hbase properties + + for (Map.Entry el : getHbaseConf()) { + if (el.getKey().startsWith("hbase.")) { + hcatConf.set(el.getKey(), el.getValue()); + } + } + + SessionState.start(new CliSessionState(hcatConf)); + hcatDriver = new HCatDriver(); + + } + + private void populateHBaseTable(String tName, int revisions) throws IOException { + List myPuts = generatePuts(revisions, tName); + HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); + table.put(myPuts); + } + + private long populateHBaseTableQualifier1(String tName, int value, Boolean commit) + throws IOException { + List columnFamilies = Arrays.asList("testFamily"); + RevisionManager rm = null; + List myPuts = new ArrayList(); + long revision; + try { + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(getHbaseConf()); + rm.open(); + Transaction tsx = rm.beginWriteTransaction(tName, columnFamilies); + + Put put = new Put(Bytes.toBytes("testRow")); + revision = tsx.getRevisionNumber(); + put.add(FAMILY, QUALIFIER1, revision, + Bytes.toBytes("textValue-" + value)); + myPuts.add(put); + + // If commit is null it is left as a running transaction + if (commit != null) { + if (commit) { + rm.commitWriteTransaction(tsx); + } else { + rm.abortWriteTransaction(tsx); + } + } + } finally { + if (rm != null) + rm.close(); + } + HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); + table.put(myPuts); + return revision; + } + + @Test + public void TestHBaseTableReadMR() throws Exception { + Initialize(); + String tableName = newTableName("mytable"); + String databaseName = newTableName("mydatabase"); + String db_dir = getTestDir() + "/hbasedb"; + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + + db_dir + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')" ; + + CommandProcessorResponse responseOne = hcatDriver.run(dbquery); + assertEquals(0, responseOne.getResponseCode()); + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + String hbaseTableName = databaseName + "." 
+ tableName; + boolean doesTableExist = hAdmin.tableExists(hbaseTableName); + assertTrue(doesTableExist); + + populateHBaseTable(hbaseTableName, 5); + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + // output settings + Path outputDir = new Path(getTestDir(), "mapred/testHbaseTableMRRead"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + // create job + Job job = new Job(conf, "hbase-mr-read-test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadHTable.class); + MapReadHTable.resetCounters(); + + job.setInputFormatClass(HCatInputFormat.class); + InputJobInfo inputJobInfo = InputJobInfo.create(databaseName, tableName, + null, null, null); + HCatInputFormat.setInput(job, inputJobInfo); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + // Note: These asserts only works in case of LocalJobRunner as they run in same jvm. + // If using MiniMRCluster, the tests will have to be modified. + assertFalse(MapReadHTable.error); + assertEquals(MapReadHTable.count, 1); + + String dropTableQuery = "DROP TABLE " + hbaseTableName ; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); + assertFalse(isHbaseTableThere); + + String dropDB = "DROP DATABASE " + databaseName; + CommandProcessorResponse responseFour = hcatDriver.run(dropDB); + assertEquals(0, responseFour.getResponseCode()); + } + + @Test + public void TestHBaseTableProjectionReadMR() throws Exception { + + Initialize(); + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')" ; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 5); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + // output settings + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + // create job + Job job = new Job(conf, "hbase-column-projection"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadProjHTable.class); + job.setInputFormatClass(HCatInputFormat.class); + InputJobInfo inputJobInfo = InputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, null, null, + null); + HCatInputFormat.setOutputSchema(job, getProjectionSchema()); + HCatInputFormat.setInput(job, inputJobInfo); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, 
outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + assertFalse(MapReadProjHTable.error); + assertEquals(MapReadProjHTable.count, 1); + + String dropTableQuery = "DROP TABLE " + tableName ; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + @Test + public void TestHBaseInputFormatProjectionReadMR() throws Exception { + + Initialize(); + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')" ; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 5); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + // output settings + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + // create job + JobConf job = new JobConf(conf); + job.setJobName("hbase-scan-column"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadProjectionHTable.class); + job.setInputFormat(HBaseInputFormat.class); + + InputJobInfo inputJobInfo = InputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, null, null, + null); + //Configure projection schema + job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema())); + Job newJob = new Job(job); + HCatInputFormat.setInput(newJob, inputJobInfo); + String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); + InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString); + job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString); + for (PartInfo partinfo : info.getPartitions()) { + for (Entry entry : partinfo.getJobProperties().entrySet()) + job.set(entry.getKey(), entry.getValue()); + } + assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS)); + + job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class); + org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + + RunningJob runJob = JobClient.runJob(job); + runJob.waitForCompletion(); + assertTrue(runJob.isSuccessful()); + assertFalse(MapReadProjHTable.error); + assertEquals(MapReadProjHTable.count, 1); + + String dropTableQuery = "DROP TABLE " + tableName ; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = 
hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + @Test + public void TestHBaseTableIgnoreAbortedTransactions() throws Exception { + Initialize(); + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')" ; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 5); + populateHBaseTableQualifier1(tableName, 6, false); + populateHBaseTableQualifier1(tableName, 7, false); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + Job job = new Job(conf, "hbase-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadHTable.class); + MapReadHTable.resetCounters(); + job.setInputFormatClass(HCatInputFormat.class); + InputJobInfo inputJobInfo = InputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, null, null, + null); + HCatInputFormat.setInput(job, inputJobInfo); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + // Verify that the records do not contain aborted transaction + // revisions 6 and 7 for testFamily:testQualifier1 and + // fetches revision 5 for both testQualifier1 and testQualifier2 + assertFalse(MapReadHTable.error); + assertEquals(1, MapReadHTable.count); + + String dropTableQuery = "DROP TABLE " + tableName ; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + @Test + public void TestHBaseTableIgnoreAbortedAndRunningTransactions() throws Exception { + Initialize(); + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')" ; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 2); + populateHBaseTableQualifier1(tableName, 3, null); //Running transaction + populateHBaseTableQualifier1(tableName, 4, Boolean.FALSE); //Aborted transaction + populateHBaseTableQualifier1(tableName, 5, Boolean.TRUE); //Committed 
transaction + populateHBaseTableQualifier1(tableName, 6, null); //Running Transaction + populateHBaseTableQualifier1(tableName, 7, Boolean.FALSE); //Aborted Transaction + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + Job job = new Job(conf, "hbase-running-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadHTableRunningAbort.class); + job.setInputFormatClass(HCatInputFormat.class); + InputJobInfo inputJobInfo = InputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, null, null, + null); + HCatInputFormat.setInput(job, inputJobInfo); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + // Verify that the records do not contain running and aborted transaction + // and it fetches revision 2 for testQualifier1 and testQualifier2 + assertFalse(MapReadHTableRunningAbort.error); + assertEquals(1, MapReadHTableRunningAbort.count); + + String dropTableQuery = "DROP TABLE " + tableName ; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + + static class MapReadHTable + extends + Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + boolean correctValues = (value.size() == 3) + && (value.get(0).toString()).equalsIgnoreCase("testRow") + && (value.get(1).toString()).equalsIgnoreCase("textValue-5") + && (value.get(2).toString()).equalsIgnoreCase("textValue-5"); + + if (correctValues == false) { + error = true; + } + count++; + } + + public static void resetCounters() { + error = false; + count = 0; + } + } + + static class MapReadProjHTable + extends + Mapper, Text> { + + static boolean error = false; + static int count = 0; + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + boolean correctValues = (value.size() == 2) + && (value.get(0).toString()).equalsIgnoreCase("testRow") + && (value.get(1).toString()).equalsIgnoreCase("textValue-5"); + + if (correctValues == false) { + error = true; + } + count++; + } + } + + static class MapReadProjectionHTable + implements org.apache.hadoop.mapred.Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void configure(JobConf job) { + } + + @Override + public void close() throws IOException { + } + + @Override + public void map(ImmutableBytesWritable key, Result result, + OutputCollector, Text> output, Reporter reporter) + throws IOException { + System.out.println("Result " + result.toString()); + List list = result.list(); + boolean 
correctValues = (list.size() == 1) + && (Bytes.toString(list.get(0).getRow())).equalsIgnoreCase("testRow") + && (Bytes.toString(list.get(0).getValue())).equalsIgnoreCase("textValue-5") + && (Bytes.toString(list.get(0).getFamily())).equalsIgnoreCase("testFamily") + && (Bytes.toString(list.get(0).getQualifier())).equalsIgnoreCase("testQualifier1"); + + if (correctValues == false) { + error = true; + } + count++; + } + } + + static class MapReadHTableRunningAbort + extends + Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + boolean correctValues = (value.size() == 3) + && (value.get(0).toString()).equalsIgnoreCase("testRow") + && (value.get(1).toString()).equalsIgnoreCase("textValue-2") + && (value.get(2).toString()).equalsIgnoreCase("textValue-2"); + + if (correctValues == false) { + error = true; + } + count++; + } + } + + private HCatSchema getProjectionSchema() throws HCatException { + + HCatSchema schema = new HCatSchema(new ArrayList()); + schema.append(new HCatFieldSchema("key", HCatFieldSchema.Type.STRING, + "")); + schema.append(new HCatFieldSchema("testqualifier1", + HCatFieldSchema.Type.STRING, "")); + return schema; + } + + +} Index: storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseSerDeResultConverter.java =================================================================== --- storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseSerDeResultConverter.java (revision 1295948) +++ storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseSerDeResultConverter.java (working copy) @@ -1,195 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
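The two transaction tests above pin down the reader-side snapshot semantics: aborted and still-running writes must never surface. One consistent reading of TestHBaseTableIgnoreAbortedAndRunningTransactions (an interpretation of the test's expectations, not code from this patch, with illustrative values) is that the snapshot cannot expose any revision at or beyond the oldest still-open transaction, so even committed revision 5 stays hidden and both qualifiers resolve to revision 2, matching the "textValue-2" check in MapReadHTableRunningAbort:

    // Interpretation of the expectations in TestHBaseTableIgnoreAbortedAndRunningTransactions.
    long[] committedRevisions = {2, 5};   // populateHBaseTable(.., 2) and qualifier1 at 5
    long[] abortedRevisions   = {4, 7};   // Boolean.FALSE writes
    long[] runningRevisions   = {3, 6};   // Boolean null writes, never committed
    long oldestRunning   = 3;             // nothing at or beyond this revision is visible
    long visibleRevision = 2;             // highest committed revision below oldestRunning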
- */ - -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.hbase.HBaseSerDe; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.junit.Test; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -/** - * Test HBaseSerdeResultConverter by manually creating records to convert to and from HBase objects - */ -public class TestHBaseSerDeResultConverter { - - private Properties createProperties() { - Properties tbl = new Properties(); - // Set the configuration parameters - tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9"); - tbl.setProperty("columns","key,aint,astring,amap"); - tbl.setProperty("columns.types","string:int:string:map"); - tbl.setProperty(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX+"."+ HBaseSerDe.HBASE_COLUMNS_MAPPING, - ":key,my_family:my_qualifier1,my_family:my_qualifier2,my_family2:"); - tbl.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "NULL"); - return tbl; - } - - private HCatSchema createHCatSchema() throws HCatException { - HCatSchema subSchema = new HCatSchema(new ArrayList()); - subSchema.append(new HCatFieldSchema(null, HCatFieldSchema.Type.INT,"")); - - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("key", HCatFieldSchema.Type.STRING,"")); - schema.append(new HCatFieldSchema("aint", HCatFieldSchema.Type.INT,"")); - schema.append(new HCatFieldSchema("astring", HCatFieldSchema.Type.STRING,"")); - schema.append(new HCatFieldSchema("amap", HCatFieldSchema.Type.MAP,HCatFieldSchema.Type.STRING,subSchema,"")); - return schema; - } - - @Test - public void testDeserialize() throws IOException { - HBaseSerDeResultConverter converter = new HBaseSerDeResultConverter(createHCatSchema(), - null, - createProperties(), - 1l); - //test integer - Result result = new Result(new KeyValue[]{new KeyValue(Bytes.toBytes("row"), - Bytes.toBytes("my_family"), - Bytes.toBytes("my_qualifier1"), - 0, - //This is how Hive's SerDe serializes numbers - Bytes.toBytes("123")), - //test string - new KeyValue(Bytes.toBytes("row"), - Bytes.toBytes("my_family"), - Bytes.toBytes("my_qualifier2"), - 0, - Bytes.toBytes("onetwothree")), - //test family map - new KeyValue(Bytes.toBytes("row"), - Bytes.toBytes("my_family2"), - Bytes.toBytes("one"), - 0, - Bytes.toBytes("1")), - new KeyValue(Bytes.toBytes("row"), - Bytes.toBytes("my_family2"), - Bytes.toBytes("two"), - 0, - Bytes.toBytes("2"))}); - - HCatRecord record = converter.convert(result); - - assertEquals(Bytes.toString(result.getRow()), record.get(0).toString()); - assertEquals(Integer.valueOf( - Bytes.toString( - result.getValue(Bytes.toBytes("my_family"), Bytes.toBytes("my_qualifier1")))), - record.get(1)); - assertEquals(Bytes.toString( - result.getValue(Bytes.toBytes("my_family"), Bytes.toBytes("my_qualifier2"))), - record.get(2).toString()); - Map recordMap = (Map)record.get(3); - Map familyMap = 
result.getFamilyMap(Bytes.toBytes("my_family2")); - assertEquals(Integer.valueOf( - Bytes.toString( - familyMap.get(Bytes.toBytes("one")))), - recordMap.get("one")); - assertEquals(Integer.valueOf( - Bytes.toString( - familyMap.get(Bytes.toBytes("two")))), - recordMap.get("two")); - } - - @Test - public void testSerialize() throws IOException { - HCatSchema schema = createHCatSchema(); - HBaseSerDeResultConverter converter = new HBaseSerDeResultConverter(schema, - null, - createProperties(), - 1l); - HCatRecord in = new DefaultHCatRecord(4); - //row key - in.set(0,"row"); - //test integer - in.set(1,123); - //test string - in.set(2,"onetwothree"); - //test map - Map map = new HashMap(); - map.put("one",1); - map.put("two",2); - in.set(3,map); - - Put put = converter.convert(in); - - assertEquals(in.get(0).toString(),Bytes.toString(put.getRow())); - assertEquals(in.get(1), - Integer.valueOf( - Bytes.toString( - put.get(Bytes.toBytes("my_family"), - Bytes.toBytes("my_qualifier1")).get(0).getValue()))); - assertEquals(1l, - put.get(Bytes.toBytes("my_family"), - Bytes.toBytes("my_qualifier1")).get(0).getTimestamp()); - assertEquals(in.get(2), - Bytes.toString( - put.get(Bytes.toBytes("my_family"), - Bytes.toBytes("my_qualifier2")).get(0).getValue())); - assertEquals(1l, - put.get(Bytes.toBytes("my_family"), - Bytes.toBytes("my_qualifier2")).get(0).getTimestamp()); - assertEquals(map.get("one"), - Integer.valueOf( - Bytes.toString( - put.get(Bytes.toBytes("my_family2"), - Bytes.toBytes("one")).get(0).getValue()))); - assertEquals(1l, - put.get(Bytes.toBytes("my_family2"), - Bytes.toBytes("one")).get(0).getTimestamp()); - assertEquals(map.get("two"), - Integer.valueOf(Bytes.toString( - put.get("my_family2".getBytes(), - "two".getBytes()).get(0).getValue()))); - assertEquals(1l, - put.get(Bytes.toBytes("my_family2"), - Bytes.toBytes("two")).get(0).getTimestamp()); - } - - @Test - public void testScanColumns() throws IOException{ - HCatSchema schema = createHCatSchema(); - HBaseSerDeResultConverter converter = new HBaseSerDeResultConverter(schema, - null, - createProperties()); - - String result = converter.getHBaseScanColumns(); - String scanColumns = "my_family:my_qualifier1 my_family:my_qualifier2 my_family2: "; - - assertTrue(scanColumns.equals(result)); - - - } -} Index: storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputStorageDriver.java.broken =================================================================== --- storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputStorageDriver.java.broken (revision 1295948) +++ storage-drivers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputStorageDriver.java.broken (working copy) @@ -1,290 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
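The expected string in testScanColumns follows directly from the hbase.columns.mapping set in createProperties(): every mapped column except the ":key" row-key entry contributes "family:qualifier " (a bare "family:" keeps the whole family), and the trailing space is part of what the test asserts. A minimal sketch with a hypothetical helper, not the converter's actual implementation (which goes through HBaseSerDe's column-mapping parser):

    // Hypothetical helper illustrating the scan-columns string format only.
    class ScanColumnsSketch {
        static String scanColumns(String hbaseColumnsMapping) {
            StringBuilder sb = new StringBuilder();
            for (String col : hbaseColumnsMapping.split(",")) {
                if (!":key".equals(col)) {        // the row key is never a scanned column
                    sb.append(col).append(' ');
                }
            }
            return sb.toString();
        }

        public static void main(String[] args) {
            // Prints "my_family:my_qualifier1 my_family:my_qualifier2 my_family2: "
            System.out.println(scanColumns(
                    ":key,my_family:my_qualifier1,my_family:my_qualifier2,my_family2:"));
        }
    }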
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.client.*; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.TableSnapshot; -import org.apache.hcatalog.hbase.snapshot.Transaction; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.junit.Test; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Map; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -/** - * Test HBaseDirectOuputStorageDriver and HBaseDirectOUtputFormat using a MiniCluster - */ -public class TestHBaseDirectOutputStorageDriver extends SkeletonHBaseTest { - - private final HiveConf allConf; - private final HCatDriver hcatDriver; - - public TestHBaseDirectOutputStorageDriver() { - allConf = getHiveConf(); - allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); - allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(),"warehouse").toString()); - - //Add hbase properties - for (Map.Entry el : getHbaseConf()) - allConf.set(el.getKey(), el.getValue()); - for (Map.Entry el : getJobConf()) - allConf.set(el.getKey(), el.getValue()); - - SessionState.start(new CliSessionState(allConf)); - hcatDriver = new HCatDriver(); - } - - @Test - public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { - String testName = "directOutputFormatTest"; - Path methodTestDir = new Path(getTestDir(),testName); - - String tableName = newTableName(testName).toLowerCase(); - byte[] tableNameBytes = Bytes.toBytes(tableName); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - //create table - createTable(tableName,new String[]{familyName}); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:ONE,spanish:DOS", - "3,english:ONE,spanish:TRES"}; - - - - // input/output settings - Path inputPath = new Path(methodTestDir,"mr_input"); - getFileSystem().mkdirs(inputPath); - FSDataOutputStream os = getFileSystem().create(new Path(inputPath,"inputFile.txt")); - for(String 
line: data) - os.write(Bytes.toBytes(line + "\n")); - os.close(); - - //create job - Job job = new Job(conf, testName); - job.setWorkingDirectory(new Path(methodTestDir,"mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapWrite.class); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - - job.setOutputFormatClass(HBaseDirectOutputFormat.class); - job.getConfiguration().set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); - - //manually create transaction - RevisionManager rm = HBaseHCatStorageHandler.getOpenedRevisionManager(conf); - try { - OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null, null, null); - Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, - HCatUtil.serialize(txn)); - job.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(outputJobInfo)); - } finally { - rm.close(); - } - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - - //verify - HTable table = new HTable(conf, tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index=0; - for(Result result: scanner) { - String vals[] = data[index].toString().split(","); - for(int i=1;i { - - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = (OutputJobInfo)HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatRecord record = new DefaultHCatRecord(3); - HCatSchema schema = jobInfo.getOutputSchema(); - String vals[] = value.toString().split(","); - record.setInteger("key",schema,Integer.parseInt(vals[0])); - for(int i=1;i { - - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String vals[] = value.toString().split(","); - Put put = new Put(Bytes.toBytes(vals[0])); - for(int i=1;i fields = HCatUtil.getFieldSchemaList(outputSchema.getFields()); - hcatProperties.setProperty(Constants.LIST_COLUMNS, - MetaStoreUtils.getColumnNamesFromFieldSchema(fields)); - hcatProperties.setProperty(Constants.LIST_COLUMN_TYPES, - MetaStoreUtils.getColumnTypesFromFieldSchema(fields)); - - context.getConfiguration().set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, qualifiedTableName); - - String txnString = outputJobInfo.getProperties().getProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY); - if(txnString == null) { - HBaseConfiguration.addHbaseResources(context.getConfiguration()); - //outputSchema should be set by HCatOutputFormat calling setSchema, prior to initialize being called - //TODO reconcile output_revision passing to HBaseSerDeResultConverter - //on the first call to this method hcatProperties will not contain an OUTPUT_VERSION but that doesn't - //matter since we won't use any facilities that require that property set during that run - converter = new HBaseSerDeResultConverter(schema, - outputSchema, - hcatProperties); - RevisionManager rm = HBaseHCatStorageHandler.getOpenedRevisionManager(context.getConfiguration()); - Transaction txn = null; - try { - txn = rm.beginWriteTransaction(qualifiedTableName, - 
Arrays.asList(converter.getHBaseScanColumns().split(" "))); - } finally { - rm.close(); - } - outputJobInfo.getProperties() - .setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, - HCatUtil.serialize(txn)); - } - else { - Transaction txn = (Transaction)HCatUtil.deserialize(txnString); - converter = new HBaseSerDeResultConverter(schema, - outputSchema, - hcatProperties, - txn.getRevisionNumber()); - } - } - - @Override - public void setSchema(JobContext jobContext, HCatSchema schema) throws IOException { - this.outputSchema = schema; - } - - @Override - public WritableComparable generateKey(HCatRecord value) throws IOException { - //HBase doesn't use KEY as part of output - return null; - } - - @Override - public Writable convertValue(HCatRecord value) throws IOException { - return converter.convert(value); - } - - @Override - public void setPartitionValues(JobContext jobContext, Map partitionValues) throws IOException { - //no partitions for this driver - } - - @Override - public Path getWorkFilePath(TaskAttemptContext context, String outputLoc) throws IOException { - return null; - } - - @Override - public void setOutputPath(JobContext jobContext, String location) throws IOException { - //no output path - } - - @Override - public String getOutputLocation(JobContext jobContext, String tableLocation, List partitionCols, Map partitionValues, String dynHash) throws IOException { - return null; - } -} Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputStorageDriver.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputStorageDriver.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputStorageDriver.java (working copy) @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; - -import java.io.IOException; -import java.util.Properties; - -/** - * HBase Storage driver implementation which uses "direct" writes to hbase for writing out records. 
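convertValue() above hands each record to the converter, and testSerialize in TestHBaseSerDeResultConverter shows what comes out: one Put per record, with every cell stamped with the write transaction's revision number as its HBase timestamp. A minimal sketch of that shape, assuming the Put.add(family, qualifier, timestamp, value) overload of this HBase generation and the my_family mapping used in that test (the helper name is hypothetical):

    // Sketch only: the Put shape testSerialize expects for a record (key, aint, astring)
    // written under revision `revision`; values are serialized as strings, Hive-SerDe style.
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.util.Bytes;

    class DirectPutSketch {
        static Put toPut(String rowKey, int aint, String astring, long revision) {
            Put put = new Put(Bytes.toBytes(rowKey));
            put.add(Bytes.toBytes("my_family"), Bytes.toBytes("my_qualifier1"),
                    revision, Bytes.toBytes(Integer.toString(aint)));
            put.add(Bytes.toBytes("my_family"), Bytes.toBytes("my_qualifier2"),
                    revision, Bytes.toBytes(astring));
            return put;
        }
    }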
- */ -public class HBaseDirectOutputStorageDriver extends HBaseBaseOutputStorageDriver { - - private HBaseDirectOutputFormat outputFormat; - - @Override - public void initialize(JobContext context, Properties hcatProperties) throws IOException { - super.initialize(context, hcatProperties); - context.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); - outputFormat = new HBaseDirectOutputFormat(); - outputFormat.setConf(context.getConfiguration()); - } - - @Override - public OutputFormat, ? extends Writable> getOutputFormat() throws IOException { - return outputFormat; - } - -} Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputStorageDriver.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputStorageDriver.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputStorageDriver.java (working copy) @@ -1,277 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hcatalog.hbase; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.mapreduce.TableInputFormat; -import org.apache.hadoop.hive.hbase.HBaseSerDe; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.hbase.snapshot.TableSnapshot; -import org.apache.hcatalog.mapreduce.HCatInputStorageDriver; -import org.apache.hcatalog.mapreduce.HCatTableInfo; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.StorerInfo; - - -/** - * The Class HBaseInputStorageDriver enables reading of HBase tables through - * HCatalog. 
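For orientation, the read path this driver serves (and which HBaseInputFormat takes over in this patch) is the one exercised by the MR read tests earlier in the patch. The sketch below is condensed from those tests and uses only the calls they make; the driver class, table name, output path, and projection schema are placeholders:

    // Minimal client-side wiring for reading an HCatalog-managed HBase table.
    Job job = new Job(conf, "hcat-hbase-read");
    job.setJarByClass(MyDriver.class);                       // hypothetical driver class
    job.setMapperClass(MapReadHTable.class);                 // a Mapper<ImmutableBytesWritable, HCatRecord, ...>
    job.setInputFormatClass(HCatInputFormat.class);
    HCatInputFormat.setInput(job,
            InputJobInfo.create("default", "mytable", null, null, null));
    HCatInputFormat.setOutputSchema(job, projectionSchema);  // optional: scan only the projected columns
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path("/tmp/hcat-read-out"));
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);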
- */ -public class HBaseInputStorageDriver extends HCatInputStorageDriver { - - private InputJobInfo inpJobInfo; - private ResultConverter converter; - private HCatSchema outputColSchema; - private HCatSchema dataSchema; - private Configuration jobConf; - private String scanColumns; - private HCatTableSnapshot snapshot; - - /* - * @param JobContext - * - * @param hcatProperties - * - * @see org.apache.hcatalog.mapreduce.HCatInputStorageDriver - * #initialize(org.apache.hadoop.mapreduce.JobContext, java.util.Properties) - */ - @Override - public void initialize(JobContext context, Properties hcatProperties) throws IOException { - - jobConf = context.getConfiguration(); - String jobString = jobConf.get(HCatConstants.HCAT_KEY_JOB_INFO); - if (jobString == null) { - throw new IOException( - "InputJobInfo information not found in JobContext. " - + "HCatInputFormat.setInput() not called?"); - } - inpJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); - dataSchema = inpJobInfo.getTableInfo().getDataColumns(); - List fields = HCatUtil.getFieldSchemaList(dataSchema - .getFields()); - hcatProperties.setProperty(Constants.LIST_COLUMNS, - MetaStoreUtils.getColumnNamesFromFieldSchema(fields)); - hcatProperties.setProperty(Constants.LIST_COLUMN_TYPES, - MetaStoreUtils.getColumnTypesFromFieldSchema(fields)); - converter = new HBaseSerDeResultConverter(dataSchema, outputColSchema, - hcatProperties); - scanColumns = converter.getHBaseScanColumns(); - String hbaseTableName = HBaseHCatStorageHandler - .getFullyQualifiedName(inpJobInfo.getTableInfo()); - String serSnapshot = (String) inpJobInfo.getProperties().get( - HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); - if(serSnapshot == null){ - HBaseConfiguration.addHbaseResources(context.getConfiguration()); - snapshot = HBaseHCatStorageHandler.createSnapshot(jobConf, - hbaseTableName); - inpJobInfo.getProperties().setProperty( - HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, - HCatUtil.serialize(snapshot)); - } - - context.getConfiguration().set(HCatConstants.HCAT_KEY_JOB_INFO, HCatUtil.serialize(inpJobInfo)); - - } - - /* - * @param hcatProperties - * - * @return InputFormat - * - * @see org.apache.hcatalog.mapreduce.HCatInputStorageDriver - * #getInputFormat(java.util.Properties) - */ - @Override - public InputFormat getInputFormat( - Properties hcatProperties) { - - String hbaseTableName = HBaseHCatStorageHandler - .getFullyQualifiedName(inpJobInfo.getTableInfo()); - HBaseInputFormat tableInputFormat = new HBaseInputFormat(inpJobInfo); - jobConf.set(TableInputFormat.INPUT_TABLE, hbaseTableName); - jobConf.set(TableInputFormat.SCAN_COLUMNS, scanColumns); - jobConf.setInt(TableInputFormat.SCAN_MAXVERSIONS, 1); - tableInputFormat.setConf(jobConf); - // TODO: Make the caching configurable by the user - tableInputFormat.getScan().setCaching(200); - tableInputFormat.getScan().setCacheBlocks(false); - return tableInputFormat; - } - - /* - * @param baseKey The key produced by the MR job. - * - * @param baseValue The value produced by the MR job. - * - * @return HCatRecord An instance of HCatRecord produced by the key, value. 
- * - * @throws IOException - * - * @see - * org.apache.hcatalog.mapreduce.HCatInputStorageDriver#convertToHCatRecord - * (org.apache.hadoop.io.WritableComparable, org.apache.hadoop.io.Writable) - */ - @Override - public HCatRecord convertToHCatRecord(WritableComparable baseKey, - Writable baseValue) throws IOException { - return this.converter.convert((Result) baseValue); - } - - /* - * @param jobContext The jobcontext of MR job - * - * @param howlSchema The output schema of the hcat record. - * - * @throws IOException - * - * @see org.apache.hcatalog.mapreduce.HCatInputStorageDriver# - * setOutputSchema(org.apache.hadoop.mapreduce.JobContext, - * org.apache.hcatalog.data.schema.HCatSchema) - */ - @Override - public void setOutputSchema(JobContext jobContext, HCatSchema howlSchema) - throws IOException { - this.outputColSchema = howlSchema; - } - - /* - * @param jobContext - * - * @param partitionValues - * - * @throws IOException - * - * @see org.apache.hcatalog.mapreduce.HCatInputStorageDriver - * #setPartitionValues(org.apache.hadoop.mapreduce.JobContext, - * java.util.Map) - */ - @Override - public void setPartitionValues(JobContext jobContext, - Map partitionValues) throws IOException { - } - - /* - * @param jobContext The jobcontext of MR job. - * - * @param hcatSchema The schema of the hcat record. - * - * @throws IOException - * - * @see org.apache.hcatalog.mapreduce.HCatInputStorageDriver - * #setOriginalSchema(org.apache.hadoop.mapreduce.JobContext, - * org.apache.hcatalog.data.schema.HCatSchema) - */ - @Override - public void setOriginalSchema(JobContext jobContext, HCatSchema hcatSchema) - throws IOException { - this.dataSchema = hcatSchema; - } - - static HCatTableSnapshot convertSnapshot(TableSnapshot hbaseSnapshot, - HCatTableInfo hcatTableInfo) throws IOException { - - HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); - Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); - HashMap revisionMap = new HashMap(); - - for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { - if(hcatHbaseColMap.containsKey(fSchema.getName())){ - String colFamily = hcatHbaseColMap.get(fSchema.getName()); - long revisionID = hbaseSnapshot.getRevision(colFamily); - revisionMap.put(fSchema.getName(), revisionID); - } - } - - HCatTableSnapshot hcatSnapshot = new HCatTableSnapshot( - hcatTableInfo.getDatabaseName(), hcatTableInfo.getTableName(),revisionMap,hbaseSnapshot.getLatestRevision()); - return hcatSnapshot; - } - - static TableSnapshot convertSnapshot(HCatTableSnapshot hcatSnapshot, - HCatTableInfo hcatTableInfo) throws IOException { - - HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); - Map revisionMap = new HashMap(); - Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); - for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { - String colFamily = hcatHbaseColMap.get(fSchema.getName()); - if (hcatSnapshot.containsColumn(fSchema.getName())) { - long revision = hcatSnapshot.getRevision(fSchema.getName()); - revisionMap.put(colFamily, revision); - } - } - - String fullyQualifiedName = hcatSnapshot.getDatabaseName() + "." 
- + hcatSnapshot.getTableName(); - return new TableSnapshot(fullyQualifiedName, revisionMap,hcatSnapshot.getLatestRevision()); - - } - - private static Map getHCatHBaseColumnMapping( HCatTableInfo hcatTableInfo) - throws IOException { - - HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); - StorerInfo storeInfo = hcatTableInfo.getStorerInfo(); - String hbaseColumnMapping = storeInfo.getProperties().getProperty( - HBaseConstants.PROPERTY_COLUMN_MAPPING_KEY); - - Map hcatHbaseColMap = new HashMap(); - List columnFamilies = new ArrayList(); - List columnQualifiers = new ArrayList(); - try { - HBaseSerDe.parseColumnMapping(hbaseColumnMapping, columnFamilies, - null, columnQualifiers, null); - } catch (SerDeException e) { - throw new IOException("Exception while converting snapshots.", e); - } - - for (HCatFieldSchema column : hcatTableSchema.getFields()) { - int fieldPos = hcatTableSchema.getPosition(column.getName()); - String colFamily = columnFamilies.get(fieldPos); - if (colFamily.equals(HBaseSerDe.HBASE_KEY_COL) == false) { - hcatHbaseColMap.put(column.getName(), colFamily); - } - } - - return hcatHbaseColMap; - } - -} Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseOutputStorageDriver.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseOutputStorageDriver.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseOutputStorageDriver.java (working copy) @@ -1,103 +0,0 @@ -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatOutputStorageDriver; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -/** - * Forwarding HBaseOutputStorageDriver, actual implementation is decided by a configuration - * {@link HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY} which defaults to HBaseBulkOutputStorageDriver - */ -public class HBaseOutputStorageDriver extends HCatOutputStorageDriver { - - private HBaseBulkOutputStorageDriver bulkOSD = new HBaseBulkOutputStorageDriver(); - private HBaseDirectOutputStorageDriver directOSD = new HBaseDirectOutputStorageDriver(); - private HBaseBaseOutputStorageDriver activeOSD; - - @Override - public void initialize(JobContext context, Properties hcatProperties) throws IOException { - super.initialize(context, hcatProperties); - determineOSD(context.getConfiguration(),hcatProperties); - activeOSD.initialize(context,hcatProperties); - } - - @Override - public WritableComparable generateKey(HCatRecord value) throws IOException { - return activeOSD.generateKey(value); - } - - @Override - public Writable convertValue(HCatRecord value) throws IOException { - return activeOSD.convertValue(value); - } - - @Override - public String getOutputLocation(JobContext jobContext, String tableLocation, List partitionCols, Map partitionValues, String dynHash) throws IOException { - //sanity check since we can't determine which will be used till initialize - //and this method gets called before that - String l1 = bulkOSD.getOutputLocation(jobContext, 
tableLocation, partitionCols, partitionValues, dynHash); - String l2 = directOSD.getOutputLocation(jobContext, tableLocation, partitionCols, partitionValues, dynHash); - if(l1 != null || l2 != null) { - throw new IOException("bulkOSD or directOSD returns a non-null path for getOutputLocation()"); - } - return null; - } - - @Override - public Path getWorkFilePath(TaskAttemptContext context, String outputLoc) throws IOException { - return activeOSD.getWorkFilePath(context,outputLoc); - } - - @Override - public OutputFormat, ? extends Writable> getOutputFormat() throws IOException { - return activeOSD.getOutputFormat(); - } - - @Override - public void setOutputPath(JobContext jobContext, String location) throws IOException { - directOSD.setOutputPath(jobContext, location); - bulkOSD.setOutputPath(jobContext, location); - } - - @Override - public void setSchema(JobContext jobContext, HCatSchema schema) throws IOException { - directOSD.setSchema(jobContext,schema); - bulkOSD.setSchema(jobContext,schema); - } - - @Override - public void setPartitionValues(JobContext jobContext, Map partitionValues) throws IOException { - directOSD.setPartitionValues(jobContext,partitionValues); - bulkOSD.setPartitionValues(jobContext,partitionValues); - } - - private void determineOSD(Configuration conf, Properties prop) { - if(activeOSD != null) - return; - - String bulkMode = conf.get(HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY); - if(bulkMode == null && prop != null) - bulkMode = prop.getProperty(HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY); - - if(bulkMode != null && !Boolean.valueOf(bulkMode)) { - activeOSD = directOSD; - bulkOSD = null; - } - else { - activeOSD = bulkOSD; - directOSD = null; - } - } -} Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java (working copy) @@ -19,22 +19,24 @@ package org.apache.hcatalog.hbase; import java.io.IOException; +import java.io.Serializable; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Properties; +import java.util.Map.Entry; import java.util.Set; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.mapreduce.TableInputFormat; import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hive.hbase.HBaseSerDe; @@ -50,13 +52,11 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.OutputFormat; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.util.StringUtils; import org.apache.hcatalog.common.HCatConstants; import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.schema.HCatSchema; import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import 
org.apache.hcatalog.hbase.snapshot.RevisionManagerFactory; -import org.apache.hcatalog.hbase.snapshot.TableSnapshot; import org.apache.hcatalog.hbase.snapshot.Transaction; import org.apache.hcatalog.hbase.snapshot.ZKBasedRevisionManager; import org.apache.hcatalog.mapreduce.HCatOutputFormat; @@ -74,22 +74,92 @@ * tables through HCatalog. The implementation is very similar to the * HiveHBaseStorageHandler, with more details to suit HCatalog. */ -public class HBaseHCatStorageHandler extends HCatStorageHandler implements HiveMetaHook { +public class HBaseHCatStorageHandler extends HCatStorageHandler implements HiveMetaHook, Serializable { - final static public String DEFAULT_PREFIX = "default."; + public final static String DEFAULT_PREFIX = "default."; + private final static String PROPERTY_INT_OUTPUT_LOCATION = "hcat.hbase.mapreduce.intermediateOutputLocation"; - private Configuration hbaseConf; + private transient Configuration hbaseConf; + private transient HBaseAdmin admin; - private HBaseAdmin admin; - @Override public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { - //TODO complete rework and fill this in + // Populate jobProperties with input table name, table columns, RM snapshot, + // hbase-default.xml and hbase-site.xml + Map tableJobProperties = tableDesc.getJobProperties(); + String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_JOB_INFO); + try { + InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); + HCatTableInfo tableInfo = inputJobInfo.getTableInfo(); + String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedName(tableInfo); + jobProperties.put(TableInputFormat.INPUT_TABLE, qualifiedTableName); + + Configuration jobConf = getConf(); + String outputSchema = jobConf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); + jobProperties.put(TableInputFormat.SCAN_COLUMNS, getScanColumns(tableInfo, outputSchema)); + + String serSnapshot = (String) inputJobInfo.getProperties().get( + HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); + if (serSnapshot == null) { + Configuration conf = addHbaseResources(jobConf); + HCatTableSnapshot snapshot = HBaseRevisionManagerUtil.createSnapshot(conf, + qualifiedTableName, tableInfo); + jobProperties.put(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, + HCatUtil.serialize(snapshot)); + } + + addHbaseResources(jobConf, jobProperties); + + } catch (IOException e) { + throw new IllegalStateException("Error while configuring job properties", e); + } } @Override public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { - //TODO complete rework and fill this in + // Populate jobProperties with output table name, hbase-default.xml, hbase-site.xml, OutputJobInfo + // Populate RM transaction in OutputJobInfo + // In case of bulk mode, populate intermediate output location + Map tableJobProperties = tableDesc.getJobProperties(); + String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + try { + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString); + HCatTableInfo tableInfo = outputJobInfo.getTableInfo(); + String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedName(tableInfo); + jobProperties.put(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, qualifiedTableName); + + Configuration jobConf = getConf(); + String txnString = outputJobInfo.getProperties().getProperty( + HBaseConstants.PROPERTY_WRITE_TXN_KEY); + if (txnString == null) { + Configuration conf = addHbaseResources(jobConf); + Transaction txn = 
HBaseRevisionManagerUtil.beginWriteTransaction(qualifiedTableName, tableInfo, conf); + outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, + HCatUtil.serialize(txn)); + + if (isBulkMode(outputJobInfo) && !(outputJobInfo.getProperties() + .containsKey(PROPERTY_INT_OUTPUT_LOCATION))) { + String tableLocation = tableInfo.getTableLocation(); + String location = new Path(tableLocation, "REVISION_" + txn.getRevisionNumber()) + .toString(); + outputJobInfo.getProperties().setProperty(PROPERTY_INT_OUTPUT_LOCATION, + location); + // We are writing out an intermediate sequenceFile hence + // location is not passed in OutputJobInfo.getLocation() + // TODO replace this with a mapreduce constant when available + jobProperties.put("mapred.output.dir", location); + } + } + + jobProperties + .put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); + addHbaseResources(jobConf, jobProperties); + addOutputDependencyJars(jobConf); + jobProperties.put("tmpjars", jobConf.get("tmpjars")); + + } catch (IOException e) { + throw new IllegalStateException("Error while configuring job properties", e); + } } /* @@ -231,7 +301,7 @@ new HTable(hbaseConf, tableDesc.getName()); //Set up znodes in revision manager. - RevisionManager rm = getOpenedRevisionManager(hbaseConf); + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); if (rm instanceof ZKBasedRevisionManager) { ZKBasedRevisionManager zkRM = (ZKBasedRevisionManager) rm; zkRM.setUpZNodes(tableName, new ArrayList( @@ -295,36 +365,6 @@ return this; } -//TODO finish rework remove this -// /* -// * @param tableDesc -// * -// * @param jobProperties -// * -// * @see org.apache.hcatalog.storagehandler.HCatStorageHandler -// * #configureTableJobProperties(org.apache.hadoop.hive.ql.plan.TableDesc, -// * java.util.Map) -// */ -// @Override -// public void configureTableJobProperties(TableDesc tableDesc, -// Map jobProperties) { -// Properties tableProperties = tableDesc.getProperties(); -// -// jobProperties.put(HBaseSerDe.HBASE_COLUMNS_MAPPING, -// tableProperties.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING)); -// -// String tableName = tableProperties -// .getProperty(HBaseSerDe.HBASE_TABLE_NAME); -// if (tableName == null) { -// tableName = tableProperties.getProperty(Constants.META_TABLE_NAME); -// if (tableName.startsWith(DEFAULT_PREFIX)) { -// tableName = tableName.substring(DEFAULT_PREFIX.length()); -// } -// } -// jobProperties.put(HBaseSerDe.HBASE_TABLE_NAME, tableName); -// -// } - private HBaseAdmin getHBaseAdmin() throws MetaException { try { if (admin == null) { @@ -356,14 +396,12 @@ @Override public Class getInputFormatClass() { - //TODO replace this with rework - return InputFormat.class; + return HBaseInputFormat.class; } @Override public Class getOutputFormatClass() { - //TODO replace this with rework - return SequenceFileOutputFormat.class; + return HBaseBaseOutputFormat.class; } /* @@ -397,6 +435,7 @@ private void checkDeleteTable(Table table) throws MetaException { boolean isExternal = MetaStoreUtils.isExternalTable(table); String tableName = getHBaseTableName(table); + RevisionManager rm = null; try { if (!isExternal && getHBaseAdmin().tableExists(tableName)) { // we have created an HBase table, so we delete it to roll back; @@ -406,7 +445,7 @@ getHBaseAdmin().deleteTable(tableName); //Set up znodes in revision manager. 
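A short worked example of the bulk-mode staging logic added in configureOutputJobProperties above; the location values are hypothetical, the Path construction is the one the handler uses:

    // With a hypothetical table location and write-transaction revision 7:
    String tableLocation = "hdfs://nn:8020/user/hive/warehouse/mytable";
    long revisionNumber = 7L;
    String intermediateLocation =
            new Path(tableLocation, "REVISION_" + revisionNumber).toString();
    // -> "hdfs://nn:8020/user/hive/warehouse/mytable/REVISION_7", which also becomes
    //    the job's "mapred.output.dir" so the intermediate sequence file lands there.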
- RevisionManager rm = getOpenedRevisionManager(hbaseConf); + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); if (rm instanceof ZKBasedRevisionManager) { ZKBasedRevisionManager zkRM = (ZKBasedRevisionManager) rm; zkRM.deleteZNodes(tableName); @@ -414,6 +453,8 @@ } } catch (IOException ie) { throw new MetaException(StringUtils.stringifyException(ie)); + } finally { + HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); } } @@ -436,9 +477,7 @@ * @param conf * @throws IOException */ - public static void addDependencyJars(Configuration conf) throws IOException { - //TODO provide a facility/interface for loading/specifying dependencies - //Ideally this method shouldn't be exposed to the user + private void addOutputDependencyJars(Configuration conf) throws IOException { TableMapReduceUtil.addDependencyJars(conf, //hadoop-core Writable.class, @@ -452,8 +491,6 @@ HCatOutputFormat.class, //hive hbase storage handler jar HBaseSerDe.class, - //hcat hbase storage driver jar - HBaseOutputStorageDriver.class, //hive jar Table.class, //libthrift jar @@ -464,152 +501,86 @@ FacebookBase.class); } - /** - * Creates the latest snapshot of the table. - * - * @param jobConf The job configuration. - * @param hbaseTableName The fully qualified name of the HBase table. - * @return An instance of HCatTableSnapshot - * @throws IOException Signals that an I/O exception has occurred. + * Utility method to get a new Configuration with hbase-default.xml and hbase-site.xml added + * @param jobConf existing configuration + * @return a new Configuration with hbase-default.xml and hbase-site.xml added */ - public static HCatTableSnapshot createSnapshot(Configuration jobConf, - String hbaseTableName ) throws IOException { - - RevisionManager rm = null; - TableSnapshot snpt; - try { - rm = getOpenedRevisionManager(jobConf); - snpt = rm.createSnapshot(hbaseTableName); - } finally { - if (rm != null) - rm.close(); - } - - String inputJobString = jobConf.get(HCatConstants.HCAT_KEY_JOB_INFO); - if(inputJobString == null){ - throw new IOException( - "InputJobInfo information not found in JobContext. " - + "HCatInputFormat.setInput() not called?"); - } - InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(inputJobString); - HCatTableSnapshot hcatSnapshot = HBaseInputStorageDriver - .convertSnapshot(snpt, inputInfo.getTableInfo()); - - return hcatSnapshot; + private Configuration addHbaseResources(Configuration jobConf) { + Configuration conf = new Configuration(jobConf); + HBaseConfiguration.addHbaseResources(conf); + return conf; } /** - * Creates the snapshot using the revision specified by the user. - * - * @param jobConf The job configuration. - * @param tableName The fully qualified name of the table whose snapshot is being taken. - * @param revision The revision number to use for the snapshot. - * @return An instance of HCatTableSnapshot. - * @throws IOException Signals that an I/O exception has occurred. + * Utility method to add hbase-default.xml and hbase-site.xml properties to a new map + * if they are not already present in the jobConf. 
+ * @param jobConf Job configuration + * @param newJobProperties Map to which new properties should be added */ - public static HCatTableSnapshot createSnapshot(Configuration jobConf, - String tableName, long revision) - throws IOException { - - TableSnapshot snpt; - RevisionManager rm = null; - try { - rm = getOpenedRevisionManager(jobConf); - snpt = rm.createSnapshot(tableName, revision); - } finally { - if (rm != null) - rm.close(); + private void addHbaseResources(Configuration jobConf, + Map newJobProperties) { + Configuration conf = new Configuration(false); + HBaseConfiguration.addHbaseResources(conf); + for (Entry entry : conf) { + if (jobConf.get(entry.getKey()) == null) + newJobProperties.put(entry.getKey(), entry.getValue()); } + } - String inputJobString = jobConf.get(HCatConstants.HCAT_KEY_JOB_INFO); - if(inputJobString == null){ - throw new IOException( - "InputJobInfo information not found in JobContext. " - + "HCatInputFormat.setInput() not called?"); - } - InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(inputJobString); - HCatTableSnapshot hcatSnapshot = HBaseInputStorageDriver - .convertSnapshot(snpt, inputInfo.getTableInfo()); - - return hcatSnapshot; + public static boolean isBulkMode(OutputJobInfo outputJobInfo) { + //Default is false + String bulkMode = outputJobInfo.getTableInfo().getStorerInfo().getProperties() + .getProperty(HBaseConstants.PROPERTY_OSD_BULK_MODE_KEY, + "false"); + return "true".equals(bulkMode); } - /** - * Gets an instance of revision manager which is opened. - * - * @param jobConf The job configuration. - * @return RevisionManager An instance of revision manager. - * @throws IOException - */ - static RevisionManager getOpenedRevisionManager(Configuration jobConf) throws IOException { - - Properties properties = new Properties(); - String zkHostList = jobConf.get(HConstants.ZOOKEEPER_QUORUM); - int port = jobConf.getInt("hbase.zookeeper.property.clientPort", - HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT); - - if (zkHostList != null) { - String[] splits = zkHostList.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - sb.append(','); + private String getScanColumns(HCatTableInfo tableInfo, String outputColSchema) throws IOException { + StringBuilder builder = new StringBuilder(); + String hbaseColumnMapping = tableInfo.getStorerInfo().getProperties() + .getProperty(HBaseConstants.PROPERTY_COLUMN_MAPPING_KEY); + if (outputColSchema == null) { + String[] splits = hbaseColumnMapping.split("[,]"); + for (int i = 0; i < splits.length; i++) { + if (!splits[i].equals(HBaseSerDe.HBASE_KEY_COL)) + builder.append(splits[i]).append(" "); } - - sb.deleteCharAt(sb.length() - 1); - properties.put(ZKBasedRevisionManager.HOSTLIST, sb.toString()); + } else { + HCatSchema outputSchema = (HCatSchema) HCatUtil.deserialize(outputColSchema); + HCatSchema tableSchema = tableInfo.getDataColumns(); + List outputFieldNames = outputSchema.getFieldNames(); + List outputColumnMapping = new ArrayList(); + for(String fieldName: outputFieldNames){ + int position = tableSchema.getPosition(fieldName); + outputColumnMapping.add(position); + } + try { + List columnFamilies = new ArrayList(); + List columnQualifiers = new ArrayList(); + HBaseSerDe.parseColumnMapping(hbaseColumnMapping, columnFamilies, null, + columnQualifiers, null); + for (int i = 0; i < outputColumnMapping.size(); i++) { + int cfIndex = outputColumnMapping.get(i); + String cf = columnFamilies.get(cfIndex); + // We skip 
the key column. + if (cf.equals(HBaseSerDe.HBASE_KEY_COL) == false) { + String qualifier = columnQualifiers.get(i); + builder.append(cf); + builder.append(":"); + if (qualifier != null) { + builder.append(qualifier); + } + builder.append(" "); + } + } + } catch (SerDeException e) { + throw new IOException(e); + } } - String dataDir = jobConf.get(ZKBasedRevisionManager.DATADIR); - if (dataDir != null) { - properties.put(ZKBasedRevisionManager.DATADIR, dataDir); - } - String rmClassName = jobConf.get( - RevisionManager.REVISION_MGR_IMPL_CLASS, - ZKBasedRevisionManager.class.getName()); - properties.put(RevisionManager.REVISION_MGR_IMPL_CLASS, rmClassName); - RevisionManager revisionManger = RevisionManagerFactory - .getRevisionManager(properties); - revisionManger.open(); - return revisionManger; + //Remove the extra space delimiter + builder.deleteCharAt(builder.length() - 1); + return builder.toString(); } - /** - * Set snapshot as a property. - * - * @param snapshot The HCatTableSnapshot to be passed to the job. - * @param inpJobInfo The InputJobInfo for the job. - * @throws IOException - */ - public void setSnapshot(HCatTableSnapshot snapshot, InputJobInfo inpJobInfo) - throws IOException { - String serializedSnp = HCatUtil.serialize(snapshot); - inpJobInfo.getProperties().setProperty( - HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, serializedSnp); - } - - static Transaction getWriteTransaction(Configuration conf) throws IOException { - OutputJobInfo outputJobInfo = (OutputJobInfo)HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - return (Transaction) HCatUtil.deserialize(outputJobInfo.getProperties() - .getProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); - } - - static void setWriteTransaction(Configuration conf, Transaction txn) throws IOException { - OutputJobInfo outputJobInfo = (OutputJobInfo)HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, HCatUtil.serialize(txn)); - conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); - } - - /** - * Get the Revision number that will be assigned to this job's output data - * @param conf configuration of the job - * @return the revision number used - * @throws IOException - */ - public static long getOutputRevision(Configuration conf) throws IOException { - return getWriteTransaction(conf).getRevisionNumber(); - } - } Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java (working copy) @@ -18,104 +18,135 @@ package org.apache.hcatalog.hbase; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; -import org.apache.hadoop.io.Writable; +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.mapred.TableOutputFormat; import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobStatus; -import org.apache.hadoop.mapreduce.OutputCommitter; -import 
org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapred.OutputCommitter; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TaskAttemptContext; +import org.apache.hadoop.util.Progressable; import org.apache.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hcatalog.hbase.snapshot.Transaction; - -import java.io.IOException; - /** - * "Direct" implementation of OutputFormat for HBase. Uses HTable client's put API to write each row to HBase one a - * time. Presently it is just using TableOutputFormat as the underlying implementation in the future we can - * tune this to make the writes faster such as permanently disabling WAL, caching, etc. + * "Direct" implementation of OutputFormat for HBase. Uses HTable client's put + * API to write each row to HBase one a time. Presently it is just using + * TableOutputFormat as the underlying implementation in the future we can tune + * this to make the writes faster such as permanently disabling WAL, caching, + * etc. */ -class HBaseDirectOutputFormat extends OutputFormat,Writable> implements Configurable { +class HBaseDirectOutputFormat extends HBaseBaseOutputFormat { - private TableOutputFormat> outputFormat; + private TableOutputFormat outputFormat; public HBaseDirectOutputFormat() { - this.outputFormat = new TableOutputFormat>(); + this.outputFormat = new TableOutputFormat(); } @Override - public RecordWriter, Writable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - return outputFormat.getRecordWriter(context); + public RecordWriter, Put> getRecordWriter(FileSystem ignored, + JobConf job, String name, Progressable progress) + throws IOException { + long version = HBaseRevisionManagerUtil.getOutputRevision(job); + return new HBaseDirectRecordWriter(outputFormat.getRecordWriter(ignored, job, name, + progress), version); } @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - outputFormat.checkOutputSpecs(context); + public void checkOutputSpecs(FileSystem ignored, JobConf job) + throws IOException { + job.setOutputCommitter(HBaseDirectOutputCommitter.class); + job.setIfUnset(TableOutputFormat.OUTPUT_TABLE, + job.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY)); + outputFormat.checkOutputSpecs(ignored, job); } - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { - return new HBaseDirectOutputCommitter(outputFormat.getOutputCommitter(context)); - } + private static class HBaseDirectRecordWriter implements + RecordWriter, Put> { - @Override - public void setConf(Configuration conf) { - String tableName = conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY); - conf = new Configuration(conf); - conf.set(TableOutputFormat.OUTPUT_TABLE,tableName); - outputFormat.setConf(conf); - } + private RecordWriter, Put> baseWriter; + private final Long outputVersion; - @Override - public Configuration getConf() { - return outputFormat.getConf(); + public HBaseDirectRecordWriter( + RecordWriter, Put> baseWriter, + Long outputVersion) { + this.baseWriter = baseWriter; + this.outputVersion = outputVersion; + } + + @Override + public void write(WritableComparable key, Put value) + throws IOException { + Put put = 
value; + if (outputVersion != null) { + put = new Put(value.getRow(), outputVersion.longValue()); + for (List row : value.getFamilyMap().values()) { + for (KeyValue el : row) { + put.add(el.getFamily(), el.getQualifier(), el.getValue()); + } + } + } + baseWriter.write(key, put); + } + + @Override + public void close(Reporter reporter) throws IOException { + baseWriter.close(reporter); + } + } - private static class HBaseDirectOutputCommitter extends OutputCommitter { - private OutputCommitter baseOutputCommitter; + public static class HBaseDirectOutputCommitter extends OutputCommitter { - public HBaseDirectOutputCommitter(OutputCommitter baseOutputCommitter) throws IOException { - this.baseOutputCommitter = baseOutputCommitter; + public HBaseDirectOutputCommitter() throws IOException { } @Override - public void abortTask(TaskAttemptContext context) throws IOException { - baseOutputCommitter.abortTask(context); + public void abortTask(TaskAttemptContext taskContext) + throws IOException { } @Override - public void commitTask(TaskAttemptContext context) throws IOException { - baseOutputCommitter.commitTask(context); + public void commitTask(TaskAttemptContext taskContext) + throws IOException { } @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - return baseOutputCommitter.needsTaskCommit(context); + public boolean needsTaskCommit(TaskAttemptContext taskContext) + throws IOException { + return false; } @Override - public void setupJob(JobContext context) throws IOException { - baseOutputCommitter.setupJob(context); + public void setupJob(JobContext jobContext) throws IOException { } @Override - public void setupTask(TaskAttemptContext context) throws IOException { - baseOutputCommitter.setupTask(context); + public void setupTask(TaskAttemptContext taskContext) + throws IOException { } @Override - public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { + public void abortJob(JobContext jobContext, int status) + throws IOException { + super.abortJob(jobContext, status); RevisionManager rm = null; try { - baseOutputCommitter.abortJob(jobContext, state); - rm = HBaseHCatStorageHandler.getOpenedRevisionManager(jobContext.getConfiguration()); - rm.abortWriteTransaction(HBaseHCatStorageHandler.getWriteTransaction(jobContext.getConfiguration())); + rm = HBaseRevisionManagerUtil + .getOpenedRevisionManager(jobContext.getConfiguration()); + Transaction writeTransaction = HBaseRevisionManagerUtil + .getWriteTransaction(jobContext.getConfiguration()); + rm.abortWriteTransaction(writeTransaction); } finally { - if(rm != null) + if (rm != null) rm.close(); } } @@ -124,11 +155,12 @@ public void commitJob(JobContext jobContext) throws IOException { RevisionManager rm = null; try { - baseOutputCommitter.commitJob(jobContext); - rm = HBaseHCatStorageHandler.getOpenedRevisionManager(jobContext.getConfiguration()); - rm.commitWriteTransaction(HBaseHCatStorageHandler.getWriteTransaction(jobContext.getConfiguration())); + rm = HBaseRevisionManagerUtil + .getOpenedRevisionManager(jobContext.getConfiguration()); + rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(jobContext + .getConfiguration())); } finally { - if(rm != null) + if (rm != null) rm.close(); } } Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java (revision 0) +++ 
storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java (revision 0) @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hcatalog.hbase; + +import java.io.IOException; +import java.util.Properties; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputFormat; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.util.Progressable; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.mapreduce.OutputJobInfo; + +public class HBaseBaseOutputFormat implements OutputFormat, Put>, + HiveOutputFormat, Put> { + + @Override + public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( + JobConf jc, Path finalOutPath, + Class valueClass, boolean isCompressed, + Properties tableProperties, Progressable progress) + throws IOException { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { + OutputFormat, Put> outputFormat = getOutputFormat(job); + outputFormat.checkOutputSpecs(ignored, job); + } + + @Override + public RecordWriter, Put> getRecordWriter(FileSystem ignored, + JobConf job, String name, Progressable progress) throws IOException { + OutputFormat, Put> outputFormat = getOutputFormat(job); + return outputFormat.getRecordWriter(ignored, job, name, progress); + } + + private OutputFormat, Put> getOutputFormat(JobConf job) + throws IOException { + String outputInfo = job.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(outputInfo); + OutputFormat, Put> outputFormat = null; + if (HBaseHCatStorageHandler.isBulkMode(outputJobInfo)) { + outputFormat = new HBaseBulkOutputFormat(); + } else { + outputFormat = new HBaseDirectOutputFormat(); + } + return outputFormat; + } +} Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java (working copy) @@ -21,33 +21,31 @@ import java.io.IOException; import java.util.List; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.mapred.TableSplit; import org.apache.hadoop.hbase.mapreduce.TableInputFormat; -import org.apache.hadoop.hbase.mapreduce.TableSplit; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapred.HCatMapRedUtil; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatUtil; import org.apache.hcatalog.mapreduce.InputJobInfo; /** * This class HBaseInputFormat is a wrapper class of TableInputFormat in HBase. */ -class HBaseInputFormat extends InputFormat implements Configurable{ +class HBaseInputFormat implements InputFormat { private final TableInputFormat inputFormat; - private final InputJobInfo jobInfo; - private Configuration conf; - public HBaseInputFormat(InputJobInfo jobInfo) { + public HBaseInputFormat() { inputFormat = new TableInputFormat(); - this.jobInfo = jobInfo; } /* @@ -67,20 +65,27 @@ * org.apache.hadoop.mapreduce.TaskAttemptContext) */ @Override - public RecordReader createRecordReader( - InputSplit split, TaskAttemptContext tac) throws IOException, - InterruptedException { + public RecordReader getRecordReader( + InputSplit split, JobConf job, Reporter reporter) + throws IOException { + String jobString = job.get(HCatConstants.HCAT_KEY_JOB_INFO); + InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); - String tableName = inputFormat.getConf().get(TableInputFormat.INPUT_TABLE); - TableSplit tSplit = (TableSplit) split; - HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(jobInfo); - Scan sc = new Scan(inputFormat.getScan()); - sc.setStartRow(tSplit.getStartRow()); - sc.setStopRow(tSplit.getEndRow()); - recordReader.setScan(sc); - recordReader.setHTable(new HTable(this.conf, tableName)); - recordReader.init(); - return recordReader; + String tableName = job.get(TableInputFormat.INPUT_TABLE); + TableSplit tSplit = (TableSplit) split; + HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(inputJobInfo, job); + inputFormat.setConf(job); + Scan inputScan = inputFormat.getScan(); + // TODO: Make the caching configurable by the user + inputScan.setCaching(200); + inputScan.setCacheBlocks(false); + Scan sc = new Scan(inputScan); + sc.setStartRow(tSplit.getStartRow()); + sc.setStopRow(tSplit.getEndRow()); + recordReader.setScan(sc); + recordReader.setHTable(new HTable(job, tableName)); + recordReader.init(); + return recordReader; } /* @@ -97,35 +102,24 @@ * .JobContext) */ @Override - public List getSplits(JobContext jobContext) - throws IOException, InterruptedException { + public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits) + throws IOException { + inputFormat.setConf(job); + return convertSplits(inputFormat.getSplits(HCatMapRedUtil.createJobContext(job, null, + Reporter.NULL))); + } - String tableName = this.conf.get(TableInputFormat.INPUT_TABLE); - if (tableName == null) { - throw new IOException("The input 
table is not set. The input splits cannot be created."); + private InputSplit[] convertSplits(List splits) { + InputSplit[] converted = new InputSplit[splits.size()]; + for (int i = 0; i < splits.size(); i++) { + org.apache.hadoop.hbase.mapreduce.TableSplit tableSplit = + (org.apache.hadoop.hbase.mapreduce.TableSplit) splits.get(i); + TableSplit newTableSplit = new TableSplit(tableSplit.getTableName(), + tableSplit.getStartRow(), + tableSplit.getEndRow(), tableSplit.getRegionLocation()); + converted[i] = newTableSplit; } - return inputFormat.getSplits(jobContext); + return converted; } - public void setConf(Configuration conf) { - this.conf = conf; - inputFormat.setConf(conf); - } - - public Scan getScan() { - return inputFormat.getScan(); - } - - public void setScan(Scan scan) { - inputFormat.setScan(scan); - } - - /* @return - * @see org.apache.hadoop.conf.Configurable#getConf() - */ - @Override - public Configuration getConf() { - return this.conf; - } - } Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java (working copy) @@ -25,7 +25,7 @@ * family and stored in the corresponding znode. When a write transaction is * committed, the transaction object is removed from the list. */ -class FamilyRevision implements +public class FamilyRevision implements Comparable { private long revision; @@ -42,11 +42,11 @@ this.timestamp = ts; } - long getRevision() { + public long getRevision() { return revision; } - long getExpireTimestamp() { + public long getExpireTimestamp() { return timestamp; } Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java (working copy) @@ -89,6 +89,17 @@ throws IOException; /** + * Get the list of aborted Transactions for a column family + * + * @param table the table name + * @param columnFamily the column family name + * @return a list of aborted WriteTransactions + * @throws java.io.IOException + */ + public List getAbortedWriteTransactions(String table, + String columnFamily) throws IOException; + + /** * Create the latest snapshot of the table. 
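As a rough illustration of how the getAbortedWriteTransactions() method promoted to the RevisionManager interface above is meant to be consumed, a minimal sketch follows. It is not part of this patch; the table and column-family names are illustrative, and the imports used elsewhere in the patch (java.util, org.apache.hcatalog.hbase.snapshot) are assumed.

    // Sketch: collect the revision numbers of aborted write transactions for one
    // column family, given an already opened RevisionManager instance.
    static Set<Long> abortedRevisions(RevisionManager rm, String table, String family)
            throws IOException {
        Set<Long> aborted = new HashSet<Long>();
        List<FamilyRevision> transactions = rm.getAbortedWriteTransactions(table, family);
        if (transactions != null) {
            for (FamilyRevision t : transactions) {
                aborted.add(t.getRevision());
            }
        }
        return aborted;
    }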
* * @param tableName Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java (working copy) @@ -365,14 +365,8 @@ return zkUtil.getTransactionList(path); } - /** - * Get the list of aborted Transactions for a column family - * @param table the table name - * @param columnFamily the column family name - * @return a list of aborted WriteTransactions - * @throws java.io.IOException - */ - List getAbortedWriteTransactions(String table, + @Override + public List getAbortedWriteTransactions(String table, String columnFamily) throws IOException { String path = PathUtil.getAbortInformationPath(baseDir, table, columnFamily); return zkUtil.getTransactionList(path); Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputStorageDriver.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputStorageDriver.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputStorageDriver.java (working copy) @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.hbase.snapshot.Transaction; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Properties; - - -/** - * Storage driver which works with {@link HBaseBulkOutputFormat} and makes use - * of HBase's "bulk load" feature to get data into HBase. This should be - * efficient for large batch writes in comparison to HBaseDirectOutputStorageDriver. 
- */ -public class HBaseBulkOutputStorageDriver extends HBaseBaseOutputStorageDriver { - private String PROPERTY_TABLE_LOCATION = "hcat.hbase.mapreduce.table.location"; - private String PROPERTY_INT_OUTPUT_LOCATION = "hcat.hbase.mapreduce.intermediateOutputLocation"; - private OutputFormat outputFormat; - private final static ImmutableBytesWritable EMPTY_KEY = new ImmutableBytesWritable(new byte[0]); - - @Override - public void initialize(JobContext context, Properties hcatProperties) throws IOException { - super.initialize(context, hcatProperties); - - //initialize() gets called multiple time in the lifecycle of an MR job, client, mapper, reducer, etc - //depending on the case we have to make sure for some context variables we set here that they don't get set again - if(!outputJobInfo.getProperties().containsKey(PROPERTY_INT_OUTPUT_LOCATION)) { - Transaction txn = (Transaction) - HCatUtil.deserialize(outputJobInfo.getProperties().getProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); - String tableLocation = context.getConfiguration().get(PROPERTY_TABLE_LOCATION); - String location = new Path(tableLocation, "REVISION_"+txn.getRevisionNumber()).toString(); - outputJobInfo.getProperties().setProperty(PROPERTY_INT_OUTPUT_LOCATION, location); - //We are writing out an intermediate sequenceFile hence location is not passed in OutputJobInfo.getLocation() - //TODO replace this with a mapreduce constant when available - context.getConfiguration().set("mapred.output.dir", location); - //Temporary fix until support for secure hbase is available - //We need the intermediate directory to be world readable - //so that the hbase user can import the generated hfiles - if(context.getConfiguration().getBoolean("hadoop.security.authorization",false)) { - Path p = new Path(tableLocation); - FileSystem fs = FileSystem.get(context.getConfiguration()); - fs.setPermission(new Path(tableLocation), - FsPermission.valueOf("drwx--x--x")); - while((p = p.getParent()) != null) { - if(!fs.getFileStatus(p).getPermission().getOtherAction().implies(FsAction.EXECUTE)) - throw new IOException("Table's parent directories must at least have global execute permissions."); - } - } - } - - outputFormat = new HBaseBulkOutputFormat(); - context.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); - } - - @Override - public OutputFormat, ? 
extends Writable> getOutputFormat() throws IOException { - return outputFormat; - } - - @Override - public WritableComparable generateKey(HCatRecord value) throws IOException { - return EMPTY_KEY; - } - - @Override - public String getOutputLocation(JobContext jobContext, String tableLocation, List partitionCols, Map partitionValues, String dynHash) throws IOException { - //TODO have HCatalog common objects expose more information - //this is the only way to pickup table location for storageDrivers - jobContext.getConfiguration().set(PROPERTY_TABLE_LOCATION, tableLocation); - return null; - } -} Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -27,15 +28,20 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.mapreduce.TableRecordReader; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.mapred.RecordReader; import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.hbase.snapshot.FamilyRevision; +import org.apache.hcatalog.hbase.snapshot.RevisionManager; import org.apache.hcatalog.hbase.snapshot.TableSnapshot; import org.apache.hcatalog.mapreduce.InputJobInfo; @@ -43,59 +49,92 @@ * The Class HbaseSnapshotRecordReader implements logic for filtering records * based on snapshot. 
*/ -class HbaseSnapshotRecordReader extends TableRecordReader { +class HbaseSnapshotRecordReader implements RecordReader { static final Log LOG = LogFactory.getLog(HbaseSnapshotRecordReader.class); + private final InputJobInfo inpJobInfo; + private final Configuration conf; + private final int maxRevisions = 1; private ResultScanner scanner; private Scan scan; private HTable htable; - private ImmutableBytesWritable key; - private Result value; - private InputJobInfo inpJobInfo; private TableSnapshot snapshot; - private int maxRevisions; private Iterator resultItr; + private Set allAbortedTransactions; + private DataOutputBuffer valueOut = new DataOutputBuffer(); + private DataInputBuffer valueIn = new DataInputBuffer(); - - HbaseSnapshotRecordReader(InputJobInfo inputJobInfo) throws IOException { + HbaseSnapshotRecordReader(InputJobInfo inputJobInfo, Configuration conf) throws IOException { this.inpJobInfo = inputJobInfo; - String snapshotString = inpJobInfo.getProperties().getProperty( - HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); + this.conf = conf; + String snapshotString = conf.get(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); HCatTableSnapshot hcatSnapshot = (HCatTableSnapshot) HCatUtil .deserialize(snapshotString); - this.snapshot = HBaseInputStorageDriver.convertSnapshot(hcatSnapshot, + this.snapshot = HBaseRevisionManagerUtil.convertSnapshot(hcatSnapshot, inpJobInfo.getTableInfo()); - this.maxRevisions = 1; } - /* @param firstRow The first record in the split. - /* @throws IOException - * @see org.apache.hadoop.hbase.mapreduce.TableRecordReader#restart(byte[]) - */ - @Override + public void init() throws IOException { + restart(scan.getStartRow()); + } + public void restart(byte[] firstRow) throws IOException { + allAbortedTransactions = getAbortedTransactions(Bytes.toString(htable.getTableName()), scan); + long maxValidRevision = snapshot.getLatestRevision(); + while (allAbortedTransactions.contains(maxValidRevision)) { + maxValidRevision--; + } + long minValidRevision = getMinimumRevision(scan, snapshot); + while (allAbortedTransactions.contains(minValidRevision)) { + minValidRevision--; + } Scan newScan = new Scan(scan); newScan.setStartRow(firstRow); + //TODO: See if filters in 0.92 can be used to optimize the scan + newScan.setTimeRange(minValidRevision, maxValidRevision + 1); + newScan.setMaxVersions(); this.scanner = this.htable.getScanner(newScan); resultItr = this.scanner.iterator(); } - /* @throws IOException - * @see org.apache.hadoop.hbase.mapreduce.TableRecordReader#init() - */ - @Override - public void init() throws IOException { - restart(scan.getStartRow()); + private Set getAbortedTransactions(String tableName, Scan scan) throws IOException { + Set abortedTransactions = new HashSet(); + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + byte[][] families = scan.getFamilies(); + for (byte[] familyKey : families) { + String family = Bytes.toString(familyKey); + List abortedWriteTransactions = rm.getAbortedWriteTransactions( + tableName, family); + if (abortedWriteTransactions != null) { + for (FamilyRevision revision : abortedWriteTransactions) { + abortedTransactions.add(revision.getRevision()); + } + } + } + return abortedTransactions; + } finally { + HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); + } } + private long getMinimumRevision(Scan scan, TableSnapshot snapshot) { + long minRevision = snapshot.getLatestRevision(); + byte[][] families = scan.getFamilies(); + for (byte[] familyKey : families) { + 
String family = Bytes.toString(familyKey); + long revision = snapshot.getRevision(family); + if (revision < minRevision) + minRevision = revision; + } + return minRevision; + } + /* * @param htable The HTable ( of HBase) to use for the record reader. * - * @see - * org.apache.hadoop.hbase.mapreduce.TableRecordReader#setHTable(org.apache - * .hadoop.hbase.client.HTable) */ - @Override public void setHTable(HTable htable) { this.htable = htable; } @@ -103,64 +142,51 @@ /* * @param scan The scan to be used for reading records. * - * @see - * org.apache.hadoop.hbase.mapreduce.TableRecordReader#setScan(org.apache - * .hadoop.hbase.client.Scan) */ - @Override public void setScan(Scan scan) { this.scan = scan; } - /* - * @see org.apache.hadoop.hbase.mapreduce.TableRecordReader#close() - */ @Override - public void close() { - this.resultItr = null; - this.scanner.close(); + public ImmutableBytesWritable createKey() { + return new ImmutableBytesWritable(); } - /* @return The row of hbase record. - /* @throws IOException - /* @throws InterruptedException - * @see org.apache.hadoop.hbase.mapreduce.TableRecordReader#getCurrentKey() - */ @Override - public ImmutableBytesWritable getCurrentKey() throws IOException, - InterruptedException { - return key; + public Result createValue() { + return new Result(); } - /* @return Single row result of scan of HBase table. - /* @throws IOException - /* @throws InterruptedException - * @see org.apache.hadoop.hbase.mapreduce.TableRecordReader#getCurrentValue() - */ @Override - public Result getCurrentValue() throws IOException, InterruptedException { - return value; + public long getPos() { + // This should be the ordinal tuple in the range; + // not clear how to calculate... + return 0; } - /* @return Returns whether a next key-value is available for reading. - * @see org.apache.hadoop.hbase.mapreduce.TableRecordReader#nextKeyValue() - */ @Override - public boolean nextKeyValue() { + public float getProgress() throws IOException { + // Depends on the total number of tuples + return 0; + } + @Override + public boolean next(ImmutableBytesWritable key, Result value) throws IOException { if (this.resultItr == null) { LOG.warn("The HBase result iterator is found null. It is possible" + " that the record reader has already been closed."); } else { - - if (key == null) - key = new ImmutableBytesWritable(); while (resultItr.hasNext()) { Result temp = resultItr.next(); Result hbaseRow = prepareResult(temp.list()); if (hbaseRow != null) { + // Update key and value. Currently no way to avoid serialization/de-serialization + // as no setters are available. key.set(hbaseRow.getRow()); - value = hbaseRow; + valueOut.reset(); + hbaseRow.write(valueOut); + valueIn.reset(valueOut.getData(), valueOut.getLength()); + value.readFields(valueIn); return true; } @@ -185,6 +211,11 @@ } String family = Bytes.toString(kv.getFamily()); + //Ignore aborted transactions + if (allAbortedTransactions.contains(kv.getTimestamp())) { + continue; + } + long desiredTS = snapshot.getRevision(family); if (kv.getTimestamp() <= desiredTS) { kvs.add(kv); @@ -213,13 +244,13 @@ } } - /* @return The progress of the record reader. 
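The record reader changes above amount to two steps: restrict the HBase scan to the snapshot's revision window, then drop individual KeyValues whose timestamps belong to aborted transactions. A minimal sketch of the scan setup follows; it assumes minValidRevision and maxValidRevision have already been derived from the snapshot and the aborted-transaction set, and that the fragment sits inside a method that declares IOException (variable names are illustrative, not taken from the patch):

    // Sketch: bound the scan by revision numbers, as restart() does. The upper
    // bound of setTimeRange() is exclusive, hence the +1; setMaxVersions() keeps
    // every version inside the window so prepareResult() can pick the right cells.
    Scan snapshotScan = new Scan(baseScan);
    snapshotScan.setStartRow(firstRow);
    snapshotScan.setTimeRange(minValidRevision, maxValidRevision + 1);
    snapshotScan.setMaxVersions();
    ResultScanner scanner = htable.getScanner(snapshotScan);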
- * @see org.apache.hadoop.hbase.mapreduce.TableRecordReader#getProgress() + /* + * @see org.apache.hadoop.hbase.mapred.TableRecordReader#close() */ @Override - public float getProgress() { - // Depends on the total number of tuples - return 0; + public void close() { + this.resultItr = null; + this.scanner.close(); } } Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java (revision 1295948) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java (working copy) @@ -1,160 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hcatalog.hbase; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import java.io.IOException; +import java.util.List; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.FileOutputCommitter; +import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobStatus; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.hcatalog.common.HCatUtil; +import org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapred.OutputCommitter; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.hadoop.mapred.TaskAttemptContext; +import org.apache.hadoop.util.Progressable; import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.RevisionManagerFactory; -import org.apache.hcatalog.hbase.snapshot.Transaction; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import java.io.IOException; - /** - * Class which imports data into HBase via it's "bulk load" feature. 
Wherein regions - * are created by the MR job using HFileOutputFormat and then later "moved" into - * the appropriate region server. + * Class which imports data into HBase via it's "bulk load" feature. Wherein + * regions are created by the MR job using HFileOutputFormat and then later + * "moved" into the appropriate region server. */ -class HBaseBulkOutputFormat extends OutputFormat,Put> { - private final static ImmutableBytesWritable EMPTY_LIST = new ImmutableBytesWritable(new byte[0]); - private SequenceFileOutputFormat,Put> baseOutputFormat; - private final static Log LOG = LogFactory.getLog(HBaseBulkOutputFormat.class); +class HBaseBulkOutputFormat extends HBaseBaseOutputFormat { + private final static ImmutableBytesWritable EMPTY_LIST = new ImmutableBytesWritable( + new byte[0]); + private SequenceFileOutputFormat, Put> baseOutputFormat; + public HBaseBulkOutputFormat() { - baseOutputFormat = new SequenceFileOutputFormat,Put>(); + baseOutputFormat = new SequenceFileOutputFormat, Put>(); } @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - baseOutputFormat.checkOutputSpecs(context); - //Get jobTracker delegation token if security is enabled - //we need to launch the ImportSequenceFile job - if(context.getConfiguration().getBoolean("hadoop.security.authorization",false)) { - JobClient jobClient = new JobClient(new JobConf(context.getConfiguration())); - context.getCredentials().addToken(new Text("my mr token"), jobClient.getDelegationToken(null)); - } + public void checkOutputSpecs(FileSystem ignored, JobConf job) + throws IOException { + job.setOutputKeyClass(ImmutableBytesWritable.class); + job.setOutputValueClass(Put.class); + job.setOutputCommitter(HBaseBulkOutputCommitter.class); + baseOutputFormat.checkOutputSpecs(ignored, job); + getJTDelegationToken(job); } @Override - public RecordWriter, Put> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - //TODO use a constant/static setter when available - context.getConfiguration().setClass("mapred.output.key.class",ImmutableBytesWritable.class,Object.class); - context.getConfiguration().setClass("mapred.output.value.class",Put.class,Object.class); - return new HBaseBulkRecordWriter(baseOutputFormat.getRecordWriter(context)); + public RecordWriter, Put> getRecordWriter( + FileSystem ignored, JobConf job, String name, Progressable progress) + throws IOException { + long version = HBaseRevisionManagerUtil.getOutputRevision(job); + return new HBaseBulkRecordWriter(baseOutputFormat.getRecordWriter( + ignored, job, name, progress), version); } - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException { - return new HBaseBulkOutputCommitter(baseOutputFormat.getOutputCommitter(context)); + private void getJTDelegationToken(JobConf job) throws IOException { + // Get jobTracker delegation token if security is enabled + // we need to launch the ImportSequenceFile job + if (job.getBoolean("hadoop.security.authorization", false)) { + JobClient jobClient = new JobClient(new JobConf(job)); + try { + job.getCredentials().addToken(new Text("my mr token"), + jobClient.getDelegationToken(null)); + } catch (InterruptedException e) { + throw new IOException("Error while getting JT delegation token", e); + } + } } - private static class HBaseBulkRecordWriter extends RecordWriter,Put> { - private RecordWriter,Put> baseWriter; + private static class HBaseBulkRecordWriter implements + RecordWriter, Put> { - public 
HBaseBulkRecordWriter(RecordWriter,Put> baseWriter) { + private RecordWriter, Put> baseWriter; + private final Long outputVersion; + + public HBaseBulkRecordWriter( + RecordWriter, Put> baseWriter, + Long outputVersion) { this.baseWriter = baseWriter; + this.outputVersion = outputVersion; } @Override - public void write(WritableComparable key, Put value) throws IOException, InterruptedException { - //we ignore the key - baseWriter.write(EMPTY_LIST, value); + public void write(WritableComparable key, Put value) + throws IOException { + Put put = value; + if (outputVersion != null) { + put = new Put(value.getRow(), outputVersion.longValue()); + for (List row : value.getFamilyMap().values()) { + for (KeyValue el : row) { + put.add(el.getFamily(), el.getQualifier(), el.getValue()); + } + } + } + // we ignore the key + baseWriter.write(EMPTY_LIST, put); } @Override - public void close(TaskAttemptContext context) throws IOException, InterruptedException { - baseWriter.close(context); + public void close(Reporter reporter) throws IOException { + baseWriter.close(reporter); } } - private static class HBaseBulkOutputCommitter extends OutputCommitter { - private OutputCommitter baseOutputCommitter; + public static class HBaseBulkOutputCommitter extends OutputCommitter { - public HBaseBulkOutputCommitter(OutputCommitter baseOutputCommitter) throws IOException { - this.baseOutputCommitter = baseOutputCommitter; + private final OutputCommitter baseOutputCommitter; + + public HBaseBulkOutputCommitter() { + baseOutputCommitter = new FileOutputCommitter(); } @Override - public void abortTask(TaskAttemptContext context) throws IOException { - baseOutputCommitter.abortTask(context); + public void abortTask(TaskAttemptContext taskContext) + throws IOException { + baseOutputCommitter.abortTask(taskContext); } @Override - public void commitTask(TaskAttemptContext context) throws IOException { - baseOutputCommitter.commitTask(context); + public void commitTask(TaskAttemptContext taskContext) + throws IOException { + baseOutputCommitter.commitTask(taskContext); } @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - return baseOutputCommitter.needsTaskCommit(context); + public boolean needsTaskCommit(TaskAttemptContext taskContext) + throws IOException { + return baseOutputCommitter.needsTaskCommit(taskContext); } @Override - public void setupJob(JobContext context) throws IOException { - baseOutputCommitter.setupJob(context); + public void setupJob(JobContext jobContext) throws IOException { + baseOutputCommitter.setupJob(jobContext); } @Override - public void setupTask(TaskAttemptContext context) throws IOException { - baseOutputCommitter.setupTask(context); + public void setupTask(TaskAttemptContext taskContext) + throws IOException { + baseOutputCommitter.setupTask(taskContext); } @Override - public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { + public void abortJob(JobContext jobContext, int status) + throws IOException { + baseOutputCommitter.abortJob(jobContext, status); RevisionManager rm = null; try { - baseOutputCommitter.abortJob(jobContext,state); - rm = HBaseHCatStorageHandler.getOpenedRevisionManager(jobContext.getConfiguration()); - rm.abortWriteTransaction(HBaseHCatStorageHandler.getWriteTransaction(jobContext.getConfiguration())); + rm = HBaseRevisionManagerUtil + .getOpenedRevisionManager(jobContext.getConfiguration()); + rm.abortWriteTransaction(HBaseRevisionManagerUtil + 
.getWriteTransaction(jobContext.getConfiguration())); } finally { cleanIntermediate(jobContext); - if(rm != null) + if (rm != null) rm.close(); } } @Override public void commitJob(JobContext jobContext) throws IOException { + baseOutputCommitter.commitJob(jobContext); RevisionManager rm = null; try { - baseOutputCommitter.commitJob(jobContext); Configuration conf = jobContext.getConfiguration(); - Path srcPath = FileOutputFormat.getOutputPath(jobContext); - Path destPath = new Path(srcPath.getParent(),srcPath.getName()+"_hfiles"); + Path srcPath = FileOutputFormat.getOutputPath(jobContext.getJobConf()); + Path destPath = new Path(srcPath.getParent(), srcPath.getName() + "_hfiles"); ImportSequenceFile.runJob(jobContext, - conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY), - srcPath, - destPath); - rm = HBaseHCatStorageHandler.getOpenedRevisionManager(jobContext.getConfiguration()); - rm.commitWriteTransaction(HBaseHCatStorageHandler.getWriteTransaction(jobContext.getConfiguration())); + conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY), + srcPath, + destPath); + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(conf)); cleanIntermediate(jobContext); } finally { - if(rm != null) + if (rm != null) rm.close(); } } - public void cleanIntermediate(JobContext jobContext) throws IOException { + private void cleanIntermediate(JobContext jobContext) + throws IOException { FileSystem fs = FileSystem.get(jobContext.getConfiguration()); - fs.delete(FileOutputFormat.getOutputPath(jobContext),true); + fs.delete(FileOutputFormat.getOutputPath(jobContext.getJobConf()), true); } } } Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java =================================================================== --- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java (revision 0) +++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java (revision 0) @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
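Both output committers in this patch wrap their revision-manager work in the same open/use/close shape. A condensed sketch of that pattern, assuming conf is a Configuration that already carries the serialized OutputJobInfo and the enclosing method declares IOException:

    // Sketch: commit the write transaction recorded in the job configuration,
    // always closing the revision manager afterwards.
    RevisionManager rm = null;
    try {
        rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
        rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(conf));
    } finally {
        if (rm != null)
            rm.close();
    }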
+ */ + +package org.apache.hcatalog.hbase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hive.hbase.HBaseSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hcatalog.hbase.snapshot.RevisionManagerFactory; +import org.apache.hcatalog.hbase.snapshot.TableSnapshot; +import org.apache.hcatalog.hbase.snapshot.Transaction; +import org.apache.hcatalog.hbase.snapshot.ZKBasedRevisionManager; +import org.apache.hcatalog.mapreduce.HCatTableInfo; +import org.apache.hcatalog.mapreduce.InputJobInfo; +import org.apache.hcatalog.mapreduce.OutputJobInfo; +import org.apache.hcatalog.mapreduce.StorerInfo; + + +/** + * The Class HBaseRevisionManagerUtil has utility methods to interact with Revision Manager + * + */ +public class HBaseRevisionManagerUtil { + + private final static Log LOG = LogFactory.getLog(HBaseRevisionManagerUtil.class); + + private HBaseRevisionManagerUtil() { + } + + /** + * Creates the latest snapshot of the table. + * + * @param jobConf The job configuration. + * @param hbaseTableName The fully qualified name of the HBase table. + * @param tableInfo HCat table information + * @return An instance of HCatTableSnapshot + * @throws IOException Signals that an I/O exception has occurred. + */ + static HCatTableSnapshot createSnapshot(Configuration jobConf, + String hbaseTableName, HCatTableInfo tableInfo ) throws IOException { + + RevisionManager rm = null; + TableSnapshot snpt; + try { + rm = getOpenedRevisionManager(jobConf); + snpt = rm.createSnapshot(hbaseTableName); + } finally { + closeRevisionManagerQuietly(rm); + } + + HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(snpt, tableInfo); + return hcatSnapshot; + } + + /** + * Creates the snapshot using the revision specified by the user. + * + * @param jobConf The job configuration. + * @param tableName The fully qualified name of the table whose snapshot is being taken. + * @param revision The revision number to use for the snapshot. + * @return An instance of HCatTableSnapshot. + * @throws IOException Signals that an I/O exception has occurred. + */ + static HCatTableSnapshot createSnapshot(Configuration jobConf, + String tableName, long revision) + throws IOException { + + TableSnapshot snpt; + RevisionManager rm = null; + try { + rm = getOpenedRevisionManager(jobConf); + snpt = rm.createSnapshot(tableName, revision); + } finally { + closeRevisionManagerQuietly(rm); + } + + String inputJobString = jobConf.get(HCatConstants.HCAT_KEY_JOB_INFO); + if(inputJobString == null){ + throw new IOException( + "InputJobInfo information not found in JobContext. 
" + + "HCatInputFormat.setInput() not called?"); + } + InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(inputJobString); + HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil + .convertSnapshot(snpt, inputInfo.getTableInfo()); + + return hcatSnapshot; + } + + /** + * Gets an instance of revision manager which is opened. + * + * @param jobConf The job configuration. + * @return RevisionManager An instance of revision manager. + * @throws IOException + */ + static RevisionManager getOpenedRevisionManager(Configuration jobConf) throws IOException { + + Properties properties = new Properties(); + String zkHostList = jobConf.get(HConstants.ZOOKEEPER_QUORUM); + int port = jobConf.getInt("hbase.zookeeper.property.clientPort", + HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT); + + if (zkHostList != null) { + String[] splits = zkHostList.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + sb.append(','); + } + + sb.deleteCharAt(sb.length() - 1); + properties.put(ZKBasedRevisionManager.HOSTLIST, sb.toString()); + } + String dataDir = jobConf.get(ZKBasedRevisionManager.DATADIR); + if (dataDir != null) { + properties.put(ZKBasedRevisionManager.DATADIR, dataDir); + } + String rmClassName = jobConf.get( + RevisionManager.REVISION_MGR_IMPL_CLASS, + ZKBasedRevisionManager.class.getName()); + properties.put(RevisionManager.REVISION_MGR_IMPL_CLASS, rmClassName); + RevisionManager revisionManger = RevisionManagerFactory + .getRevisionManager(properties); + revisionManger.open(); + return revisionManger; + } + + static void closeRevisionManagerQuietly(RevisionManager rm) { + if (rm != null) { + try { + rm.close(); + } catch (IOException e) { + LOG.warn("Error while trying to close revision manager", e); + } + } + } + + + static HCatTableSnapshot convertSnapshot(TableSnapshot hbaseSnapshot, + HCatTableInfo hcatTableInfo) throws IOException { + + HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); + Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); + HashMap revisionMap = new HashMap(); + + for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { + if(hcatHbaseColMap.containsKey(fSchema.getName())){ + String colFamily = hcatHbaseColMap.get(fSchema.getName()); + long revisionID = hbaseSnapshot.getRevision(colFamily); + revisionMap.put(fSchema.getName(), revisionID); + } + } + + HCatTableSnapshot hcatSnapshot = new HCatTableSnapshot( + hcatTableInfo.getDatabaseName(), hcatTableInfo.getTableName(),revisionMap,hbaseSnapshot.getLatestRevision()); + return hcatSnapshot; + } + + static TableSnapshot convertSnapshot(HCatTableSnapshot hcatSnapshot, + HCatTableInfo hcatTableInfo) throws IOException { + + HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); + Map revisionMap = new HashMap(); + Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); + for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { + String colFamily = hcatHbaseColMap.get(fSchema.getName()); + if (hcatSnapshot.containsColumn(fSchema.getName())) { + long revision = hcatSnapshot.getRevision(fSchema.getName()); + revisionMap.put(colFamily, revision); + } + } + + String fullyQualifiedName = hcatSnapshot.getDatabaseName() + "." + + hcatSnapshot.getTableName(); + return new TableSnapshot(fullyQualifiedName, revisionMap,hcatSnapshot.getLatestRevision()); + + } + + /** + * Begins a transaction in the revision manager for the given table. 
+     * @param qualifiedTableName Name of the table
+     * @param tableInfo HCat Table information
+     * @param jobConf Job Configuration
+     * @return The new transaction in revision manager
+     * @throws IOException
+     */
+    static Transaction beginWriteTransaction(String qualifiedTableName,
+            HCatTableInfo tableInfo, Configuration jobConf) throws IOException {
+        Transaction txn;
+        RevisionManager rm = null;
+        try {
+            rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(jobConf);
+            String hBaseColumns = tableInfo.getStorerInfo().getProperties()
+                    .getProperty(HBaseConstants.PROPERTY_COLUMN_MAPPING_KEY);
+            String[] splits = hBaseColumns.split("[,|:]");
+            Set<String> families = new HashSet<String>();
+            for (int i = 0; i < splits.length; i += 2) {
+                if (!splits[i].isEmpty())
+                    families.add(splits[i]);
+            }
+            txn = rm.beginWriteTransaction(qualifiedTableName, new ArrayList<String>(families));
+        } finally {
+            HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm);
+        }
+        return txn;
+    }
+
+    static Transaction getWriteTransaction(Configuration conf) throws IOException {
+        OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
+        return (Transaction) HCatUtil.deserialize(outputJobInfo.getProperties()
+                .getProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY));
+    }
+
+    static void setWriteTransaction(Configuration conf, Transaction txn) throws IOException {
+        OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
+        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, HCatUtil.serialize(txn));
+        conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
+    }
+
+    /**
+     * Gets the revision number that will be assigned to this job's output data.
+     * @param conf configuration of the job
+     * @return the revision number used
+     * @throws IOException
+     */
+    static long getOutputRevision(Configuration conf) throws IOException {
+        return getWriteTransaction(conf).getRevisionNumber();
+    }
+
+    private static Map<String, String> getHCatHBaseColumnMapping(HCatTableInfo hcatTableInfo)
+            throws IOException {
+
+        HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns();
+        StorerInfo storeInfo = hcatTableInfo.getStorerInfo();
+        String hbaseColumnMapping = storeInfo.getProperties().getProperty(
+                HBaseConstants.PROPERTY_COLUMN_MAPPING_KEY);
+
+        Map<String, String> hcatHbaseColMap = new HashMap<String, String>();
+        List<String> columnFamilies = new ArrayList<String>();
+        List<String> columnQualifiers = new ArrayList<String>();
+        try {
+            HBaseSerDe.parseColumnMapping(hbaseColumnMapping, columnFamilies,
+                    null, columnQualifiers, null);
+        } catch (SerDeException e) {
+            throw new IOException("Exception while converting snapshots.", e);
+        }
+
+        for (HCatFieldSchema column : hcatTableSchema.getFields()) {
+            int fieldPos = hcatTableSchema.getPosition(column.getName());
+            String colFamily = columnFamilies.get(fieldPos);
+            if (colFamily.equals(HBaseSerDe.HBASE_KEY_COL) == false) {
+                hcatHbaseColMap.put(column.getName(), colFamily);
+            }
+        }
+
+        return hcatHbaseColMap;
+    }
+
+}
Index: storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseSerDeResultConverter.java
===================================================================
--- storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseSerDeResultConverter.java (revision 1295948)
+++ storage-drivers/hbase/src/java/org/apache/hcatalog/hbase/HBaseSerDeResultConverter.java (working copy)
@@ -1,321 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hcatalog.hbase;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hive.hbase.HBaseSerDe;
-import org.apache.hadoop.hive.hbase.LazyHBaseRow;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hcatalog.common.HCatConstants;
-import org.apache.hcatalog.data.DefaultHCatRecord;
-import org.apache.hcatalog.data.HCatRecord;
-import org.apache.hcatalog.data.schema.HCatFieldSchema;
-import org.apache.hcatalog.data.schema.HCatSchema;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-/**
- * Implementation of ResultConverter using HBaseSerDe.
- * The mapping between the HBase schema and the HCatRecord schema is defined by
- * {@link HBaseConstants#PROPERTY_COLUMN_MAPPING_KEY}.
- */
-class HBaseSerDeResultConverter implements ResultConverter {
-
-    private HBaseSerDe serDe;
-    private HCatSchema schema;
-    private HCatSchema outputSchema;
-    private StructObjectInspector hCatRecordOI;
-    private StructObjectInspector lazyHBaseRowOI;
-    private String hbaseColumnMapping;
-    private final Long outputVersion;
-
-    /**
-     * @param schema table schema
-     * @param outputSchema schema of projected output
-     * @param hcatProperties table properties
-     * @throws IOException thrown if hive's HBaseSerDe couldn't be initialized
-     */
-    HBaseSerDeResultConverter(HCatSchema schema,
-                              HCatSchema outputSchema,
-                              Properties hcatProperties) throws IOException {
-        this(schema, outputSchema, hcatProperties, null);
-    }
-
-    /**
-     * @param schema table schema
-     * @param outputSchema schema of projected output
-     * @param hcatProperties table properties
-     * @param outputVersion value to write in timestamp field
-     * @throws IOException thrown if hive's HBaseSerDe couldn't be initialized
-     */
-    HBaseSerDeResultConverter(HCatSchema schema,
-                              HCatSchema outputSchema,
-                              Properties hcatProperties,
-                              Long outputVersion) throws IOException {
-
-        hbaseColumnMapping = hcatProperties.getProperty(HBaseConstants.PROPERTY_COLUMN_MAPPING_KEY);
-        hcatProperties.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, hbaseColumnMapping);
-
-        this.outputVersion = outputVersion;
-
-        this.schema = schema;
-        if (outputSchema == null) {
-            this.outputSchema = schema;
-        }
-        else {
-            this.outputSchema = outputSchema;
-        }
-
-        hCatRecordOI = createStructObjectInspector();
-        try {
-            serDe = new HBaseSerDe();
-            serDe.initialize(new Configuration(), hcatProperties);
-            lazyHBaseRowOI = (StructObjectInspector) serDe.getObjectInspector();
-        } catch (SerDeException e) {
-            throw new IOException("SerDe initialization failed", e);
-        }
-    }
-
-    @Override
-    public Put convert(HCatRecord record) throws IOException {
-        try {
-            //small hack to explicitly specify timestamp/version number to use
-            //since HBaseSerDe does not support specifying it
-            //will have to decide whether we will write our own or contribute code
-            //for the SerDe
-            Put put = (Put) serDe.serialize(record.getAll(), hCatRecordOI);
-            Put res;
-            if (outputVersion == null) {
-                res = put;
-            }
-            else {
-                res = new Put(put.getRow(), outputVersion.longValue());
-                for (List<KeyValue> row : put.getFamilyMap().values()) {
-                    for (KeyValue el : row) {
-                        res.add(el.getFamily(), el.getQualifier(), el.getValue());
-                    }
-                }
-            }
-            return res;
-        } catch (SerDeException e) {
-            throw new IOException("serialization failed", e);
-        }
-    }
-
-    @Override
-    public HCatRecord convert(Result result) throws IOException {
-        // Deserialize bytesRefArray into struct and then convert that struct to
-        // HCatRecord.
-        LazyHBaseRow struct;
-        try {
-            struct = (LazyHBaseRow) serDe.deserialize(result);
-        } catch (SerDeException e) {
-            throw new IOException(e);
-        }
-
-        List<Object> outList = new ArrayList<Object>(outputSchema.size());
-
-        String colName;
-        Integer index;
-
-        for (HCatFieldSchema col : outputSchema.getFields()) {
-
-            colName = col.getName().toLowerCase();
-            index = outputSchema.getPosition(colName);
-
-            if (index != null) {
-                StructField field = lazyHBaseRowOI.getStructFieldRef(colName);
-                outList.add(getTypedObj(lazyHBaseRowOI.getStructFieldData(struct, field), field.getFieldObjectInspector()));
-            }
-        }
-        return new DefaultHCatRecord(outList);
-    }
-
-    private Object getTypedObj(Object data, ObjectInspector oi) throws IOException {
-        // The real work-horse method. We are gobbling up all the laziness benefits
-        // of Hive-LazyHBaseRow by deserializing everything and creating crisp HCatRecord
-        // with crisp Java objects inside it. We have to do it because the higher layer
-        // may not know how to do it.
-        //TODO leverage laziness of SerDe
-        switch (oi.getCategory()) {
-
-        case PRIMITIVE:
-            return ((PrimitiveObjectInspector) oi).getPrimitiveJavaObject(data);
-
-        case MAP:
-            MapObjectInspector moi = (MapObjectInspector) oi;
-            Map<?, ?> lazyMap = moi.getMap(data);
-            ObjectInspector keyOI = moi.getMapKeyObjectInspector();
-            ObjectInspector valOI = moi.getMapValueObjectInspector();
-            Map<Object, Object> typedMap = new HashMap<Object, Object>(lazyMap.size());
-            for (Map.Entry<?, ?> e : lazyMap.entrySet()) {
-                typedMap.put(getTypedObj(e.getKey(), keyOI), getTypedObj(e.getValue(), valOI));
-            }
-            return typedMap;
-
-        case LIST:
-            ListObjectInspector loi = (ListObjectInspector) oi;
-            List<?> lazyList = loi.getList(data);
-            ObjectInspector elemOI = loi.getListElementObjectInspector();
-            List<Object> typedList = new ArrayList<Object>(lazyList.size());
-            Iterator<?> itr = lazyList.listIterator();
-            while (itr.hasNext()) {
-                typedList.add(getTypedObj(itr.next(), elemOI));
-            }
-            return typedList;
-
-        case STRUCT:
-            StructObjectInspector soi = (StructObjectInspector) oi;
-            List<? extends StructField> fields = soi.getAllStructFieldRefs();
-            List<Object> typedStruct = new ArrayList<Object>(fields.size());
-            for (StructField field : fields) {
-                typedStruct.add(getTypedObj(soi.getStructFieldData(data, field), field.getFieldObjectInspector()));
-            }
-            return typedStruct;
-
-
-        default:
-            throw new IOException("Don't know how to deserialize: " + oi.getCategory());
-
-        }
-    }
-
-    private StructObjectInspector createStructObjectInspector() throws IOException {
-
-        if (outputSchema == null) {
-            throw new IOException("Invalid output schema specified");
-        }
-
-        List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
-        List<String> fieldNames = new ArrayList<String>();
-
-        for (HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) {
-            TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString());
-
-            fieldNames.add(hcatFieldSchema.getName());
-            fieldInspectors.add(getObjectInspector(type));
-        }
-
-        StructObjectInspector structInspector = ObjectInspectorFactory.
-                getStandardStructObjectInspector(fieldNames, fieldInspectors);
-        return structInspector;
-    }
-
-    private ObjectInspector getObjectInspector(TypeInfo type) throws IOException {
-
-        switch (type.getCategory()) {
-
-        case PRIMITIVE:
-            PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type;
-            return PrimitiveObjectInspectorFactory.
-                    getPrimitiveJavaObjectInspector(primitiveType.getPrimitiveCategory());
-
-        case MAP:
-            MapTypeInfo mapType = (MapTypeInfo) type;
-            MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector(
-                    getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo()));
-            return mapInspector;
-
-        case LIST:
-            ListTypeInfo listType = (ListTypeInfo) type;
-            ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector(
-                    getObjectInspector(listType.getListElementTypeInfo()));
-            return listInspector;
-
-        case STRUCT:
-            StructTypeInfo structType = (StructTypeInfo) type;
-            List<TypeInfo> fieldTypes = structType.getAllStructFieldTypeInfos();
-
-            List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();
-            for (TypeInfo fieldType : fieldTypes) {
-                fieldInspectors.add(getObjectInspector(fieldType));
-            }
-
-            StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
-                    structType.getAllStructFieldNames(), fieldInspectors);
-            return structInspector;
-
-        default:
-            throw new IOException("Unknown field schema type");
-        }
-    }
-
-    public String getHBaseScanColumns() throws IOException {
-        StringBuilder sb = new StringBuilder();
-        if (hbaseColumnMapping == null) {
-            throw new IOException("HBase column mapping found to be null.");
-        }
-
-        List<String> outputFieldNames = this.outputSchema.getFieldNames();
-        List<Integer> outputColumnMapping = new ArrayList<Integer>();
-        for (String fieldName : outputFieldNames) {
-            int position = schema.getPosition(fieldName);
-            outputColumnMapping.add(position);
-        }
-
-        try {
-            List<String> columnFamilies = new ArrayList<String>();
-            List<String> columnQualifiers = new ArrayList<String>();
-            HBaseSerDe.parseColumnMapping(hbaseColumnMapping, columnFamilies, null, columnQualifiers, null);
-            for (int i = 0; i < outputColumnMapping.size(); i++) {
-                int cfIndex = outputColumnMapping.get(i);
-                String cf = columnFamilies.get(cfIndex);
-                // We skip the key column.
-                if (cf.equals(HBaseSerDe.HBASE_KEY_COL) == false) {
-                    String qualifier = columnQualifiers.get(i);
-                    sb.append(cf);
-                    sb.append(":");
-                    if (qualifier != null) {
-                        sb.append(qualifier);
-                    }
-                    sb.append(" ");
-                }
-            }
-
-        } catch (SerDeException e) {
-
-            throw new IOException(e);
-        }
-
-        return sb.toString();
-    }
-}
Index: src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java (revision 1295948)
+++ src/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java (working copy)
@@ -30,6 +30,7 @@
 import org.apache.hcatalog.data.HCatRecord;
 
 import java.io.IOException;
+import java.text.NumberFormat;
 
 /**
  * Bare bones implementation of OutputFormatContainer. Does only the required
@@ -38,10 +39,20 @@
  */
 class DefaultOutputFormatContainer extends OutputFormatContainer {
 
+    private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
+
+    static {
+        NUMBER_FORMAT.setMinimumIntegerDigits(5);
+        NUMBER_FORMAT.setGroupingUsed(false);
+    }
+
     public DefaultOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat<WritableComparable<?>, Writable> of) {
         super(of);
     }
 
+    static synchronized String getOutputName(int partition) {
+        return "part-" + NUMBER_FORMAT.format(partition);
+    }
+
     /**
      * Get the record writer for the job.  Uses the Table's default OutputStorageDriver
@@ -53,8 +64,9 @@
     @Override
     public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
+        String name = getOutputName(context.getTaskAttemptID().getTaskID().getId());
         return new DefaultRecordWriterContainer(context,
-                getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()),null, InternalUtil.createReporter(context)));
+                getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name, InternalUtil.createReporter(context)));
     }
Index: src/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java (revision 1295948)
+++ src/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java (working copy)
@@ -72,7 +72,7 @@
     public void write(WritableComparable<?> key, HCatRecord value) throws IOException,
            InterruptedException {
         try {
-            getBaseRecordWriter().write(null, serDe.serialize(value, hcatRecordOI));
+            getBaseRecordWriter().write(null, serDe.serialize(value.getAll(), hcatRecordOI));
         } catch (SerDeException e) {
             throw new IOException("Failed to serialize object",e);
         }
Index: src/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java (revision 1295948)
+++ src/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java (working copy)
@@ -20,11 +20,12 @@
 
 import java.io.IOException;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.mapred.HCatMapRedUtil;
 import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.OutputCommitter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.JobStatus.State;
 import org.apache.hcatalog.common.HCatConstants;
@@ -36,6 +37,8 @@
  */
 class DefaultOutputCommitterContainer extends OutputCommitterContainer {
 
+    private static final Log LOG = LogFactory.getLog(DefaultOutputCommitterContainer.class);
+
     /**
      * @param context current JobContext
      * @param baseCommitter OutputCommitter to contain
@@ -86,11 +89,9 @@
     public void cleanupJob(JobContext context) throws IOException {
         getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context));
 
-        OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context);
-
         //Cancel HCat and JobTracker tokens
         try {
-            HiveConf hiveConf = HCatUtil.getHiveConf(null, 
+            HiveConf hiveConf = HCatUtil.getHiveConf(null,
                     context.getConfiguration());
             HiveMetaStoreClient client = HCatUtil.createHiveClient(hiveConf);
             String tokenStrForm = client.getTokenStrForm();
@@ -98,7 +99,7 @@
                 client.cancelDelegationToken(tokenStrForm);
             }
         } catch (Exception e) {
-            throw new IOException("Failed to cancel delegation token", e);
+            LOG.warn("Failed to cancel delegation token", e);
        }
    }
 }
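Note on the DefaultOutputFormatContainer change above: the new getOutputName() helper formats the task partition id into the familiar Hadoop "part-NNNNN" file name, so a real name is passed to the wrapped OutputFormat instead of null. A minimal standalone sketch of that formatting behaviour follows; it uses only java.text.NumberFormat, and the class name PartitionNameDemo is illustrative, not part of the patch.

    import java.text.NumberFormat;

    public class PartitionNameDemo {
        // Same setup as in the patch: at least five integer digits, no grouping separators.
        private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();

        static {
            NUMBER_FORMAT.setMinimumIntegerDigits(5);
            NUMBER_FORMAT.setGroupingUsed(false);
        }

        // Builds the per-task output file name from the task partition number.
        static synchronized String getOutputName(int partition) {
            return "part-" + NUMBER_FORMAT.format(partition);
        }

        public static void main(String[] args) {
            System.out.println(getOutputName(0));   // prints "part-00000" in the default locale
            System.out.println(getOutputName(42));  // prints "part-00042"
        }
    }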