diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/package-info.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/package-info.java
index 10261cd..ecf4595 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/package-info.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/package-info.java
@@ -82,9 +82,9 @@
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.util.Bytes;
-
 // Class that has nothing but a main.
 // Does a Put, Get and a Scan against an hbase table.
+// The API described here is since HBase 1.0.
 public class MyLittleHBaseClient {
   public static void main(String[] args) throws IOException {
     // You need a configuration object to tell the client where to connect.
@@ -94,15 +94,24 @@ public class MyLittleHBaseClient {
     Configuration config = HBaseConfiguration.create();
 
     // Next you need a Connection to the cluster. Create one. When done with it,
-    // close it (Should start a try/finally after this creation so it gets closed
-    // for sure but leaving this out for readibility's sake).
+    // close it. A try/finally is a good way to ensure it gets closed, or use
+    // the jdk7 idiom, try-with-resources: see
+    // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
+    //
+    // Connections are heavyweight. Create one once and keep it around. From a Connection
+    // you get a Table instance to access Tables, an Admin instance to administer the cluster,
+    // and a RegionLocator to find where regions are out on the cluster. As opposed to
+    // Connections, Table, Admin and RegionLocator instances are lightweight; create them as
+    // you need them and close them when done.
+    //
     Connection connection = ConnectionFactory.createConnection(config);
     try {
-      // This instantiates a Table object that connects you to
-      // the "myLittleHBaseTable" table (TableName.valueOf turns String into TableName instance).
+      // The below instantiates a Table object that connects you to the "myLittleHBaseTable"
+      // table (TableName.valueOf turns a String into a TableName instance).
       // When done with it, close it (Should start a try/finally after this creation so it gets
-      // closed for sure but leaving this out for readibility's sake).
+      // closed for sure, or use the jdk7 idiom, try-with-resources: see
+      // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html).
       Table table = connection.getTable(TableName.valueOf("myLittleHBaseTable"));
       try {
@@ -112,7 +121,7 @@ public class MyLittleHBaseClient {
         // below, we are converting the String "myLittleRow" into a byte array to
         // use as a row key for our update. Once you have a Put instance, you can
         // adorn it by setting the names of columns you want to update on the row,
-        // the timestamp to use in your update, etc.If no timestamp, the server
+        // the timestamp to use in your update, etc. If no timestamp, the server
         // applies current time to the edits.
         Put p = new Put(Bytes.toBytes("myLittleRow"));
@@ -138,6 +147,7 @@ public class MyLittleHBaseClient {
         Result r = table.get(g);
         byte [] value = r.getValue(Bytes.toBytes("myLittleFamily"),
           Bytes.toBytes("someQualifier"));
+
         // If we convert the value bytes, we should get back 'Some Value', the
         // value we inserted at this location.
         String valueStr = Bytes.toString(value);
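For reference, a minimal sketch of the try-with-resources usage that the revised javadoc above recommends. The class name is hypothetical; the table, family, and qualifier names are the same illustrative ones used in the example class:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class TryWithResourcesClient {
  public static void main(String[] args) throws IOException {
    Configuration config = HBaseConfiguration.create();
    // The Connection is heavyweight: in a real application create it once and share it.
    try (Connection connection = ConnectionFactory.createConnection(config);
         // The Table is lightweight: get one when needed, close it when done.
         Table table = connection.getTable(TableName.valueOf("myLittleHBaseTable"))) {
      Put p = new Put(Bytes.toBytes("myLittleRow"));
      p.add(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier"),
          Bytes.toBytes("Some Value"));
      table.put(p);
    } // both Table and Connection are closed here, even if an exception is thrown
  }
}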
diff --git a/src/main/docbkx/book.xml b/src/main/docbkx/book.xml
index 862d467..f835dc7 100644
--- a/src/main/docbkx/book.xml
+++ b/src/main/docbkx/book.xml
@@ -542,16 +542,17 @@ create 'bar', 'fam'
       <title>Data Model Operations</title>
       <para>The four primary data model operations are Get, Put, Scan, and Delete. Operations are
         applied via <link
-          xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html">HTable</link>
-        instances.</para>
+          xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html">Table</link>
+        instances.
+      </para>
       <section xml:id="get">
         <title>Get</title>
         <para><link
             xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Get.html">Get</link>
           returns attributes for a specified row. Gets are executed via <link
-            xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#get(org.apache.hadoop.hbase.client.Get)">
-            HTable.get</link>.</para>
+            xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#get(org.apache.hadoop.hbase.client.Get)">
+            Table.get</link>.</para>
       </section>
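As a companion to the Get section above, a minimal sketch of a Get against a Table instance. The connection variable, table name, and column coordinates are illustrative:

// Assumes an open Connection named connection, as in the client example earlier.
Table table = connection.getTable(TableName.valueOf("myTable"));
try {
  Get get = new Get(Bytes.toBytes("row1"));
  get.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("attr"));  // restrict to one column
  Result result = table.get(get);
  byte[] value = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr"));
} finally {
  table.close();
}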
@@ -560,10 +561,10 @@ create 'bar', 'fam'
       <section xml:id="put">
         <title>Put</title>
         <para><link
             xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Put.html">Put</link>
           either adds new rows to a table (if the key is new) or can update existing rows (if the
           key already exists). Puts are executed via <link
-            xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#put(org.apache.hadoop.hbase.client.Put)">
-            HTable.put</link> (writeBuffer) or <link
-            xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#batch(java.util.List,%20java.lang.Object[])">
-            HTable.batch</link> (non-writeBuffer).</para>
+            xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#put(org.apache.hadoop.hbase.client.Put)">
+            Table.put</link> (writeBuffer) or <link
+            xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#batch(java.util.List,%20java.lang.Object[])">
+            Table.batch</link> (non-writeBuffer).</para>
       </section>
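The non-writeBuffer path named above can be illustrated with a short sketch of Table.batch, which submits a list of operations in one round and reports per-operation outcomes. CF, ATTR, and the table variable are as in the surrounding examples; the row keys and values are made up:

List<Row> actions = new ArrayList<Row>();
actions.add(new Put(Bytes.toBytes("row1")).add(CF, ATTR, Bytes.toBytes("v1")));
actions.add(new Put(Bytes.toBytes("row2")).add(CF, ATTR, Bytes.toBytes("v2")));
Object[] results = new Object[actions.size()];  // one result slot per action
try {
  table.batch(actions, results);  // throws InterruptedException as well as IOException
} catch (InterruptedException e) {
  Thread.currentThread().interrupt();
}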
@@ -571,27 +572,26 @@ create 'bar', 'fam'
       <section xml:id="scan">
         <title>Scan</title>
         <para><link
             xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html">Scan</link>
           allows iteration over multiple rows for specified attributes.</para>
-        <para>The following is an example of a Scan on an HTable table instance. Assume that a table is
+        <para>The following is an example of a Scan on a Table instance. Assume that a table is
           populated with rows with keys "row1", "row2", "row3", and then another set of rows with
           the keys "abc1", "abc2", and "abc3". The following example shows how to set a Scan
           instance to return the rows beginning with "row".</para>
-        <programlisting>
+        <programlisting language="java">
 public static final byte[] CF = "cf".getBytes();
 public static final byte[] ATTR = "attr".getBytes();
 ...
-HTable htable = ...      // instantiate HTable
+Table table = ...        // instantiate a Table instance
 
 Scan scan = new Scan();
 scan.addColumn(CF, ATTR);
 scan.setRowPrefixFilter(Bytes.toBytes("row"));
 
-ResultScanner rs = htable.getScanner(scan);
+ResultScanner rs = table.getScanner(scan);
 try {
   for (Result r = rs.next(); r != null; r = rs.next()) {
     // process result...
   }
 } finally {
   rs.close();  // always close the ResultScanner!
-}
+}</programlisting>
         <para>Note that generally the easiest way to specify a specific stop point for a scan is
           by using the <classname>InclusiveStopFilter</classname> class.</para>
       </section>
       <section xml:id="delete">
         <title>Delete</title>
         <para><link
             xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Delete.html">Delete</link>
           removes a row from a table. Deletes are executed via <link
-            xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#delete(org.apache.hadoop.hbase.client.Delete)">
+            xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#delete(org.apache.hadoop.hbase.client.Delete)">
             HTable.delete</link>.</para>
         <para>HBase does not modify data in place, and so deletes are handled by creating new
           markers called tombstones. These tombstones, along with the dead
@@ -737,7 +737,7 @@ public static final byte[] CF = "cf".getBytes();
 public static final byte[] ATTR = "attr".getBytes();
 ...
 Get get = new Get(Bytes.toBytes("row1"));
-Result r = htable.get(get);
+Result r = table.get(get);
 byte[] b = r.getValue(CF, ATTR);  // returns current version of value
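The Delete section above names Table.delete but shows no code, so here is a minimal sketch; the row key is illustrative and CF, ATTR, and the table variable are as in the examples above:

// Deleting a whole row writes tombstone markers rather than removing data in place.
Delete d = new Delete(Bytes.toBytes("row1"));
table.delete(d);

// Or target all versions of a single column instead:
Delete dc = new Delete(Bytes.toBytes("row1"));
dc.addColumns(CF, ATTR);  // HBase 1.0 name; deleteColumns in earlier releases
table.delete(dc);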
@@ -751,7 +751,7 @@ public static final byte[] ATTR = "attr".getBytes();
 ...
 Get get = new Get(Bytes.toBytes("row1"));
 get.setMaxVersions(3);  // will return last 3 versions of row
-Result r = htable.get(get);
+Result r = table.get(get);
 byte[] b = r.getValue(CF, ATTR);  // returns current version of value
 List<KeyValue> kv = r.getColumn(CF, ATTR);  // returns all versions of this column
@@ -779,7 +779,7 @@ public static final byte[] ATTR = "attr".getBytes();
 ...
 Put put = new Put(Bytes.toBytes(row));
 put.add(CF, ATTR, Bytes.toBytes(data));
-htable.put(put);
+table.put(put);
       <para>Caution: the version timestamp is used internally by HBase for things like
         time-to-live calculations. It's usually best to avoid setting this timestamp yourself.
         Prefer using
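Alongside the versioned Get above, note that the KeyValue-returning Result.getColumn shown in the hunk has a Cell-based counterpart, Result.getColumnCells. A minimal sketch, with CF, ATTR, and the table variable as in the examples above:

Get get = new Get(Bytes.toBytes("row1"));
get.setMaxVersions(3);
Result r = table.get(get);
List<Cell> cells = r.getColumnCells(CF, ATTR);  // all returned versions, newest first
for (Cell cell : cells) {
  long ts = cell.getTimestamp();
  byte[] value = CellUtil.cloneValue(cell);  // copy the value bytes out of the Cell
}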
@@ -1456,7 +1456,7 @@ if (!b) {
       <title>HBase MapReduce Summary to HBase Without Reducer</title>
       <para>It is also possible to perform summaries without a reducer - if you use HBase as the
         reducer.</para>
-      <para>An HBase target table would need to exist for the job summary. The HTable method
+      <para>An HBase target table would need to exist for the job summary. The Table method
         incrementColumnValue would be used to atomically increment values. From a performance
         perspective, it might make sense to keep a Map of values to be incremented for each
         map-task, and make one update per key during the
@@ -1508,12 +1508,14 @@ if (!b) {
       <title>Accessing Other HBase Tables in a MapReduce Job</title>
       <para>Although the framework currently allows one HBase table as input to a MapReduce job,
         other HBase tables can be accessed as lookup tables, etc., in a MapReduce job via creating
-        an HTable instance in the setup method of the Mapper.</para>
+        a Table instance in the setup method of the Mapper.</para>
 <programlisting language="java">public class MyMapper extends TableMapper<Text, LongWritable> {
-  private HTable myOtherTable;
+  private Table myOtherTable;
 
   public void setup(Context context) {
-    myOtherTable = new HTable("myOtherTable");
+    // In here create a Connection to the cluster and save it or use the Connection
+    // from the existing table
+    myOtherTable = connection.getTable(TableName.valueOf("myOtherTable"));
   }
 
   public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
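A fuller sketch of the lifecycle the patched Mapper above implies: create the heavyweight Connection once per task in setup, and close the Table and Connection in cleanup. The class and table names are illustrative, and imports are elided as in the surrounding listing:

public class MyMapper extends TableMapper<Text, LongWritable> {
  private Connection connection;
  private Table myOtherTable;

  @Override
  protected void setup(Context context) throws IOException {
    // Create the Connection once per task and reuse it; it is heavyweight.
    connection = ConnectionFactory.createConnection(context.getConfiguration());
    myOtherTable = connection.getTable(TableName.valueOf("myOtherTable"));
  }

  @Override
  protected void cleanup(Context context) throws IOException {
    // Also a reasonable place to flush per-key counts accumulated in map(), e.g. with
    // myOtherTable.incrementColumnValue(row, family, qualifier, amount);
    myOtherTable.close();
    connection.close();
  }
}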
@@ -1701,9 +1703,7 @@ if (!b) {
       <title>Client</title>
-      <para>The HBase client <link
-          xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html">HTable</link>
-        is responsible for finding RegionServers that are serving the particular row range of
+      <para>The HBase client finds the RegionServers that are serving the particular row range of
         interest. It does this by querying the <code>hbase:meta</code> table. See <xref
           linkend="arch.catalog" /> for details. After locating the required region(s), the client
         contacts the RegionServer serving that region, rather than going through the master,
@@ -1711,21 +1711,33 @@ if (!b) {
         subsequent requests need not go through the lookup process. Should a region be reassigned
         either by the master load balancer or because a RegionServer has died, the client will
         requery the catalog tables to determine the new location of the user region.
+        See <xref
+          linkend="master" /> for more information about the impact of the Master on HBase Client
+        communication.
       </para>
-      <para>Administrative functions are handled through <link
-          xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html">HBaseAdmin</link></para>
+      <para>Administrative functions are done via an instance of <link
+          xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Admin.html">Admin</link></para>
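To make the Admin reference above concrete, a minimal sketch of obtaining an Admin from an open Connection; the connection variable and table name are illustrative:

try (Admin admin = connection.getAdmin()) {
  TableName tn = TableName.valueOf("myLittleHBaseTable");
  if (admin.tableExists(tn)) {
    // administrative calls go here, e.g. admin.disableTable(tn) then admin.deleteTable(tn)
  }
}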
       <section xml:id="client.connections">
-      <title>Connections</title>
-      <para>For connection configuration information, see <xref
-          linkend="client_dependencies" />.</para>
+      <title>Cluster Connections</title>
+      <para>The API changed in HBase 1.0. It has been cleaned up, and users are returned
+        interfaces to work against rather than particular types. In HBase 1.0, obtain a cluster
+        Connection from ConnectionFactory and thereafter, from it, get instances of Table, Admin,
+        and RegionLocator on an as-needed basis. When done, close the obtained instances.
+        Finally, be sure to clean up your Connection instance before exiting. Connections are
+        heavyweight objects. Create one once and keep the instance around. Table, Admin and
+        RegionLocator instances are lightweight. Create them as you go and let them go as soon as
+        you are done by closing them. See the <link
+          xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/package-summary.html#package_description">Client
+          Package Javadoc Description</link> for example usage of the new HBase 1.0 API.
+      </para>
+      <para>For connection configuration information, see <xref
+          linkend="client_dependencies" />.</para>
       <para><link
-          xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html">HTable</link>
-        instances are not thread-safe. Only one thread can use an instance of HTable at
-        any given time. When creating HTable instances, it is advisable to use the same
+          xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html">Table</link>
+        instances are not thread-safe. Only one thread can use an instance of Table at
+        any given time. When creating Table instances, it is advisable to use the same
         HBaseConfiguration instance. This will ensure sharing of ZooKeeper and socket instances
         to the RegionServers, which is usually what you want. For example, this is preferred:
diff --git a/src/main/docbkx/upgrading.xml b/src/main/docbkx/upgrading.xml
index 82fdd1e..86b5585 100644
--- a/src/main/docbkx/upgrading.xml
+++ b/src/main/docbkx/upgrading.xml
@@ -164,7 +164,7 @@
       <title>From 0.96.x to 1.0.0</title>
       <para>You cannot do a rolling upgrade from 0.96.x to 1.0.0 without first doing a rolling
         upgrade to 0.98.x. See comment in
-        HBASE-11164 Document and test rolling updates from 0.98 -> 1.0 for the why.
+        <link xlink:href="https://issues.apache.org/jira/browse/HBASE-11164">HBASE-11164 Document
+          and test rolling updates from 0.98 -&gt; 1.0</link> for the why.
         Also because hbase-1.0.0 enables hfilev3 by default,
         <link xlink:href="https://issues.apache.org/jira/browse/HBASE-9801">HBASE-9801 Change the
           default HFile version to V3</link>, and support for hfilev3 only arrives in 0.98, this
         is another reason you cannot rolling upgrade from hbase-0.96.x;</para>
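Finally, a minimal sketch of the sharing pattern the Cluster Connections hunk above describes: one process-wide Connection, with each thread checking out and closing its own lightweight Table. The table name and row key are illustrative:

final Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create());
Runnable work = new Runnable() {
  public void run() {
    // Each thread gets its own Table from the shared Connection and closes it when done.
    try (Table table = connection.getTable(TableName.valueOf("myTable"))) {
      table.get(new Get(Bytes.toBytes("row1")));
    } catch (IOException e) {
      // handle or log the failure
    }
  }
};
new Thread(work).start();
new Thread(work).start();
// ...join the threads, then close the shared Connection before exit:
// connection.close();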