diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/package-info.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/package-info.java index 10261cd..ecf4595 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/package-info.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/package-info.java @@ -82,9 +82,9 @@ import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; - // Class that has nothing but a main. // Does a Put, Get and a Scan against an hbase table. +// The API described here is since HBase 1.0. public class MyLittleHBaseClient { public static void main(String[] args) throws IOException { // You need a configuration object to tell the client where to connect. @@ -94,15 +94,24 @@ public class MyLittleHBaseClient { Configuration config = HBaseConfiguration.create(); // Next you need a Connection to the cluster. Create one. When done with it, - // close it (Should start a try/finally after this creation so it gets closed - // for sure but leaving this out for readibility's sake). + // close it. A try/finally is a good way to ensure it gets closed or use + // the jdk7 idiom, try-with-resources: see + // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html + // + // Connections are heavyweight. Create one once and keep it around. From a Connection + // you get a Table instance to access Tables, an Admin instance to administer the cluster, + // and RegionLocator to find where regions are out on the cluster. As opposed to Connections, + // Table, Admin and RegionLocator instances are lightweight; create as you need them and then + // close when done. + // Connection connection = ConnectionFactory.createConnection(config); try { - // This instantiates a Table object that connects you to - // the "myLittleHBaseTable" table (TableName.valueOf turns String into TableName instance). 
+ // The below instantiates a Table object that connects you to the "myLittleHBaseTable" table + // (TableName.valueOf turns String into a TableName instance). // When done with it, close it (Should start a try/finally after this creation so it gets - // closed for sure but leaving this out for readibility's sake). + // closed for sure or use the jdk7 idiom, try-with-resources: see + // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) Table table = connection.getTable(TableName.valueOf("myLittleHBaseTable")); try { @@ -112,7 +121,7 @@ public class MyLittleHBaseClient { // below, we are converting the String "myLittleRow" into a byte array to // use as a row key for our update. Once you have a Put instance, you can // adorn it by setting the names of columns you want to update on the row, - // the timestamp to use in your update, etc.If no timestamp, the server + // the timestamp to use in your update, etc. If no timestamp, the server // applies current time to the edits. Put p = new Put(Bytes.toBytes("myLittleRow")); @@ -138,6 +147,7 @@ public class MyLittleHBaseClient { Result r = table.get(g); byte [] value = r.getValue(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier")); + // If we convert the value bytes, we should get back 'Some Value', the // value we inserted at this location. String valueStr = Bytes.toString(value); diff --git a/src/main/docbkx/book.xml b/src/main/docbkx/book.xml index 862d467..f835dc7 100644 --- a/src/main/docbkx/book.xml +++ b/src/main/docbkx/book.xml @@ -542,16 +542,17 @@ create 'bar', 'fam'
incrementColumnValue would be used to atomically increment values. From a
performance perspective, it might make sense to keep a Map of values with their values to
be incremeneted for each map-task, and make one update per key at during the
@@ -1508,12 +1508,14 @@ if (!b) {
Accessing Other HBase Tables in a MapReduce Job
Although the framework currently allows one HBase table as input to a MapReduce job,
other HBase tables can be accessed as lookup tables, etc., in a MapReduce job via creating
- an HTable instance in the setup method of the Mapper.
+ a Table instance in the setup method of the Mapper.
public class MyMapper extends TableMapper<Text, LongWritable> {
- private HTable myOtherTable;
+ private Table myOtherTable;
public void setup(Context context) {
- myOtherTable = new HTable("myOtherTable");
+ // In here create a Connection to the cluster and save it or use the Connection
+ // from the existing table
+ myOtherTable = connection.getTable(TableName.valueOf("myOtherTable"));
}
public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {
@@ -1701,9 +1703,7 @@ if (!b) {
Client
- The HBase client HTable
- is responsible for finding RegionServers that are serving the particular row range of
+ The HBase client finds the RegionServers that are serving the particular row range of
interest. It does this by querying the hbase:meta table. See for details. After locating the required region(s), the
client contacts the RegionServer serving that region, rather than going through the master,
@@ -1711,21 +1711,33 @@ if (!b) {
subsequent requests need not go through the lookup process. Should a region be reassigned
either by the master load balancer or because a RegionServer has died, the client will
requery the catalog tables to determine the new location of the user region.
+
See for more information about the impact of the Master on HBase
Client communication.
- Administrative functions are handled through HBaseAdmin
+ Administrative functions are done via an instance of Admin
+
- Connections
- For connection configuration information, see .
+ Cluster Connections
+ The API changed in HBase 1.0. It's been cleaned up and users are returned
+ Interfaces to work against rather than particular types. In HBase 1.0,
+ obtain a cluster Connection from ConnectionFactory and thereafter, get from it
+ instances of Table, Admin, and RegionLocator on an as-needed basis. When done, close
+ obtained instances. Finally, be sure to clean up your Connection instance before
+ exiting. Connections are heavyweight objects. Create once and keep an instance around.
+ Table, Admin and RegionLocator instances are lightweight. Create as you go and then
+ let go as soon as you are done by closing them. See the
+ Client Package Javadoc Description for example usage of the new HBase 1.0 API.
+
+ For connection configuration information, see .
+
HTable
- instances are not thread-safe . Only one thread use an instance of HTable at
- any given time. When creating HTable instances, it is advisable to use the same Table
+ instances are not thread-safe. Only one thread can use an instance of Table at
+ any given time. When creating Table instances, it is advisable to use the same HBaseConfiguration
instance. This will ensure sharing of ZooKeeper and socket instances to the RegionServers
which is usually what you want. For example, this is preferred:
diff --git a/src/main/docbkx/upgrading.xml b/src/main/docbkx/upgrading.xml
index 82fdd1e..86b5585 100644
--- a/src/main/docbkx/upgrading.xml
+++ b/src/main/docbkx/upgrading.xml
@@ -164,7 +164,7 @@
From 0.96.x to 1.0.0
You cannot do a from 0.96.x to 1.0.0 without
first doing a rolling upgrade to 0.98.x. See comment in
- HBASE-11164 Document and test rolling updates from 0.98 -> 1.0 for the why.
+ HBASE-11164 Document and test rolling updates from 0.98 -> 1.0 for the why.
Also because hbase-1.0.0 enables hfilev3 by default,
HBASE-9801 Change the default HFile version to V3,
and support for hfilev3 only arrives in 0.98, this is another reason you cannot rolling upgrade from hbase-0.96.x;