diff --git src/main/docbkx/appendix_contributing_to_documentation.xml src/main/docbkx/appendix_contributing_to_documentation.xml index 080525a..2f19c7b 100644 --- src/main/docbkx/appendix_contributing_to_documentation.xml +++ src/main/docbkx/appendix_contributing_to_documentation.xml @@ -107,7 +107,7 @@ For each issue you work on, create a new branch. One convention that works well for naming the branches is to name a given branch the same as the JIRA it relates to: - $ git checkout -b HBASE-123456 + $ git checkout -b HBASE-123456 Make your suggested changes on your branch, committing your changes to your @@ -123,8 +123,8 @@ sure you have built HBase at least once, in order to fetch all the Maven dependencies you need. - $ mvn clean install -DskipTests # Builds HBase - $ mvn clean site -DskipTests # Builds the website and documentation + $ mvn clean install -DskipTests # Builds HBase + $ mvn clean site -DskipTests # Builds the website and documentation If any errors occur, address them. @@ -132,7 +132,7 @@ the area of the code you are working in has had a lot of changes lately, make sure you rebase your branch against the remote master and take care of any conflicts before submitting your patch. - + $ git checkout HBASE-123456 $ git rebase origin/master @@ -141,7 +141,7 @@ $ git rebase origin/master Generate your patch against the remote master. Run the following command from the top level of your git repository (usually called hbase): - $ git diff --no-prefix origin/master > HBASE-123456.patch + $ git diff --no-prefix origin/master > HBASE-123456.patch The name of the patch should contain the JIRA ID. Look over the patch file to be sure that you did not change any additional files by accident and that there are no other surprises. When you are satisfied, attach the patch to the JIRA and @@ -227,7 +227,7 @@ $ git rebase origin/master recommended that you use a <figure> Docbook element for an image. This allows screen readers to navigate to the image and also provides alternative text for the image. The following is an example of a <figure> element. - + HFile Version 1 @@ -295,7 +295,7 @@ $ git rebase origin/master render as block-level elements (they take the whole width of the page), it is better to mark them up as siblings to the paragraphs around them, like this: - This is the paragraph. + This is the paragraph. This is an admonition which occurs after the paragraph. ]]> @@ -312,7 +312,7 @@ $ git rebase origin/master consist of things other than plain text, they need to be wrapped in some element. If they are plain text, they need to be inclosed in <para> tags. This is tedious but necessary for validity. - + This is a paragraph. @@ -367,7 +367,7 @@ $ git rebase origin/master the content. Also, to avoid having an extra blank line at the beginning of the programlisting output, do not put the CDATA element on its own line. For example: - + case $1 in --cleanZk|--cleanHdfs|--cleanAll) matches="yes" ;; @@ -396,6 +396,29 @@ esac especially if you use GUI mode in the editor. + + + Syntax Highlighting + + + The HBase Reference Guide uses the XSLT Syntax Highlighting Maven module for syntax highlighting. + To enable syntax highlighting for a given <programlisting> or + <screen> (or possibly other elements), add the attribute + language=LANGUAGE_OF_CHOICE + to the element, as in the following example: + + bar + foo +]]> + Several syntax types are supported. 
The most interesting ones for the + HBase Reference Guide are java, xml, + sql, and bourne (for BASH shell + output or Linux command-line examples). + + diff --git src/main/docbkx/book.xml src/main/docbkx/book.xml index 0564354..36f2257 100644 --- src/main/docbkx/book.xml +++ src/main/docbkx/book.xml @@ -300,25 +300,25 @@ A namespace can be created, removed or altered. Namespace membership is determined during table creation by specifying a fully-qualified table name of the form: - :]]> + :
]]> Examples - + #Create a namespace create_namespace 'my_ns' - + #create my_table in my_ns namespace create 'my_ns:my_table', 'fam' - + #drop namespace drop_namespace 'my_ns' - + #alter namespace alter_namespace 'my_ns', {METHOD => 'set', 'PROPERTY_NAME' => 'PROPERTY_VALUE'} @@ -340,7 +340,7 @@ alter_namespace 'my_ns', {METHOD => 'set', 'PROPERTY_NAME' => 'PROPERTY_VALUE'} Examples - + #namespace=foo and table qualifier=bar create 'foo:bar', 'fam' @@ -429,7 +429,7 @@ create 'bar', 'fam' populated with rows with keys "row1", "row2", "row3", and then another set of rows with the keys "abc1", "abc2", and "abc3". The following example shows how startRow and stopRow can be applied to a Scan instance to return the rows beginning with "row". - + public static final byte[] CF = "cf".getBytes(); public static final byte[] ATTR = "attr".getBytes(); ... @@ -562,7 +562,7 @@ try { xml:id="default_get_example"> Default Get Example The following Get will only retrieve the current version of the row - + public static final byte[] CF = "cf".getBytes(); public static final byte[] ATTR = "attr".getBytes(); ... @@ -575,7 +575,7 @@ byte[] b = r.getValue(CF, ATTR); // returns current version of value xml:id="versioned_get_example"> Versioned Get Example The following Get will return the last 3 versions of the row. - + public static final byte[] CF = "cf".getBytes(); public static final byte[] ATTR = "attr".getBytes(); ... @@ -603,7 +603,7 @@ List<KeyValue> kv = r.getColumn(CF, ATTR); // returns all versions of thi Implicit Version Example The following Put will be implicitly versioned by HBase with the current time. - + public static final byte[] CF = "cf".getBytes(); public static final byte[] ATTR = "attr".getBytes(); ... @@ -616,7 +616,7 @@ htable.put(put); xml:id="explicit_version_example"> Explicit Version Example The following Put has the version timestamp explicitly set. - + public static final byte[] CF = "cf".getBytes(); public static final byte[] ATTR = "attr".getBytes(); ... @@ -815,7 +815,7 @@ htable.put(put); Be sure to use the correct version of the HBase JAR for your system. The backticks (` symbols) cause ths shell to execute the sub-commands, setting the CLASSPATH as part of the command. This example assumes you use a BASH-compatible shell. - $ HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-0.90.0.jar rowcounter usertable + $ HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-0.90.0.jar rowcounter usertable When the command runs, internally, the HBase JAR finds the dependencies it needs for zookeeper, guava, and its other dependencies on the passed HADOOP_CLASSPATH and adds the JARs to the MapReduce job configuration. See the source at @@ -826,7 +826,7 @@ htable.put(put); java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.mapreduce.RowCounter$RowCounterMapper If this occurs, try modifying the command as follows, so that it uses the HBase JARs from the target/ directory within the build environment. 
- $ HADOOP_CLASSPATH=${HBASE_HOME}/target/hbase-0.90.0-SNAPSHOT.jar:`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/target/hbase-0.90.0-SNAPSHOT.jar rowcounter usertable + $ HADOOP_CLASSPATH=${HBASE_HOME}/target/hbase-0.90.0-SNAPSHOT.jar:`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/target/hbase-0.90.0-SNAPSHOT.jar rowcounter usertable Notice to Mapreduce users of HBase 0.96.1 and above @@ -876,14 +876,14 @@ Exception in thread "main" java.lang.IllegalAccessError: class HADOOP_CLASSPATH environment variable at job submission time. When launching jobs that package their dependencies, all three of the following job launching commands satisfy this requirement: - + $ HADOOP_CLASSPATH=/path/to/hbase-protocol.jar:/path/to/hbase/conf hadoop jar MyJob.jar MyJobMainClass $ HADOOP_CLASSPATH=$(hbase mapredcp):/path/to/hbase/conf hadoop jar MyJob.jar MyJobMainClass $ HADOOP_CLASSPATH=$(hbase classpath) hadoop jar MyJob.jar MyJobMainClass For jars that do not package their dependencies, the following command structure is necessary: - + $ HADOOP_CLASSPATH=$(hbase mapredcp):/etc/hbase/conf hadoop jar MyApp.jar MyJobMainClass -libjars $(hbase mapredcp | tr ':' ',') ... See also HADOOP_CLASSPATH=$(hbase mapredcp):/etc/hbase/conf hadoop jar MyApp The HBase JAR also serves as a Driver for some bundled mapreduce jobs. To learn about the bundled MapReduce jobs, run the following command. - $ ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-0.90.0-SNAPSHOT.jar + $ ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-0.90.0-SNAPSHOT.jar An example program must be given as the first argument. Valid program names are: copytable: Export a table from local cluster to peer cluster @@ -910,7 +910,7 @@ Valid program names are: Each of the valid program names are bundled MapReduce jobs. To run one of the jobs, model your command after the following example. - $ ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-0.90.0-SNAPSHOT.jar rowcounter myTable + $ ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-0.90.0-SNAPSHOT.jar rowcounter myTable
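Putting the pieces above together, a minimal sketch of a full invocation, assuming a BASH-compatible shell and using hbase-VERSION.jar as a placeholder for the actual JAR name in your install:

# Resolve HBase's dependencies onto the MapReduce classpath
$ export HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath`
# Print the list of bundled MapReduce programs (the driver usage shown above)
$ ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-VERSION.jar
# Run one of them, here rowcounter against a table named myTable
$ ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-VERSION.jar rowcounter myTable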
@@ -972,7 +972,7 @@ Valid program names are: xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/RowCounter.html">RowCounter MapReduce job uses TableInputFormat and does a count of all rows in the specified table. To run it, use the following command: - $ ./bin/hadoop jar hbase-X.X.X.jar + $ ./bin/hadoop jar hbase-X.X.X.jar This will invoke the HBase MapReduce Driver class. Select rowcounter from the choice of jobs offered. This will print rowcouner usage advice to standard output. Specify the tablename, @@ -1011,7 +1011,7 @@ Valid program names are: The following is an example of using HBase as a MapReduce source in read-only manner. Specifically, there is a Mapper instance but no Reducer, and nothing is being emitted from the Mapper. There job would be defined as follows... - + Configuration config = HBaseConfiguration.create(); Job job = new Job(config, "ExampleRead"); job.setJarByClass(MyReadJob.class); // class that contains mapper @@ -1038,7 +1038,7 @@ if (!b) { ...and the mapper instance would extend TableMapper... - + public static class MyMapper extends TableMapper<Text, Text> { public void map(ImmutableBytesWritable row, Result value, Context context) throws InterruptedException, IOException { @@ -1052,7 +1052,7 @@ public static class MyMapper extends TableMapper<Text, Text> { HBase MapReduce Read/Write Example The following is an example of using HBase both as a source and as a sink with MapReduce. This example will simply copy data from one table to another. - + Configuration config = HBaseConfiguration.create(); Job job = new Job(config,"ExampleReadWrite"); job.setJarByClass(MyReadWriteJob.class); // class that contains mapper @@ -1091,7 +1091,7 @@ if (!b) { The following is the example mapper, which will create a Put and matching the input Result and emit it. Note: this is what the CopyTable utility does. - + public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put> { public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException { @@ -1125,7 +1125,7 @@ public static class MyMapper extends TableMapper<ImmutableBytesWritable, Put& The following example uses HBase as a MapReduce source and sink with a summarization step. This example will count the number of distinct instances of a value in a table and write those summarized counts in another table. - + Configuration config = HBaseConfiguration.create(); Job job = new Job(config,"ExampleSummary"); job.setJarByClass(MySummaryJob.class); // class that contains mapper and reducer @@ -1156,7 +1156,7 @@ if (!b) { In this example mapper a column with a String-value is chosen as the value to summarize upon. This value is used as the key to emit from the mapper, and an IntWritable represents an instance counter. - + public static class MyMapper extends TableMapper<Text, IntWritable> { public static final byte[] CF = "cf".getBytes(); public static final byte[] ATTR1 = "attr1".getBytes(); @@ -1174,7 +1174,7 @@ public static class MyMapper extends TableMapper<Text, IntWritable> { In the reducer, the "ones" are counted (just like any other MR example that does this), and then emits a Put. 
- + public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> { public static final byte[] CF = "cf".getBytes(); public static final byte[] COUNT = "count".getBytes(); @@ -1199,7 +1199,7 @@ public static class MyTableReducer extends TableReducer<Text, IntWritable, Im This very similar to the summary example above, with exception that this is using HBase as a MapReduce source but HDFS as the sink. The differences are in the job setup and in the reducer. The mapper remains the same. - + Configuration config = HBaseConfiguration.create(); Job job = new Job(config,"ExampleSummaryToFile"); job.setJarByClass(MySummaryFileJob.class); // class that contains mapper and reducer @@ -1228,7 +1228,7 @@ if (!b) { As stated above, the previous Mapper can run unchanged with this example. As for the Reducer, it is a "generic" Reducer instead of extending TableMapper and emitting Puts. - + public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { @@ -1268,7 +1268,7 @@ if (!b) { reducers. Neither is right or wrong, it depends on your use-case. Recognize that the more reducers that are assigned to the job, the more simultaneous connections to the RDBMS will be created - this will scale, but only to a point. - + public static class MyRdbmsReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private Connection c = null; @@ -1299,7 +1299,7 @@ if (!b) { Although the framework currently allows one HBase table as input to a MapReduce job, other HBase tables can be accessed as lookup tables, etc., in a MapReduce job via creating an HTable instance in the setup method of the Mapper. - public class MyMapper extends TableMapper<Text, LongWritable> { + public class MyMapper extends TableMapper<Text, LongWritable> { private HTable myOtherTable; public void setup(Context context) { @@ -1519,11 +1519,11 @@ if (!b) { xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HBaseConfiguration">HBaseConfiguration instance. This will ensure sharing of ZooKeeper and socket instances to the RegionServers which is usually what you want. For example, this is preferred: - HBaseConfiguration conf = HBaseConfiguration.create(); + HBaseConfiguration conf = HBaseConfiguration.create(); HTable table1 = new HTable(conf, "myTable"); HTable table2 = new HTable(conf, "myTable"); as opposed to this: - HBaseConfiguration conf1 = HBaseConfiguration.create(); + HBaseConfiguration conf1 = HBaseConfiguration.create(); HTable table1 = new HTable(conf1, "myTable"); HBaseConfiguration conf2 = HBaseConfiguration.create(); HTable table2 = new HTable(conf2, "myTable"); @@ -1537,7 +1537,7 @@ HTable table2 = new HTable(conf2, "myTable"); the following example: Pre-Creating a <code>HConnection</code> - // Create a connection to the cluster. + // Create a connection to the cluster. HConnection connection = HConnectionManager.createConnection(Configuration); HTableInterface table = connection.getTable("myTable"); // use table as needed, the table returned is lightweight @@ -1594,7 +1594,7 @@ connection.close(); represents a list of Filters with a relationship of FilterList.Operator.MUST_PASS_ALL or FilterList.Operator.MUST_PASS_ONE between the Filters. The following example shows an 'or' between two Filters (checking for either 'my value' or 'my other value' on the same attribute). 
- + FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ONE); SingleColumnValueFilter filter1 = new SingleColumnValueFilter( cf, @@ -1627,7 +1627,7 @@ scan.setFilter(list); ), inequality (CompareOp.NOT_EQUAL), or ranges (e.g., CompareOp.GREATER). The following is example of testing equivalence a column to a String value "my value"... - + SingleColumnValueFilter filter = new SingleColumnValueFilter( cf, column, @@ -1650,7 +1650,7 @@ scan.setFilter(filter); RegexStringComparator supports regular expressions for value comparisons. - + RegexStringComparator comp = new RegexStringComparator("my."); // any value that starts with 'my' SingleColumnValueFilter filter = new SingleColumnValueFilter( cf, @@ -1671,7 +1671,7 @@ scan.setFilter(filter); xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/SubstringComparator.html">SubstringComparator can be used to determine if a given substring exists in a value. The comparison is case-insensitive. - + SubstringComparator comp = new SubstringComparator("y val"); // looking for 'my value' SingleColumnValueFilter filter = new SingleColumnValueFilter( cf, @@ -1728,7 +1728,7 @@ scan.setFilter(filter); Note: The same column qualifier can be used in different column families. This filter returns all matching columns. Example: Find all columns in a row and family that start with "abc" - + HTableInterface t = ...; byte[] row = ...; byte[] family = ...; @@ -1758,7 +1758,7 @@ rs.close(); prefixes. It can be used to efficiently get discontinuous sets of columns from very wide rows. Example: Find all columns in a row and family that start with "abc" or "xyz" - + HTableInterface t = ...; byte[] row = ...; byte[] family = ...; @@ -1791,7 +1791,7 @@ rs.close(); filter returns all matching columns. Example: Find all columns in a row and family between "bbbb" (inclusive) and "bbdd" (inclusive) - + HTableInterface t = ...; byte[] row = ...; byte[] family = ...; @@ -2018,7 +2018,7 @@ rs.close(); was accessed. Catalog tables are configured like this. This group is the last one considered during evictions. To mark a column family as in-memory, call - HColumnDescriptor.setInMemory(true); if creating a table from java, + HColumnDescriptor.setInMemory(true); if creating a table from java, or set IN_MEMORY => true when creating or altering a table in the shell: e.g. hbase(main):003:0> create 't', {NAME => 'f', IN_MEMORY => 'true'} @@ -2218,7 +2218,7 @@ rs.close(); Next, add the following configuration to the RegionServer's hbase-site.xml. - + hbase.bucketcache.ioengine offheap @@ -2461,7 +2461,7 @@ rs.close(); ZooKeeper splitlog node (/hbase/splitlog) as tasks. You can view the contents of the splitlog by issuing the following zkcli command. Example output is shown. - ls /hbase/splitlog + ls /hbase/splitlog [hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost8.sample.com%2C57020%2C1340474893275-splitting%2Fhost8.sample.com%253A57020.1340474893900, hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost3.sample.com%2C57020%2C1340474893299-splitting%2Fhost3.sample.com%253A57020.1340474893931, hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost4.sample.com%2C57020%2C1340474893287-splitting%2Fhost4.sample.com%253A57020.1340474893946] @@ -2846,7 +2846,7 @@ ctime = Sat Jun 23 11:13:40 PDT 2012 Typically a custom split policy should extend HBase's default split policy: ConstantSizeRegionSplitPolicy. 
The policy can set globally through the HBaseConfiguration used or on a per table basis: - + HTableDescriptor myHtd = ...; myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName()); @@ -2867,7 +2867,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName( opens merged region on the regionserver and reports the merge to Master at last. An example of region merges in the hbase shell - $ hbase> merge_region 'ENCODED_REGIONNAME', 'ENCODED_REGIONNAME' + $ hbase> merge_region 'ENCODED_REGIONNAME', 'ENCODED_REGIONNAME' hbase> merge_region 'ENCODED_REGIONNAME', 'ENCODED_REGIONNAME', true It's an asynchronous operation and call returns immediately without waiting merge completed. @@ -2969,10 +2969,10 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName( To view a textualized version of hfile content, you can do use the org.apache.hadoop.hbase.io.hfile.HFile - tool. Type the following to see usage:$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile For + tool. Type the following to see usage:$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile For example, to view the content of the file hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475, - type the following: $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile -v -f hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475 If + type the following: $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile -v -f hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475 If you leave off the option -v to see just a summary on the hfile. See usage for other things to do with the HFile tool. @@ -3818,7 +3818,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName( Run one of following commands in the HBase shell. Replace the table name orders_table with the name of your table. - + alter 'orders_table', CONFIGURATION => {'hbase.hstore.engine.class' => 'org.apache.hadoop.hbase.regionserver.StripeStoreEngine', 'hbase.hstore.blockingStoreFiles' => '100'} alter 'orders_table', {NAME => 'blobs_cf', CONFIGURATION => {'hbase.hstore.engine.class' => 'org.apache.hadoop.hbase.regionserver.StripeStoreEngine', 'hbase.hstore.blockingStoreFiles' => '100'}} create 'orders_table', 'blobs_cf', CONFIGURATION => {'hbase.hstore.engine.class' => 'org.apache.hadoop.hbase.regionserver.StripeStoreEngine', 'hbase.hstore.blockingStoreFiles' => '100'} @@ -3842,7 +3842,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName( Set the hbase.hstore.engine.class option to either nil or org.apache.hadoop.hbase.regionserver.DefaultStoreEngine. Either option has the same effect. - + alter 'orders_table', CONFIGURATION => {'hbase.hstore.engine.class' => ''} @@ -3861,7 +3861,7 @@ myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName( column family, after disabling the table. If you use HBase shell, the general command pattern is as follows: - + alter 'orders_table', CONFIGURATION => {'key' => 'value', ..., 'key' => 'value'}}
{'key' => 'value', ..., 'key' => 'value'} where importtsv or your MapReduce job put its results, and the table name to import into. For example: - $ hadoop jar hbase-VERSION.jar completebulkload [-c /path/to/hbase/config/hbase-site.xml] /user/todd/myoutput mytable + $ hadoop jar hbase-VERSION.jar completebulkload [-c /path/to/hbase/config/hbase-site.xml] /user/todd/myoutput mytable The -c config-file option can be used to specify a file containing the appropriate hbase parameters (e.g., hbase-site.xml) if @@ -4143,7 +4143,7 @@ alter 'orders_table', CONFIGURATION => {'key' => 'value', ..., 'key' => 'value'} Timeline Consistency With this feature, HBase introduces a Consistency definition, which can be provided per read operation (get or scan). - + public enum Consistency { STRONG, TIMELINE @@ -4254,7 +4254,7 @@ public enum Consistency {
Server side properties - hbase.regionserver.storefile.refresh.period 0 @@ -4274,7 +4274,7 @@ public enum Consistency { Client side properties Ensure to set the following for all clients (and servers) that will use region replicas. - hbase.ipc.client.allowsInterrupt true @@ -4325,7 +4325,7 @@ flush 't1'
Java - get 't1','r6', {CONSISTENCY => "TIMELINE"} ]]> You can simulate a region server pausing or becoming unavailable and do a read from the secondary replica: - hbase(main):001:0> get 't1','r6', {CONSISTENCY => "TIMELINE"} @@ -4376,14 +4376,14 @@ hbase> scan 't1', {CONSISTENCY => 'TIMELINE'} Java You can set the consistency for Gets and Scans and do requests as follows. - You can also pass multiple gets: - And Scans: - You can inspect whether the results are coming from the primary region or not by calling the Result.isStale() method: - Running hbck to identify inconsistencies To check to see if your HBase cluster has corruptions, run hbck against your HBase cluster: - + $ ./bin/hbase hbck @@ -4661,13 +4661,13 @@ A run of hbck will report a list of inconsistencies along with a brief descripti tables affected. Using the -details option will report more details, including a representative listing of all the splits present in all the tables. - + $ ./bin/hbase hbck -details If you just want to know if some tables are corrupted, you can limit hbck to identify inconsistencies in only specific tables. For example, the following command would only attempt to check table TableFoo and TableBar. The benefit is that hbck will run in less time. - + $ ./bin/hbase hbck TableFoo TableBar
@@ -4726,12 +4726,12 @@ assigned or multiply assigned regions.
To fix deployment and assignment problems you can run this command: - + $ ./bin/hbase hbck -fixAssignments To fix deployment and assignment problems as well as repairing incorrect meta rows you can run this command: - + $ ./bin/hbase hbck -fixAssignments -fixMeta There are a few classes of table integrity problems that are low-risk repairs. The first two are @@ -4743,12 +4743,12 @@ The third low-risk class is hdfs region holes. This can be repaired by using the If holes are detected you can use -fixHdfsHoles and should include -fixMeta and -fixAssignments to make the new region consistent. - + $ ./bin/hbase hbck -fixAssignments -fixMeta -fixHdfsHoles Since this is a common operation, we’ve added the -repairHoles flag that is equivalent to the previous command: - + $ ./bin/hbase hbck -repairHoles If inconsistencies still remain after these steps, you most likely have table integrity problems @@ -4800,14 +4800,14 @@ integrity options. Finally, there are safeguards to limit repairs to only specific tables. For example, the following command would only attempt to check and repair table TableFoo and TableBar. - + $ ./bin/hbase hbck -repair TableFoo TableBar
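Taken together, the commands above form an escalating repair workflow. The following is only a sketch of one cautious ordering, assuming you re-run the read-only check between steps and stop as soon as no inconsistencies are reported:

$ ./bin/hbase hbck                            # 1. read-only check; note what is reported
$ ./bin/hbase hbck -fixAssignments -fixMeta   # 2. low-risk fixes for assignment and meta problems
$ ./bin/hbase hbck                            # 3. re-check; if HDFS region holes remain...
$ ./bin/hbase hbck -repairHoles               # ...repair them (same as -fixAssignments -fixMeta -fixHdfsHoles)
$ ./bin/hbase hbck                            # 4. final check that no inconsistencies remain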
Special cases: Meta is not properly assigned There are a few special cases that hbck can handle as well. Sometimes the meta table’s only region is inconsistently assigned or deployed. In this case there is a special -fixMetaOnly option that can try to fix meta assignments. - + $ ./bin/hbase hbck -fixMetaOnly -fixAssignments
@@ -4825,7 +4825,7 @@ directory, loads as much information from region metadata files (.regioninfo fil from the file system. If the region metadata has proper table integrity, it sidelines the original root and meta table directories, and builds new ones with pointers to the region directories and their data. - + $ ./bin/hbase org.apache.hadoop.hbase.util.hbck.OfflineMetaRepair NOTE: This tool is not as clever as uberhbck but can be used to bootstrap repairs that uberhbck @@ -5085,7 +5085,7 @@ This option should not normally be used, and it is not in -fixAll. linkend="hbase.native.platform" />), you can make a symbolic link from HBase to the native Hadoop libraries. This assumes the two software installs are colocated. For example, if my 'platform' is Linux-amd64-64: - $ cd $HBASE_HOME + $ cd $HBASE_HOME $ mkdir lib/native $ ln -s $HADOOP_HOME/lib/native lib/native/Linux-amd64-64 Use the compression tool to check that LZ4 is installed on all nodes. Start up (or restart) @@ -5128,7 +5128,7 @@ hbase(main):003:0> alter 'TestTable', {NAME => 'info', COMPRESSION => CompressionTest You can use the CompressionTest tool to verify that your compressor is available to HBase: - + $ hbase org.apache.hadoop.hbase.util.CompressionTest hdfs://host/path/to/hbase snappy
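To verify a codec across the whole cluster rather than one host at a time, a loop such as the following sketch can help; it assumes password-less SSH to each host listed in conf/regionservers, that the hbase script is on the PATH on those hosts, and it reuses the placeholder HDFS path from the example above (substitute lz4, gz, etc. for snappy as needed):

# Run CompressionTest on every RegionServer host before enabling the codec on a table
for host in $(cat conf/regionservers); do
  echo "== ${host} =="
  ssh "${host}" "hbase org.apache.hadoop.hbase.util.CompressionTest hdfs://host/path/to/hbase snappy"
done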
@@ -5192,7 +5192,7 @@ DESCRIPTION ENABLED parameter, usage advice is printed for each option. <command>LoadTestTool</command> Usage - Options: @@ -5248,7 +5248,7 @@ Options: Example Usage of LoadTestTool - + $ hbase org.apache.hadoop.hbase.util.LoadTestTool -write 1:10:100 -num_keys 1000000 -read 100:30 -num_tables 1 -data_block_encoding NONE -tn load_test_tool_NONE diff --git src/main/docbkx/case_studies.xml src/main/docbkx/case_studies.xml index 7824c7d..332caf8 100644 --- src/main/docbkx/case_studies.xml +++ src/main/docbkx/case_studies.xml @@ -145,7 +145,7 @@ some unusual anomalies, namely interface errors, overruns, framing errors. While not unheard of, these kinds of errors are exceedingly rare on modern hardware which is operating as it should: - + $ /sbin/ifconfig bond0 bond0 Link encap:Ethernet HWaddr 00:00:00:00:00:00 inet addr:10.x.x.x Bcast:10.x.x.255 Mask:255.255.255.0 @@ -160,7 +160,7 @@ RX bytes:2416328868676 (2.4 TB) TX bytes:3464991094001 (3.4 TB) running an ICMP ping from an external host and observing round-trip-time in excess of 700ms, and by running ethtool(8) on the members of the bond interface and discovering that the active interface was operating at 100Mbs/, full duplex. - + $ sudo ethtool eth0 Settings for eth0: Supported ports: [ TP ] diff --git src/main/docbkx/configuration.xml src/main/docbkx/configuration.xml index b0b2864..5949b0a 100644 --- src/main/docbkx/configuration.xml +++ src/main/docbkx/configuration.xml @@ -520,16 +520,16 @@ Index: pom.xml Type the following commands: - + - + Building against the hadoop 2 profile by running something like the following command: - $ mvn clean install assembly:single -Dhadoop.profile=2.0 -DskipTests + $ mvn clean install assembly:single -Dhadoop.profile=2.0 -DskipTests S S @@ -615,7 +615,7 @@ Index: pom.xml hbase-site.xml -- and on the serverside in hdfs-site.xml (The sync facility HBase needs is a subset of the append code path). - dfs.support.append true @@ -644,7 +644,7 @@ Index: pom.xml Hadoop's conf/hdfs-site.xml, setting the dfs.datanode.max.transfer.threads value to at least the following: - dfs.datanode.max.transfer.threads 4096 @@ -779,7 +779,7 @@ Index: pom.xml configuration parameters. Most HBase configuration directives have default values, which are used unless the value is overridden in the hbase-site.xml. See for more information. - hbase.rootdir @@ -891,7 +891,7 @@ node-c.example.com finally disable and drop your tables. To stop HBase after exiting the HBase shell enter - $ ./bin/stop-hbase.sh + $ ./bin/stop-hbase.sh stopping hbase............... Shutdown can take a moment to complete. It can take longer if your cluster is comprised of many machines. If you are running a distributed operation, be sure to wait until HBase @@ -1063,7 +1063,7 @@ slf4j-log4j (slf4j-log4j12-1.5.8.jar) zookeeper (zookeeper-3.4.2.jar) An example basic hbase-site.xml for client only might look as - follows: @@ -1090,7 +1090,7 @@ zookeeper (zookeeper-3.4.2.jar) hbase.X.X.X.jar). It is also possible to specify configuration directly without having to read from a hbase-site.xml. For example, to set the ZooKeeper ensemble for the cluster programmatically do as follows: - Configuration config = HBaseConfiguration.create(); + Configuration config = HBaseConfiguration.create(); config.set("hbase.zookeeper.quorum", "localhost"); // Here we are running zookeeper locally If multiple ZooKeeper instances make up your ZooKeeper ensemble, they may be specified in a comma-separated list (just as in the hbase-site.xml file). 
This @@ -1126,7 +1126,7 @@ config.set("hbase.zookeeper.quorum", "localhost"); // Here we are running zooke xml:id="hbase_site"> <filename>hbase-site.xml</filename> - + @@ -1140,7 +1140,7 @@ config.set("hbase.zookeeper.quorum", "localhost"); // Here we are running zooke hbase.zookeeper.property.dataDir /export/zookeeper - Property from ZooKeeper's config zoo.cfg. + Property from ZooKeeper config zoo.cfg. The directory where the snapshot is stored. @@ -1191,7 +1191,7 @@ example9 hbase-env.sh file. Here we are setting the HBase heap to be 4G instead of the default 1G. - + $ git diff hbase-env.sh diff --git a/conf/hbase-env.sh b/conf/hbase-env.sh @@ -1476,7 +1476,7 @@ index e70ebc6..96f8c27 100644 running on a late-version HDFS so you have the fixes he refers too and himself adds to HDFS that help HBase MTTR (e.g. HDFS-3703, HDFS-3712, and HDFS-4791 -- hadoop 2 for sure has them and late hadoop 1 has some). Set the following in the RegionServer. - + hbase.lease.recovery.dfs.timeout @@ -1493,7 +1493,7 @@ index e70ebc6..96f8c27 100644 And on the namenode/datanode side, set the following to enable 'staleness' introduced in HDFS-3703, HDFS-3912. - dfs.client.socket-timeout 10000 @@ -1550,7 +1550,7 @@ index e70ebc6..96f8c27 100644 As an alternative, You can use the coprocessor-based JMX implementation provided by HBase. To enable it in 0.99 or above, add below property in hbase-site.xml: - hbase.coprocessor.regionserver.classes org.apache.hadoop.hbase.JMXListener @@ -1566,7 +1566,7 @@ index e70ebc6..96f8c27 100644 By default, the JMX listens on TCP port 10102, you can further configure the port using below properties: - regionserver.rmi.registry.port 61130 @@ -1584,7 +1584,7 @@ index e70ebc6..96f8c27 100644 By default the password authentication and SSL communication is disabled. To enable password authentication, you need to update hbase-env.sh like below: - + export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.authenticate=true \ -Dcom.sun.management.jmxremote.password.file=your_password_file \ -Dcom.sun.management.jmxremote.access.file=your_access_file" @@ -1596,7 +1596,7 @@ export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE " To enable SSL communication with password authentication, follow below steps: - + #1. 
generate a key pair, stored in myKeyStore keytool -genkey -alias jconsole -keystore myKeyStore @@ -1607,10 +1607,10 @@ keytool -export -alias jconsole -keystore myKeyStore -file jconsole.cert keytool -import -alias jconsole -keystore jconsoleKeyStore -file jconsole.cert And then update hbase-env.sh like below: - + export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=true \ -Djavax.net.ssl.keyStore=/home/tianq/myKeyStore \ - -Djavax.net.ssl.keyStorePassword=your_password_in_step_#1 \ + -Djavax.net.ssl.keyStorePassword=your_password_in_step_1 \ -Dcom.sun.management.jmxremote.authenticate=true \ -Dcom.sun.management.jmxremote.password.file=your_password file \ -Dcom.sun.management.jmxremote.access.file=your_access_file" @@ -1620,13 +1620,13 @@ export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE " Finally start jconsole on client using the key store: - + jconsole -J-Djavax.net.ssl.trustStore=/home/tianq/jconsoleKeyStore NOTE: for HBase 0.98, To enable the HBase JMX implementation on Master, you also need to add below property in hbase-site.xml: - hbase.coprocessor.master.classes org.apache.hadoop.hbase.JMXListener diff --git src/main/docbkx/cp.xml src/main/docbkx/cp.xml index 9cc0859..7062d7d 100644 --- src/main/docbkx/cp.xml +++ src/main/docbkx/cp.xml @@ -265,7 +265,7 @@ Example RegionObserver Configuration In this example, one RegionObserver is configured for all the HBase tables. - hbase.coprocessor.region.classes org.apache.hadoop.hbase.coprocessor.AggregateImplementation diff --git src/main/docbkx/customization.xsl src/main/docbkx/customization.xsl index 43d8df7..5d0ec2c 100644 --- src/main/docbkx/customization.xsl +++ src/main/docbkx/customization.xsl @@ -22,6 +22,7 @@ */ --> + diff --git src/main/docbkx/developer.xml src/main/docbkx/developer.xml index 3d003f1..93d8a61 100644 --- src/main/docbkx/developer.xml +++ src/main/docbkx/developer.xml @@ -90,9 +90,9 @@
Import into eclipse with the command line For those not inclined to use m2eclipse, you can generate the Eclipse files from the command line. First, run (you should only have to do this once): - mvn clean install -DskipTests + mvn clean install -DskipTests and then close Eclipse and execute... - mvn eclipse:eclipse + mvn eclipse:eclipse ... from your local HBase project directory in your workspace to generate some new .project and .classpathfiles. Then reopen Eclipse, or refresh your eclipse project (F5), and import the .project file in the HBase directory to a workspace. @@ -136,11 +136,11 @@ Access restriction: The method getLong(Object, long) from the type Unsafe is not Basic Compile Thanks to maven, building HBase is pretty easy. You can read about the various maven commands in , but the simplest command to compile HBase from its java source code is: - + mvn package -DskipTests Or, to clean up before compiling: - + mvn clean package -DskipTests With Eclipse set up as explained above in , you can also simply use the build command in Eclipse. @@ -152,14 +152,14 @@ mvn clean package -DskipTests The protobuf files are located hbase-protocol/src/main/protobuf. For the change to be effective, you will need to regenerate the classes. You can use maven profile compile-protobuf to do this. - + mvn compile -Dcompile-protobuf or mvn compile -Pcompile-protobuf You may also want to define protoc.path for the protoc binary - + mvn compile -Dcompile-protobuf -Dprotoc.path=/opt/local/bin/protoc Read the hbase-protocol/README.txt for more details. @@ -212,7 +212,7 @@ mvn compile -Dcompile-protobuf -Dprotoc.path=/opt/local/bin/protoc build do this for you, you need to make sure you have a properly configured settings.xml in your local repository under .m2. Here is my ~/.m2/settings.xml. - @@ -287,7 +287,7 @@ under the respective release documentation folders. publish a SNAPSHOT, you must keep the -SNAPSHOT suffix on the hbase version. The Versions Maven Plugin can be of use here. To set a version in all the many poms of the hbase multi-module project, do something like this: - $ mvn clean org.codehaus.mojo:versions-maven-plugin:1.3.1:set -DnewVersion=0.96.0 + $ mvn clean org.codehaus.mojo:versions-maven-plugin:1.3.1:set -DnewVersion=0.96.0 Checkin the CHANGES.txt and any version changes. @@ -296,7 +296,7 @@ under the respective release documentation folders. Now, build the src tarball. This tarball is hadoop version independent. It is just the pure src code and documentation without a particular hadoop taint, etc. Add the -Prelease profile when building; it checks files for licenses and will fail the build if unlicensed files present. - $ MAVEN_OPTS="-Xmx2g" mvn clean install -DskipTests assembly:single -Dassembly.file=hbase-assembly/src/main/assembly/src.xml -Prelease + $ MAVEN_OPTS="-Xmx2g" mvn clean install -DskipTests assembly:single -Dassembly.file=hbase-assembly/src/main/assembly/src.xml -Prelease Undo the tarball and make sure it looks good. A good test for the src tarball being 'complete' is to see if you can build new tarballs from this source bundle. If the source tarball is good, save it off to a version directory, i.e a directory somewhere where you are collecting @@ -309,7 +309,7 @@ under the respective release documentation folders. Do it in two steps. First install into the local repository and then generate documentation and assemble the tarball (Otherwise build complains that hbase modules are not in maven repo when we try to do it all in the one go especially on fresh repo). 
It seems that you need the install goal in both steps. - $ MAVEN_OPTS="-Xmx3g" mvn clean install -DskipTests -Prelease + $ MAVEN_OPTS="-Xmx3g" mvn clean install -DskipTests -Prelease $ MAVEN_OPTS="-Xmx3g" mvn install -DskipTests site assembly:single -Prelease Undo the generated tarball and check it out. Look at doc. and see if it runs, etc. If good, copy the tarball to the above mentioned version directory. @@ -320,7 +320,7 @@ If good, copy the tarball to the above mentioned version directoryapache-release profile instead of just release profile when doing mvn deploy; it will invoke the apache pom referenced by our poms. It will also sign your artifacts published to mvn as long as your settings.xml in your local .m2 repository is configured correctly (your settings.xml adds your gpg password property to the apache profile). -$ MAVEN_OPTS="-Xmx3g" mvn deploy -DskipTests -Papache-release +$ MAVEN_OPTS="-Xmx3g" mvn deploy -DskipTests -Papache-release The last command above copies all artifacts up to a temporary staging apache mvn repo in an 'open' state. We'll need to do more work on these maven artifacts to make them generally available. @@ -379,7 +379,7 @@ or borked, just delete the 'open' staged artifacts. If all checks out, next put the version directory up on people.apache.org. You will need to sign and fingerprint them before you push them up. In the version directory do this: - $ for i in *.tar.gz; do echo $i; gpg --print-mds $i > $i.mds ; done + $ for i in *.tar.gz; do echo $i; gpg --print-mds $i > $i.mds ; done $ for i in *.tar.gz; do echo $i; gpg --armor --output $i.asc --detach-sig $i ; done $ cd .. # Presuming our 'version directory' is named 0.96.0RC0, now copy it up to people.apache.org. @@ -396,7 +396,7 @@ $ rsync -av 0.96.0RC0 people.apache.org:public_html Make sure your settings.xml is set up properly (see above for how). Make sure the hbase version includes -SNAPSHOT as a suffix. Here is how I published SNAPSHOTS of a release that had an hbase version of 0.96.0 in its poms. - $ MAVEN_OPTS="-Xmx3g" mvn clean install -DskipTests javadoc:aggregate site assembly:single -Prelease + $ MAVEN_OPTS="-Xmx3g" mvn clean install -DskipTests javadoc:aggregate site assembly:single -Prelease $ MAVEN_OPTS="-Xmx3g" mvn -DskipTests deploy -Papache-release The make_rc.sh script mentioned above in the @@ -436,7 +436,7 @@ $ rsync -av 0.96.0RC0 people.apache.org:public_html (see ). Your Jira should contain a summary of the changes in each section (see HBASE-6081 for an example). To generate the site locally while you're working on it, run: - mvn site + mvn site Then you can load up the generated HTML files in your browser (file are under /target/site).
@@ -446,14 +446,14 @@ $ rsync -av 0.96.0RC0 people.apache.org:public_html Finally, check it in. For example, if trunk is checked out out at /Users/stack/checkouts/trunk and the hbase website, hbase.apache.org, is checked out at /Users/stack/checkouts/hbase.apache.org/trunk, to update the site, do the following: - + # Build the site and deploy it to the checked out directory # Getting the javadoc into site is a little tricky. You have to build it before you invoke 'site'. $ MAVEN_OPTS=" -Xmx3g" mvn clean install -DskipTests javadoc:aggregate site site:stage -DstagingDirectory=/Users/stack/checkouts/hbase.apache.org/trunk Now check the deployed site by viewing in a brower, browse to file:////Users/stack/checkouts/hbase.apache.org/trunk/index.html and check all is good. If all checks out, commit it and your new build will show up immediately at http://hbase.apache.org - + $ cd /Users/stack/checkouts/hbase.apache.org/trunk $ svn status # Do an svn add of any new content... @@ -500,16 +500,16 @@ HBase have a character not usually seen in other projects. Running Tests in other Modules If the module you are developing in has no other dependencies on other HBase modules, then you can cd into that module and just run: - mvn test + mvn test which will just run the tests IN THAT MODULE. If there are other dependencies on other modules, then you will have run the command from the ROOT HBASE DIRECTORY. This will run the tests in the other modules, unless you specify to skip the tests in that module. For instance, to skip the tests in the hbase-server module, you would run: - mvn clean test -PskipServerTests + mvn clean test -PskipServerTests from the top level directory to run all the tests in modules other than hbase-server. Note that you can specify to skip tests in multiple modules as well as just for a single module. For example, to skip the tests in hbase-server and hbase-common, you would run: - mvn clean test -PskipServerTests -PskipCommonTests + mvn clean test -PskipServerTests -PskipCommonTests Also, keep in mind that if you are running tests in the hbase-server module you will need to apply the maven profiles discussed in to get the tests to run properly.
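As a quick illustration of the two cases described above (hbase-common is used here only as an example of a module with no further HBase module dependencies):

# Inside a single module, run only that module's tests
$ cd hbase-common
$ mvn test
# From the top-level directory, run everything except the hbase-server and hbase-common tests
$ cd ..
$ mvn clean test -PskipServerTests -PskipCommonTests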
@@ -522,7 +522,7 @@ integration with corresponding JUnit ... @Category(SmallTests.class) public class TestHRegionInfo { @Test @@ -589,7 +589,7 @@ public class TestHRegionInfo {
Default: small and medium category tests - Running mvn test will execute all small tests + Running mvn test will execute all small tests in a single JVM (no fork) and then medium tests in a separate JVM for each test instance. Medium tests are NOT executed if there is an error in a small test. Large tests are NOT executed. There is one report for small tests, and one @@ -599,7 +599,7 @@ public class TestHRegionInfo {
Running all tests - Running mvn test -P runAllTests will execute + Running mvn test -P runAllTests will execute small tests in a single JVM then medium and large tests in a separate JVM for each test. Medium and large tests are NOT executed if there is an error in a small test. Large tests are NOT executed if there is an error in a small or @@ -611,11 +611,11 @@ public class TestHRegionInfo { xml:id="hbase.unittests.cmds.test.localtests.mytest"> Running a single test or all tests in a package To run an individual test, e.g. MyTest, do - mvn test -Dtest=MyTest You can also pass + mvn test -Dtest=MyTest You can also pass multiple, individual tests as a comma-delimited list: - mvn test -Dtest=MyTest1,MyTest2,MyTest3 You can + mvn test -Dtest=MyTest1,MyTest2,MyTest3 You can also pass a package, which will run all tests under the package: - mvn test '-Dtest=org.apache.hadoop.hbase.client.*' + mvn test '-Dtest=org.apache.hadoop.hbase.client.*' When -Dtest is specified, localTests profile will @@ -656,10 +656,10 @@ public class TestHRegionInfo { can as well use a ramdisk. You will need 2Gb of memory to run all tests. You will also need to delete the files between two test run. The typical way to configure a ramdisk on Linux is: - $ sudo mkdir /ram2G + $ sudo mkdir /ram2G sudo mount -t tmpfs -o size=2048M tmpfs /ram2G You can then use it to run all HBase tests with the command: - mvn test + mvn test -P runAllTests -Dsurefire.secondPartThreadCount=12 -Dtest.build.data.basedirectory=/ram2G
@@ -848,7 +848,7 @@ ConnectionCount=1 (was 1) tests that are in the HBase integration test group. After you have completed mvn install -DskipTests You can run just the integration tests by invoking:
- + cd hbase-it mvn verify If you just want to run the integration tests at the top level, you need to run @@ -890,9 +890,9 @@ mvn verify If you have an already-setup HBase cluster, you can launch the integration tests by invoking the class IntegrationTestsDriver. You may have to run test-compile first. The configuration will be picked up by the bin/hbase script. mvn test-compile Then launch the tests with: - bin/hbase [--config config_dir] org.apache.hadoop.hbase.IntegrationTestsDriver + bin/hbase [--config config_dir] org.apache.hadoop.hbase.IntegrationTestsDriver Pass -h to get usage on this sweet tool. Running the IntegrationTestsDriver without any argument will launch tests found under hbase-it/src/test, having @@ -968,7 +968,7 @@ mvn verify ChaosMonkey uses the configuration from the bin/hbase script, thus no extra configuration needs to be done. You can invoke the ChaosMonkey by running: - bin/hbase org.apache.hadoop.hbase.util.ChaosMonkey + bin/hbase org.apache.hadoop.hbase.util.ChaosMonkey This will output something like: 12/11/19 23:21:57 INFO util.ChaosMonkey: Using ChaosMonkey Policy: class org.apache.hadoop.hbase.util.ChaosMonkey$PeriodicRandomActionPolicy, period:60000 @@ -1021,7 +1021,7 @@ As you can see from the log, ChaosMonkey started the default PeriodicRandomActio org.apache.hadoop.hbase.chaos.factories.MonkeyConstants class. If any chaos monkey configuration is missing from the property file, then the default values are assumed. For example:
- + $bin/hbase org.apache.hadoop.hbase.IntegrationTestIngest -m slowDeterministic -monkeyProps monkey.properties The above command will start the integration tests and chaos monkey passing the properties file monkey.properties. @@ -1046,7 +1046,7 @@ batch.restart.rs.ratio=0.4f
Compile - + mvn compile
@@ -1063,7 +1063,7 @@ mvn compile By default, in 0.96 and earlier, we will build with Hadoop-1.0.x. As of 0.98, Hadoop 1.x is deprecated and Hadoop 2.x is the default. To change the version to build against, add a hadoop.profile property when you invoke mvn: - mvn -Dhadoop.profile=1.0 ... + mvn -Dhadoop.profile=1.0 ... The above will build against whatever explicit hadoop 1.x version we have in our pom.xml as our '1.0' version. Tests may not all pass so you may need to pass -DskipTests unless you are inclined to fix the failing tests. @@ -1083,7 +1083,7 @@ pecularity that is probably fixable but we've not spent the time trying to figur In earilier versions of Apache HBase, you can build against older versions of Apache Hadoop, notably, Hadoop 0.22.x and 0.23.x. If you are running, for example HBase-0.94 and wanted to build against Hadoop 0.23.x, you would run with: - mvn -Dhadoop.profile=22 ... + mvn -Dhadoop.profile=22 ...
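A short sketch contrasting the default build with an explicit Hadoop profile; the profile values are assumed to match whatever versions are declared in the pom.xml you are building from:

# Default build (Hadoop 2 is the default profile as of HBase 0.98)
$ mvn clean install -DskipTests
# Build against the Hadoop 1.0 version declared in the pom.xml
$ mvn clean install -DskipTests -Dhadoop.profile=1.0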
@@ -1154,7 +1154,7 @@ pecularity that is probably fixable but we've not spent the time trying to figur HBase uses JUnit 4 for unit tests This example will add unit tests to the following example class: - + public class MyHBaseDAO { public static void insertRecord(HTableInterface table, HBaseTestObj obj) @@ -1174,7 +1174,7 @@ public class MyHBaseDAO { } The first step is to add JUnit dependencies to your Maven POM file: - junit junit @@ -1184,7 +1184,7 @@ public class MyHBaseDAO { ]]> Next, add some unit tests to your code. Tests are annotated with @Test. Here, the unit tests are in bold. - + public class TestMyHbaseDAOData { @Test public void testCreatePut() throws Exception { @@ -1222,7 +1222,7 @@ public class TestMyHbaseDAOData { linkend="unit.tests" />, to test the insertRecord method.
First, add a dependency for Mockito to your Maven POM file. - org.mockito mockito-all @@ -1232,7 +1232,7 @@ public class TestMyHbaseDAOData { ]]> Next, add a @RunWith annotation to your test class, to direct it to use Mockito. - + @RunWith(MockitoJUnitRunner.class) public class TestMyHBaseDAO{ @Mock @@ -1283,7 +1283,7 @@ public class TestMyHBaseDAO{ MyTest, which has one column family called CF, the reducer of such a job could look like the following: - { public static final byte[] CF = "CF".getBytes(); public static final byte[] QUALIFIER = "CQ-1".getBytes(); @@ -1304,7 +1304,7 @@ public class MyReducer extends TableReducer To test this code, the first step is to add a dependency to MRUnit to your Maven POM file. - org.apache.mrunit mrunit @@ -1313,7 +1313,7 @@ public class MyReducer extends TableReducer ]]> Next, use the ReducerDriver provided by MRUnit, in your Reducer job. - reduceDriver; byte[] CF = "CF".getBytes(); @@ -1367,7 +1367,7 @@ strValue2 = "DATA2"; tests using a mini-cluster. The first step is to add some dependencies to your Maven POM file. Check the versions to be sure they are appropriate. - org.apache.hadoop hadoop-common @@ -1401,7 +1401,7 @@ strValue2 = "DATA2"; ]]> This code represents an integration test for the MyDAO insert shown in . - + public class MyHBaseIntegrationTest { private static HBaseTestingUtility utility; byte[] CF = "CF".getBytes(); @@ -1567,12 +1567,12 @@ public class MyHBaseIntegrationTest { If you are developing Apache HBase, frequently it is useful to test your changes against a more-real cluster than what you find in unit tests. In this case, HBase can be run directly from the source in local-mode. All you need to do is run: - ${HBASE_HOME}/bin/start-hbase.sh + ${HBASE_HOME}/bin/start-hbase.sh This will spin up a full local-cluster, just as if you had packaged up HBase and installed it on your machine. Keep in mind that you will need to have installed HBase into your local maven repository for the in-situ cluster to work properly. That is, you will need to run: - mvn clean install -DskipTests + mvn clean install -DskipTests to ensure that maven can find the correct classpath and dependencies. Generally, the above command is just a good thing to try running first, if maven is acting oddly. @@ -1638,7 +1638,7 @@ public class MyHBaseIntegrationTest { selected resource when generating the patch is a directory. Patch files can reflect changes in multiple files. Generating patches using git: -$ git diff --no-prefix > HBASE_XXXX.patch +$ git diff --no-prefix > HBASE_XXXX.patch Don't forget the 'no-prefix' option; and generate the diff from the root directory of project @@ -1686,20 +1686,20 @@ public class MyHBaseIntegrationTest {
Space Invaders Rather than do this... - + if ( foo.equals( bar ) ) { // don't do this ... do this instead... - + if (foo.equals(bar)) { Also, rather than do this... - + foo = barArray[ i ]; // don't do this ... do this instead... - + foo = barArray[i]; @@ -1707,12 +1707,12 @@ foo = barArray[i];
Auto Generated Code Auto-generated code in Eclipse often looks like this... - + public void readFields(DataInput arg0) throws IOException { // don't do this foo = arg0.readUTF(); // don't do this ... do this instead ... - + public void readFields(DataInput di) throws IOException { foo = di.readUTF(); @@ -1723,11 +1723,11 @@ foo = barArray[i]; Long Lines Keep lines less than 100 characters. - + Bar bar = foo.veryLongMethodWithManyArguments(argument1, argument2, argument3, argument4, argument5, argument6, argument7, argument8, argument9); // don't do this ... do something like this instead ... - + Bar bar = foo.veryLongMethodWithManyArguments( argument1, argument2, argument3,argument4, argument5, argument6, argument7, argument8, argument9); @@ -1737,8 +1737,8 @@ Bar bar = foo.veryLongMethodWithManyArguments( Trailing Spaces This happens more than people would imagine. - -Bar bar = foo.getBar(); <--- imagine there's an extra space(s) after the semicolon instead of a line break. + +Bar bar = foo.getBar(); <--- imagine there is an extra space(s) after the semicolon instead of a line break. Make sure there's a line-break after the end of your code, and also avoid lines that have nothing but whitespace. @@ -1772,7 +1772,7 @@ Bar bar = foo.getBar(); <--- imagine there's an extra space(s) after the findbugs files locally. Sometimes, you may have to write code smarter than Findbugs. You can annotate your code to tell Findbugs you know what you're doing, by annotating your class with: - @edu.umd.cs.findbugs.annotations.SuppressWarnings( + @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="HE_EQUALS_USE_HASHCODE", justification="I know what I'm doing") @@ -1785,7 +1785,7 @@ Bar bar = foo.getBar(); <--- imagine there's an extra space(s) after the Javadoc - Useless Defaults Don't just leave the @param arguments the way your IDE generated them. Don't do this... - + /** * * @param bar <---- don't do this!!!! @@ -1853,7 +1853,7 @@ Bar bar = foo.getBar(); <--- imagine there's an extra space(s) after the patch 1: - $ git diff --no-prefix > HBASE_XXXX-1.patch + $ git diff --no-prefix > HBASE_XXXX-1.patch @@ -1862,12 +1862,12 @@ Bar bar = foo.getBar(); <--- imagine there's an extra space(s) after the create a new git branch - $ git checkout -b my_branch + $ git checkout -b my_branch save your work - $ git add file1 file2 - $ git commit -am 'saved after HBASE_XXXX-1.patch' + $ git add file1 file2 + $ git commit -am 'saved after HBASE_XXXX-1.patch' now you have your own branch, that is different from remote master branch @@ -1876,7 +1876,7 @@ Bar bar = foo.getBar(); <--- imagine there's an extra space(s) after the create second patch - $ git diff --no-prefix > HBASE_XXXX-2.patch + $ git diff --no-prefix > HBASE_XXXX-2.patch diff --git src/main/docbkx/getting_started.xml src/main/docbkx/getting_started.xml index b4e9911..fcf71f2 100644 --- src/main/docbkx/getting_started.xml +++ src/main/docbkx/getting_started.xml @@ -111,7 +111,7 @@ Extract the downloaded file, and change to the newly-created directory. - + $ tar xzvf hbase-]]>-hadoop2-bin.tar.gz $ cd hbase-]]>-hadoop2/ @@ -127,7 +127,7 @@ $ cd hbase-]]>-hadoop2/ <configuration> tags, which should be empty in a new HBase install. Example <filename>hbase-site.xml</filename> for Standalone HBase - hbase.rootdir @@ -168,7 +168,7 @@ $ cd hbase-]]>-hadoop2/ install. In this example, some usage and version information that is printed when you start HBase Shell has been omitted. The HBase Shell prompt ends with a > character. 
- + $ ./bin/hbase shell hbase(main):001:0> @@ -283,7 +283,7 @@ hbase> drop 'test' In the same way that the bin/start-hbase.sh script is provided to conveniently start all HBase daemons, the bin/stop-hbase.sh script stops them. - + $ ./bin/stop-hbase.sh stopping hbase.................... $ @@ -335,7 +335,7 @@ $ property hbase.master.wait.on.regionservers.mintostart should be set to 1 (Its default is changed to 2 since version 1.0.0). - hbase.cluster.distributed true @@ -348,7 +348,7 @@ $ Next, change the hbase.rootdir from the local filesystem to the address of your HDFS instance, using the hdfs://// URI syntax. In this example, HDFS is running on the localhost at port 8020. - hbase.rootdir hdfs://localhost:8020/hbase @@ -371,7 +371,7 @@ $ configuration above, it is stored in /hbase/ on HDFS. You can use the hadoop fs command in Hadoop's bin/ directory to list this directory. - + $ ./bin/hadoop fs -ls /hbase Found 7 items drwxr-xr-x - hbase users 0 2014-06-25 18:58 /hbase/.tmp @@ -404,7 +404,7 @@ drwxr-xr-x - hbase users 0 2014-06-25 21:49 /hbase/oldWALs using an offset of 2, the backup HMaster would use ports 16012, 16022, and 16032. The following command starts 3 backup servers using ports 16012/16022/16032, 16013/16023/16033, and 16015/16025/16035. - + $ ./bin/local-master-backup.sh 2 3 5 To kill a backup master without killing the entire cluster, you need to find its @@ -413,7 +413,7 @@ $ ./bin/local-master-backup.sh 2 3 5 The only contents of the file are the PID. You can use the kill -9 command to kill that PID. The following command will kill the master with port offset 1, but leave the cluster running: - + $ cat /tmp/hbase-testuser-1-master.pid |xargs kill -9 @@ -432,13 +432,13 @@ $ cat /tmp/hbase-testuser-1-master.pid |xargs kill -9 You can run 99 additional RegionServers that are not a HMaster or backup HMaster, on a server. The following command starts four additional RegionServers, running on sequential ports starting at 16202/16302 (base ports 16200/16300 plus 2). - + $ .bin/local-regionservers.sh start 2 3 4 5 To stop a RegionServer manually, use the local-regionservers.sh command with the stop parameter and the offset of the server to stop. - $ .bin/local-regionservers.sh stop 3 + $ .bin/local-regionservers.sh stop 3 Stop HBase. @@ -510,7 +510,7 @@ $ .bin/local-regionservers.sh start 2 3 4 5 While logged in as the user who will run HBase, generate a SSH key pair, using the following command: - $ ssh-keygen -t rsa + $ ssh-keygen -t rsa If the command succeeds, the location of the key pair is printed to standard output. The default name of the public key is id_rsa.pub. @@ -528,7 +528,7 @@ $ .bin/local-regionservers.sh start 2 3 4 5 not already exist, and append the contents of the id_rsa.pub file to the end of it. Note that you also need to do this for node-a itself. - $ cat id_rsa.pub >> ~/.ssh/authorized_keys + $ cat id_rsa.pub >> ~/.ssh/authorized_keys Test password-less login. @@ -574,7 +574,7 @@ $ .bin/local-regionservers.sh start 2 3 4 5 ZooKeeper instance on each node of the cluster. On node-a, edit conf/hbase-site.xml and add the following properties. - hbase.zookeeper.quorum node-a.example.com,node-b.example.com,node-c.example.com @@ -623,7 +623,7 @@ $ .bin/local-regionservers.sh start 2 3 4 5 Start the cluster. On node-a, issue the start-hbase.sh command. Your output will be similar to that below. 
- + $ bin/start-hbase.sh node-c.example.com: starting zookeeper, logging to /home/hbuser/hbase-0.98.3-hadoop2/bin/../logs/hbase-hbuser-zookeeper-node-c.example.com.out node-a.example.com: starting zookeeper, logging to /home/hbuser/hbase-0.98.3-hadoop2/bin/../logs/hbase-hbuser-zookeeper-node-a.example.com.out @@ -643,7 +643,7 @@ node-b.example.com: starting master, logging to /home/hbuser/hbase-0.98.3-hadoop running on your servers as well, if they are used for other purposes. <code>node-a</code> <command>jps</command> Output - + $ jps 20355 Jps 20071 HQuorumPeer @@ -652,7 +652,7 @@ $ jps <code>node-b</code> <command>jps</command> Output - + $ jps 15930 HRegionServer 16194 Jps @@ -662,7 +662,7 @@ $ jps <code>node-c</code> <command>jps</command> Output - + $ jps 13901 Jps 13639 HQuorumPeer diff --git src/main/docbkx/hbase_apis.xml src/main/docbkx/hbase_apis.xml index b803269..bc35aba 100644 --- src/main/docbkx/hbase_apis.xml +++ src/main/docbkx/hbase_apis.xml @@ -40,7 +40,7 @@ Create a Table Using Java This example has been tested on HBase 0.96.1.1. - + package com.example.hbase.admin; import java.io.IOException; @@ -90,7 +90,7 @@ public class CreateSchema { Add, Modify, and Delete a Table This example has been tested on HBase 0.96.1.1. - + public static void upgradeFrom0 (Configuration config) { try { diff --git src/main/docbkx/ops_mgt.xml src/main/docbkx/ops_mgt.xml index 5d01f82..fdf4a6a 100644 --- src/main/docbkx/ops_mgt.xml +++ src/main/docbkx/ops_mgt.xml @@ -46,7 +46,7 @@ There is a Canary class can help users to canary-test the HBase cluster status, with every column-family for every regions or regionservers granularity. To see the usage, use the --help parameter. - $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -help + $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -help Usage: bin/hbase org.apache.hadoop.hbase.tool.Canary [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..] where [opts] are: @@ -61,7 +61,7 @@ Usage: bin/hbase org.apache.hadoop.hbase.tool.Canary [opts] [table1 [table2]...] -t <N> timeout for a check, default is 600000 (milliseconds) This tool will return non zero error codes to user for collaborating with other monitoring tools, such as Nagios. The error code definitions are: - private static final int USAGE_EXIT_CODE = 1; + private static final int USAGE_EXIT_CODE = 1; private static final int INIT_ERROR_EXIT_CODE = 2; private static final int TIMEOUT_ERROR_EXIT_CODE = 3; private static final int ERROR_EXIT_CODE = 4; @@ -113,7 +113,7 @@ private static final int ERROR_EXIT_CODE = 4; Following are some examples based on the previous given case.
Canary test for every column family (store) of every region of every table - $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary + $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary 13/12/09 03:26:32 INFO tool.Canary: read from region test-01,,1386230156732.0e3c7d77ffb6361ea1b996ac1042ca9a. column family cf1 in 2ms 13/12/09 03:26:32 INFO tool.Canary: read from region test-01,,1386230156732.0e3c7d77ffb6361ea1b996ac1042ca9a. column family cf2 in 2ms @@ -134,14 +134,14 @@ private static final int ERROR_EXIT_CODE = 4; Canary test for every column family (store) of every region of specific table(s) You can also test one or more specific tables. - $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary test-01 test-02 + $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary test-01 test-02
Canary test with regionserver granularity This mode picks one small piece of data from each regionserver; you can also pass regionserver names as input options to canary-test specific regionservers. - $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -regionserver + $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -regionserver 13/12/09 06:05:17 INFO tool.Canary: Read from table:test-01 on region server:rs2 in 72ms 13/12/09 06:05:17 INFO tool.Canary: Read from table:test-02 on region server:rs3 in 34ms @@ -150,7 +150,7 @@ private static final int ERROR_EXIT_CODE = 4;
Canary test with regular expression pattern This will test both tables test-01 and test-02. - $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -e test-0[1-2] + $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -e test-0[1-2]
@@ -158,10 +158,10 @@ private static final int ERROR_EXIT_CODE = 4; Run repeatedly with the interval defined by the -interval option, whose default value is 6 seconds. The daemon will stop itself and return a non-zero error code if any error occurs, because the default value of the -f option is true. - $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -daemon + $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -daemon The following run uses the specified interval and does not stop itself even if errors occur during the test. - $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -daemon -interval 50000 -f false + $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -daemon -interval 50000 -f false
@@ -171,7 +171,7 @@ private static final int ERROR_EXIT_CODE = 4; Master, which would leave the clients hung as well. The timeout option kills the canary test forcefully and returns a non-zero error code. This run sets the timeout value to 600000 milliseconds (600 seconds, which is also the default). - $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -t 600000 + $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.tool.Canary -t 600000
@@ -194,7 +194,7 @@ private static final int ERROR_EXIT_CODE = 4;
UtilityName with the utility you want to run. This command assumes you have set the environment variable HBASE_HOME to the directory where HBase is unpacked on your server. - + ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.mapreduce.UtilityName The following utilities are available: @@ -267,13 +267,13 @@ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.mapreduce.UtilityNa recovered.edits. directory. You can get a textual dump of a WAL file's content by doing the following: - $ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --dump hdfs://example.org:8020/hbase/.logs/example.org,60020,1283516293161/10.10.21.10%3A60020.1283973724012 + $ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --dump hdfs://example.org:8020/hbase/.logs/example.org,60020,1283516293161/10.10.21.10%3A60020.1283973724012 The return code will be non-zero if there are issues with the file, so you can test the health of a file by redirecting STDOUT to /dev/null and checking the program's return code. Similarly you can force a split of a log file directory by doing: - $ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --split hdfs://example.org:8020/hbase/.logs/example.org,60020,1283516293161/ + $ ./bin/hbase org.apache.hadoop.hbase.regionserver.wal.FSHLog --split hdfs://example.org:8020/hbase/.logs/example.org,60020,1283516293161/
@@ -297,7 +297,7 @@ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.mapreduce.UtilityNa cluster or another cluster. The target table must first exist. The usage is as follows: - + $ ./bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable --help /bin/hbase org.apache.hadoop.hbase.mapreduce.CopyTable --help Usage: CopyTable [general options] [--starttime=X] [--endtime=Y] [--new.name=NEW] [--peer.adr=ADR] <tablename> @@ -355,7 +355,7 @@ For performance consider the following general options: Export Export is a utility that will dump the contents of table to HDFS in a sequence file. Invoke via: - $ bin/hbase org.apache.hadoop.hbase.mapreduce.Export <tablename> <outputdir> [<versions> [<starttime> [<endtime>]]] + $ bin/hbase org.apache.hadoop.hbase.mapreduce.Export <tablename> <outputdir> [<versions> [<starttime> [<endtime>]]] Note: caching for the input Scan is configured via @@ -366,11 +366,11 @@ For performance consider the following general options: Import Import is a utility that will load data that has been exported back into HBase. Invoke via: - $ bin/hbase org.apache.hadoop.hbase.mapreduce.Import <tablename> <inputdir> + $ bin/hbase org.apache.hadoop.hbase.mapreduce.Import <tablename> <inputdir> To import 0.94 exported files in a 0.96 cluster or onwards, you need to set system property "hbase.import.version" when running the import command as below: - $ bin/hbase -Dhbase.import.version=0.94 org.apache.hadoop.hbase.mapreduce.Import <tablename> <inputdir> + $ bin/hbase -Dhbase.import.version=0.94 org.apache.hadoop.hbase.mapreduce.Import <tablename> <inputdir>
completebulkload. To load data via Puts (i.e., non-bulk loading): - $ bin/hbase org.apache.hadoop.hbase.mapreduce.ImportTsv -Dimporttsv.columns=a,b,c <tablename> <hdfs-inputdir> + $ bin/hbase org.apache.hadoop.hbase.mapreduce.ImportTsv -Dimporttsv.columns=a,b,c <tablename> <hdfs-inputdir> To generate StoreFiles for bulk-loading: - $ bin/hbase org.apache.hadoop.hbase.mapreduce.ImportTsv -Dimporttsv.columns=a,b,c -Dimporttsv.bulk.output=hdfs://storefile-outputdir <tablename> <hdfs-data-inputdir> + $ bin/hbase org.apache.hadoop.hbase.mapreduce.ImportTsv -Dimporttsv.columns=a,b,c -Dimporttsv.bulk.output=hdfs://storefile-outputdir <tablename> <hdfs-data-inputdir> These generated StoreFiles can be loaded into HBase via . @@ -438,7 +438,7 @@ row10 c1 c2 For ImportTsv to use this input file, the command line needs to look like this: - + HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-VERSION.jar importtsv -Dimporttsv.columns=HBASE_ROW_KEY,d:c1,d:c2 -Dimporttsv.bulk.output=hdfs://storefileoutput datatsv hdfs://inputfile ... and in this example the first column is the rowkey, which is why the @@ -467,10 +467,10 @@ row10 c1 c2 linkend="importtsv" />. There are two ways to invoke this utility, with explicit classname and via the driver: - $ bin/hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles <hdfs://storefileoutput> <tablename> + $ bin/hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles <hdfs://storefileoutput> <tablename> ... and via the driver: - HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-VERSION.jar completebulkload <hdfs://storefileoutput> <tablename> + HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath` ${HADOOP_HOME}/bin/hadoop jar ${HBASE_HOME}/hbase-VERSION.jar completebulkload <hdfs://storefileoutput> <tablename>
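Besides the command-line driver, the generated StoreFiles can also be moved into a table programmatically. The listing below is a minimal, illustrative sketch only (it is not taken from the HBase codebase); it reuses the datatsv table and the hdfs://storefileoutput directory from the ImportTsv example above, so substitute names and paths for your own cluster.
<programlisting language="java"><![CDATA[
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class BulkLoadExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // The target table must already exist, just as with the completebulkload driver.
    HTable table = new HTable(conf, "datatsv");
    try {
      // Point at the directory of StoreFiles written by ImportTsv with
      // -Dimporttsv.bulk.output, then move them into the table's regions.
      LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
      loader.doBulkLoad(new Path("hdfs://storefileoutput"), table);
    } finally {
      table.close();
    }
  }
}
]]></programlisting>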
@@ -493,10 +493,10 @@ row10 c1 c2 WALPlayer can also generate HFiles for later bulk importing, in that case only a single table and no mapping can be specified. Invoke via: - $ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer [options] <wal inputdir> <tables> [<tableMappings>]> + $ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer [options] <wal inputdir> <tables> [<tableMappings>]> For example: - $ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer /backuplogdir oldTable1,oldTable2 newTable1,newTable2 + $ bin/hbase org.apache.hadoop.hbase.mapreduce.WALPlayer /backuplogdir oldTable1,oldTable2 newTable1,newTable2 WALPlayer, by default, runs as a mapreduce job. To NOT run WALPlayer as a mapreduce job on your cluster, force it to run all in the local process by adding the flags @@ -511,7 +511,7 @@ row10 c1 c2 sanity check to ensure that HBase can read all the blocks of a table if there are any concerns of metadata inconsistency. It will run the mapreduce all in a single process but it will run faster if you have a MapReduce cluster in place for it to exploit. - $ bin/hbase org.apache.hadoop.hbase.mapreduce.RowCounter <tablename> [<column1> <column2>...] + $ bin/hbase org.apache.hadoop.hbase.mapreduce.RowCounter <tablename> [<column1> <column2>...] Note: caching for the input Scan is configured via hbase.client.scanner.caching in the job configuration. @@ -542,7 +542,7 @@ row10 c1 c2 The program allows you to limit the scope of the run. Provide a row regex or prefix to limit the rows to analyze. Use hbase.mapreduce.scan.column.family to specify scanning a single column family. - $ bin/hbase org.apache.hadoop.hbase.mapreduce.CellCounter <tablename> <outputDir> [regex or prefix] + $ bin/hbase org.apache.hadoop.hbase.mapreduce.CellCounter <tablename> <outputDir> [regex or prefix] Note: just like RowCounter, caching for the input Scan is configured via hbase.client.scanner.caching in the job configuration.
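As the notes above say, the caching for the input Scan of RowCounter and CellCounter is picked up from hbase.client.scanner.caching in the job configuration. The snippet below is only a hedged sketch of one way to set that property before launching such a job; the value 500 is an arbitrary illustration, not a recommendation.
<programlisting language="java"><![CDATA[
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class ScanCachingConfig {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // Number of rows fetched per scanner RPC by the job's input Scan.
    // Tune this for your row size and RegionServer memory budget.
    conf.setInt("hbase.client.scanner.caching", 500);
    // Pass 'conf' to the RowCounter or CellCounter job you launch,
    // for example when constructing the MapReduce Job for it.
  }
}
]]></programlisting>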
@@ -585,7 +585,7 @@ row10 c1 c2 Merge Merge is a utility that can merge adjoining regions in the same table (see org.apache.hadoop.hbase.util.Merge). - $ bin/hbase org.apache.hadoop.hbase.util.Merge <tablename> <region1> <region2> + $ bin/hbase org.apache.hadoop.hbase.util.Merge <tablename> <region1> <region2> If you feel you have too many regions and want to consolidate them, Merge is the utility you need. Merge must be run when the cluster is down. See the Node Decommission You can stop an individual RegionServer by running the following script in the HBase directory on the particular node: - $ ./bin/hbase-daemon.sh stop regionserver + $ ./bin/hbase-daemon.sh stop regionserver The RegionServer will first close all regions and then shut itself down. On shutdown, the RegionServer's ephemeral node in ZooKeeper will expire. The master will notice the RegionServer gone and will treat it as a 'crashed' server; it will reassign the nodes the @@ -627,7 +627,7 @@ row10 c1 c2 the RegionServer's znode gone. In Apache HBase 0.90.2, we added a facility for having a node gradually shed its load and then shut itself down. Apache HBase 0.90.2 added the graceful_stop.sh script. Here is its usage: - $ ./bin/graceful_stop.sh + $ ./bin/graceful_stop.sh Usage: graceful_stop.sh [--config <conf-dir>] [--restart] [--reload] [--thrift] [--rest] <hostname> thrift If we should stop/start thrift before/after the hbase stop/start rest If we should stop/start rest before/after the hbase stop/start @@ -729,7 +729,7 @@ false You can also ask this script to restart a RegionServer after the shutdown AND move its old regions back into place. The latter you might do to retain data locality. A primitive rolling restart might be effected by running something like the following: - $ for i in `cat conf/regionservers|sort`; do ./bin/graceful_stop.sh --restart --reload --debug $i; done &> /tmp/log.txt & + $ for i in `cat conf/regionservers|sort`; do ./bin/graceful_stop.sh --restart --reload --debug $i; done &> /tmp/log.txt & Tail the output of /tmp/log.txt to follow the script's progress. The above does RegionServers only. The script will also disable the load balancer before moving the regions. You'd need to do the master update separately. Do it before you run the @@ -741,18 +741,18 @@ false Run hbck to ensure the cluster is consistent - $ ./bin/hbase hbck Effect repairs if inconsistent. + $ ./bin/hbase hbck Effect repairs if inconsistent. Restart the Master: - $ ./bin/hbase-daemon.sh stop master; ./bin/hbase-daemon.sh start master + $ ./bin/hbase-daemon.sh stop master; ./bin/hbase-daemon.sh start master Run the graceful_stop.sh script per RegionServer. For example: - $ for i in `cat conf/regionservers|sort`; do ./bin/graceful_stop.sh --restart --reload --debug $i; done &> /tmp/log.txt & + $ for i in `cat conf/regionservers|sort`; do ./bin/graceful_stop.sh --restart --reload --debug $i; done &> /tmp/log.txt & If you are running thrift or rest servers on the RegionServer, pass --thrift or --rest options (see the usage for the graceful_stop.sh script). @@ -1678,7 +1678,7 @@ false To turn on snapshot support, just set the hbase.snapshot.enabled property to true. (Snapshots are enabled by default in 0.95+ and off by default in 0.94.6+) - + <property> <name>hbase.snapshot.enabled</name> <value>true</value> @@ -1690,7 +1690,7 @@ false Take a Snapshot You can take a snapshot of a table regardless of whether it is enabled or disabled. The snapshot operation doesn't involve any data copying.
- + $ ./bin/hbase shell hbase> snapshot 'myTable', 'myTableSnapshot-122112' @@ -1699,7 +1699,7 @@ hbase> snapshot 'myTable', 'myTableSnapshot-122112' xml:id="ops.snapshots.list"> Listing Snapshots List all snapshots taken (by printing the names and related information). - + $ ./bin/hbase shell hbase> list_snapshots @@ -1709,7 +1709,7 @@ hbase> list_snapshots Deleting Snapshots You can remove a snapshot, and the files retained for that snapshot will be removed if no longer needed. - + $ ./bin/hbase shell hbase> delete_snapshot 'myTableSnapshot-122112' @@ -1720,7 +1720,7 @@ hbase> delete_snapshot 'myTableSnapshot-122112' From a snapshot you can create a new table (clone operation) with the same data that you had when the snapshot was taken. The clone operation doesn't involve data copies, and a change to the cloned table doesn't impact the snapshot or the original table. - + $ ./bin/hbase shell hbase> clone_snapshot 'myTableSnapshot-122112', 'myNewTestTable' @@ -1731,7 +1731,7 @@ hbase> clone_snapshot 'myTableSnapshot-122112', 'myNewTestTable' The restore operation requires the table to be disabled, and the table will be restored to the state at the time when the snapshot was taken, changing both data and schema if required. - + $ ./bin/hbase shell hbase> disable 'myTable' hbase> restore_snapshot 'myTableSnapshot-122112' @@ -1763,14 +1763,14 @@ hbase> restore_snapshot 'myTableSnapshot-122112' HBase cluster does not have to be online. To copy a snapshot called MySnapshot to an HBase cluster srv2 (hdfs://srv2:8082/hbase) using 16 mappers: - $ bin/hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase -mappers 16 + $ bin/hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase -mappers 16 Limiting Bandwidth Consumption You can limit the bandwidth consumption when exporting a snapshot by specifying the -bandwidth parameter, which expects an integer representing megabytes per second. The following example limits the above example to 200 MB/sec. - $ bin/hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase -mappers 16 -bandwidth 200 + $ bin/hbase org.apache.hadoop.hbase.snapshot.ExportSnapshot -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase -mappers 16 -bandwidth 200
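The shell commands above have Java counterparts on HBaseAdmin (the rename example that follows uses the same calls). Below is a minimal, illustrative sketch, reusing the table and snapshot names from the shell examples, of taking, cloning, and restoring a snapshot from client code; treat it as a sketch rather than a definitive recipe.
<programlisting language="java"><![CDATA[
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class SnapshotExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      // Take a snapshot; the table may be enabled or disabled.
      admin.snapshot("myTableSnapshot-122112", "myTable");

      // Clone the snapshot into a brand-new table; no data is copied.
      admin.cloneSnapshot("myTableSnapshot-122112", "myNewTestTable");

      // Restoring requires the table to be disabled first, as noted above.
      admin.disableTable("myTable");
      admin.restoreSnapshot("myTableSnapshot-122112");
      admin.enableTable("myTable");
    } finally {
      admin.close();
    }
  }
}
]]></programlisting>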
@@ -2035,7 +2035,7 @@ hbase shell> clone_snapshot 'tableSnapshot', 'newTableName' hbase shell> delete_snapshot 'tableSnapshot' hbase shell> drop 'tableName']]> or in code it would be as follows: - void rename(HBaseAdmin admin, String oldTableName, String newTableName) { + void rename(HBaseAdmin admin, String oldTableName, String newTableName) { String snapshotName = randomName(); admin.disableTable(oldTableName); admin.snapshot(snapshotName, oldTableName); diff --git src/main/docbkx/performance.xml src/main/docbkx/performance.xml index c00b635..47b67be 100644 --- src/main/docbkx/performance.xml +++ src/main/docbkx/performance.xml @@ -475,7 +475,7 @@ hbase> create 'mytable',{NAME => 'colfam1', BLOOMFILTER => 'ROWCOL'}< Constants When people get started with HBase they have a tendency to write code that looks like this: - + Get get = new Get(rowkey); Result r = htable.get(get); byte[] b = r.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr")); // returns current version of value @@ -483,7 +483,7 @@ byte[] b = r.getValue(Bytes.toBytes("cf"), Bytes.toBytes("attr")); // returns c But especially when inside loops (and MapReduce jobs), converting the columnFamily and column-names to byte-arrays repeatedly is surprisingly expensive. It's better to use constants for the byte-arrays, like this: - + public static final byte[] CF = "cf".getBytes(); public static final byte[] ATTR = "attr".getBytes(); ... @@ -517,14 +517,14 @@ byte[] b = r.getValue(CF, ATTR); // returns current version of value There are two different approaches to pre-creating splits. The first approach is to rely on the default HBaseAdmin strategy (which is implemented in Bytes.split)... - -byte[] startKey = ...; // your lowest keuy + +byte[] startKey = ...; // your lowest key byte[] endKey = ...; // your highest key int numberOfRegions = ...; // # of regions to create admin.createTable(table, startKey, endKey, numberOfRegions); And the other approach is to define the splits yourself... - + byte[][] splits = ...; // create your own splits admin.createTable(table, splits); @@ -676,7 +676,7 @@ admin.createTable(table, splits); Scan.HINT_LOOKAHEAD can be set the on Scan object. The following code instructs the RegionServer to attempt two iterations of next before a seek is scheduled: - + Scan scan = new Scan(); scan.addColumn(...); scan.setAttribute(Scan.HINT_LOOKAHEAD, Bytes.toBytes(2)); @@ -701,7 +701,7 @@ table.getScanner(scan); xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/ResultScanner.html">ResultScanners you can cause problems on the RegionServers. Always have ResultScanner processing enclosed in try/catch blocks... - + Scan scan = new Scan(); // set attrs... ResultScanner rs = htable.getScanner(scan); @@ -907,7 +907,7 @@ htable.close(); shortcircuit reads configuration page for how to enable the latter, better version of shortcircuit. For example, here is a minimal config. enabling short-circuit reads added to hbase-site.xml: - + dfs.client.read.shortcircuit true diff --git src/main/docbkx/preface.xml src/main/docbkx/preface.xml index 5885bfe..582d04a 100644 --- src/main/docbkx/preface.xml +++ src/main/docbkx/preface.xml @@ -45,7 +45,7 @@ the src/main/docbkx directory of the HBase source. This reference guide is marked up using DocBook from which the the finished guide is generated as part of the 'site' build target. Run - mvn site to generate this documentation. Amendments and + mvn site to generate this documentation. Amendments and improvements to the documentation are welcomed. 
Click this link to file a new documentation bug against Apache HBase with some diff --git src/main/docbkx/schema_design.xml src/main/docbkx/schema_design.xml index 2fdeb00..de05c14 100644 --- src/main/docbkx/schema_design.xml +++ src/main/docbkx/schema_design.xml @@ -44,7 +44,7 @@ xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html">HBaseAdmin in the Java API. Tables must be disabled when making ColumnFamily modifications, for example: - + Configuration config = HBaseConfiguration.create(); HBaseAdmin admin = new HBaseAdmin(conf); String table = "myTable"; @@ -184,7 +184,7 @@ admin.enableTable(table); in those eight bytes. If you stored this number as a String -- presuming a byte per character -- you need nearly 3x the bytes. Not convinced? Below is some sample code that you can run on your own. - + // long // long l = 1234567890L; @@ -307,7 +307,7 @@ COLUMN CELL are accessible in the keyspace. To conclude this example, the following is an example of how appropriate splits can be pre-created for hex-keys:. - - + long bucket = timestamp % numBuckets; … to construct: @@ -1041,13 +1041,13 @@ long bucket = timestamp % numBuckets; ]]> The other option we had was to do this entirely using: - :... :... ]]> where each row would contain multiple values. So in one case reading the first thirty values would be: - 'FixedWidthUsername' LIMIT => 30} ]]> And in the second case it would be diff --git src/main/docbkx/security.xml src/main/docbkx/security.xml index ac25638..4fe5aa0 100644 --- src/main/docbkx/security.xml +++ src/main/docbkx/security.xml @@ -73,7 +73,7 @@ operation that must be added to the hbase-site.xml file on every server machine in the cluster. Required for even the most basic interactions with a secure Hadoop configuration, independent of HBase security. - hbase.regionserver.kerberos.principal hbase/_HOST@YOUR-REALM.COM @@ -117,7 +117,7 @@ underlying HDFS configuration is secure. Add the following to the hbase-site.xml file on every server machine in the cluster: - hbase.security.authentication kerberos @@ -140,7 +140,7 @@ First, refer to and ensure that your underlying HDFS configuration is secure. Add the following to the hbase-site.xml file on every client: - hbase.security.authentication kerberos @@ -154,7 +154,7 @@ Once HBase is configured for secure RPC it is possible to optionally configure encrypted communication. To do so, add the following to the hbase-site.xml file on every client: - hbase.rpc.protection privacy @@ -162,7 +162,7 @@ ]]> This configuration property can also be set on a per connection basis. Set it in the Configuration supplied to HTable: - + Configuration conf = HBaseConfiguration.create(); conf.set("hbase.rpc.protection", "privacy"); HTable table = new HTable(conf, tablename); @@ -173,7 +173,7 @@ HTable table = new HTable(conf, tablename);
Client-side Configuration for Secure Operation - Thrift Gateway - Add the following to the hbase-site.xml file for every Thrift gateway: Add the following to the hbase-site.xml file for every Thrift gateway: hbase.thrift.keytab.file /etc/hbase/conf/hbase.keytab @@ -193,7 +193,7 @@ HTable table = new HTable(conf, tablename); add the hbase.thrift.kerberos.principal to the _acl_ table. For example, to give the Thrift API principal, thrift_server, administrative access, a command such as this one will suffice: - For more information about ACLs, please see the Client-side Configuration for Secure Operation - REST Gateway Add the following to the hbase-site.xml file for every REST gateway: - hbase.rest.keytab.file $KEYTAB @@ -276,7 +276,7 @@ grant 'thrift_server', 'RWCA' add the hbase.rest.kerberos.principal to the _acl_ table. For example, to give the REST API principal, rest_server, administrative access, a command such as this one will suffice: - For more information about ACLs, please see the To allow proxy users, add the following to the hbase-site.xml file for every HBase server: - hadoop.security.authorization true @@ -316,7 +316,7 @@ grant 'rest_server', 'RWCA' $GROUPS. To enable REST gateway impersonation, add the following to the hbase-site.xml file for every REST gateway. - hbase.rest.authentication.type kerberos @@ -367,7 +367,7 @@ grant 'rest_server', 'RWCA' Server-side Configuration for Simple User Access Operation Add the following to the hbase-site.xml file on every server machine in the cluster: - hbase.security.authentication simple @@ -387,7 +387,7 @@ grant 'rest_server', 'RWCA' ]]> For 0.94, add the following to the hbase-site.xml file on every server machine in the cluster: - hbase.rpc.engine org.apache.hadoop.hbase.ipc.SecureRpcEngine @@ -408,7 +408,7 @@ grant 'rest_server', 'RWCA'
Client-side Configuration for Simple User Access Operation Add the following to the hbase-site.xml file on every client: - hbase.security.authentication simple @@ -416,7 +416,7 @@ grant 'rest_server', 'RWCA' ]]> For 0.94, add the following to the hbase-site.xml file on every server machine in the cluster: - hbase.rpc.engine org.apache.hadoop.hbase.ipc.SecureRpcEngine @@ -432,7 +432,7 @@ grant 'rest_server', 'RWCA' The Thrift gateway user will need access. For example, to give the Thrift API user, thrift_server, administrative access, a command such as this one will suffice: - For more information about ACLs, please see the The REST gateway user will need access. For example, to give the REST API user, rest_server, administrative access, a command such as this one will suffice: - For more information about ACLs, please see the HFile version 3, available from 0.98 onwards, supports tags, and this feature can be turned on using the following configuration - hfile.format.version 3 @@ -488,7 +488,7 @@ grant 'rest_server', 'RWCA' Just as rowkeys, column families, qualifiers and values can be encoded using different encoding algorithms, tags can also be encoded. Tag encoding can be turned on per column family; the default is ON. To turn on tag encoding in the HFiles, use - Note that encoding of tags takes place only if the DataBlockEncoder is enabled for the @@ -496,14 +496,14 @@ HColumnDescriptor#setCompressTags(boolean compressTags) Because the WAL entries are compressed using a dictionary, the tags present in the WAL can also be dictionary-compressed. Every tag is compressed individually using the WAL dictionary. To turn on tag compression in the WAL dictionary, enable the property - hbase.regionserver.wal.tags.enablecompression true ]]> To add tags to every cell during Puts, the following APIs are provided - @@ -1392,7 +1392,7 @@ Put#add(byte[] family, byte[] qualifier, long ts, byte[] value, Tag[] tag) processes before setting up ACLs. To enable the AccessController, modify the hbase-site.xml file on every server machine in the cluster to look like: - hbase.coprocessor.master.classes org.apache.hadoop.hbase.security.access.AccessController @@ -1413,21 +1413,21 @@ Put#add(byte[] family, byte[] qualifier, long ts, byte[] value, Tag[] tag) on configuring it, refer to the Access Control section. The ACLs can be specified for every mutation using the APIs - perms) ]]> For example, to provide read permission to a user ‘user1’ (a Java sketch of attaching such a cell ACL to a Put follows the grant/revoke syntax below): - Generally, the ACL applied on the table and column family takes precedence over the cell-level ACL. To make the cell-level ACL take precedence instead, use the following API: - Please note that in order to use this feature, HFile version 3 should be turned on. - hfile.format.version 3 @@ -1445,7 +1445,7 @@ Mutation.setACLStrategy(boolean cellFirstStrategy) Grant - [
[ [ ] ] ] ]]> @@ -1463,7 +1463,7 @@ grant [
[ [ Revoke - [
[ [ ] ] ] ]]> @@ -1472,7 +1472,7 @@ revoke [
[ [ ] ] ] The alter command has been extended to allow ownership assignment: - 'username|@group'} ]]> @@ -1524,7 +1524,7 @@ user_permission
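To complement the shell grant/revoke syntax above, the following is a hedged Java sketch of the Mutation.setACL API mentioned earlier, attaching a cell-level ACL that grants read access to user 'user1'. The table, row, and column names are placeholders, and the sketch assumes HFile version 3 and the AccessController are enabled as described above.
<programlisting language="java"><![CDATA[
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.security.access.Permission;
import org.apache.hadoop.hbase.util.Bytes;

public class CellAclExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HTable table = new HTable(conf, "myTable");   // placeholder table name
    try {
      Put put = new Put(Bytes.toBytes("row1"));   // placeholder row key
      put.add(Bytes.toBytes("cf"), Bytes.toBytes("attr"), Bytes.toBytes("value"));
      // Attach a cell-level ACL giving 'user1' read access to the cells
      // written by this Put; table and CF ACLs still apply unless the
      // cell-first strategy described above is enabled.
      put.setACL("user1", new Permission(Permission.Action.READ));
      table.put(put);
    } finally {
      table.close();
    }
  }
}
]]></programlisting>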
You have to enable the secure bulk load to work properly. You can modify the hbase-site.xml file on every server machine in the cluster and add the SecureBulkLoadEndpoint class to the list of regionserver coprocessors: - hbase.bulkload.staging.dir /tmp/hbase-staging @@ -1554,7 +1554,7 @@ user_permission
the cell or even know of its existence. Visibility expressions like the above can be added when storing or mutating a cell using the API, - Mutation#setCellVisibility(new CellVisibility(String labelExpression)); + Mutation#setCellVisibility(new CellVisibility(String labelExpression)); where the labelExpression could be '( secret | topsecret ) & !probationary'. We build the user's label set in the RPC context when a request is first received by the HBase RegionServer. How users are associated with labels is pluggable. The default plugin @@ -1609,7 +1609,7 @@ user_permission
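As a concrete illustration of the setCellVisibility API just mentioned, the following is a minimal, illustrative sketch (placeholder table, row, and column names) that stores a cell guarded by the example expression and then scans with an explicit authorization; cells the caller is not authorized for are simply absent from the results.
<programlisting language="java"><![CDATA[
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.security.visibility.Authorizations;
import org.apache.hadoop.hbase.security.visibility.CellVisibility;
import org.apache.hadoop.hbase.util.Bytes;

public class VisibilityExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HTable table = new HTable(conf, "myTable");   // placeholder table name
    try {
      // Store a cell guarded by the label expression from the text above.
      Put put = new Put(Bytes.toBytes("row1"));
      put.add(Bytes.toBytes("cf"), Bytes.toBytes("attr"), Bytes.toBytes("value"));
      put.setCellVisibility(new CellVisibility("( secret | topsecret ) & !probationary"));
      table.put(put);

      // Only scans carrying a matching authorization will see the cell.
      Scan scan = new Scan();
      scan.setAuthorizations(new Authorizations("secret"));
      ResultScanner scanner = table.getScanner(scan);
      try {
        for (Result result : scanner) {
          // Process only the cells this caller is authorized to see.
        }
      } finally {
        scanner.close();
      }
    } finally {
      table.close();
    }
  }
}
]]></programlisting>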
HBase stores cell level labels as cell tags. HFile version 3 adds the cell tags support. Be sure to use HFile version 3 by setting this property in every server site configuration file: - hfile.format.version 3 @@ -1618,7 +1618,7 @@ user_permission
You will also need to make sure the VisibilityController coprocessor is active on every table to protect by adding it to the list of system coprocessors in the server site configuration files: - hbase.coprocessor.master.classes org.apache.hadoop.hbase.security.visibility.VisibilityController @@ -1680,7 +1680,7 @@ user_permission
xml:id="hbase.encryption.server.configuration"> Configuration Create a secret key of appropriate length for AES. - \ -genseckey -keyalg AES -keysize 128 \ @@ -1693,7 +1693,7 @@ $ keytool -keystore /path/to/hbase/conf/hbase.jks \ the HBase service account. Configure HBase daemons to use a key provider backed by the KeyStore files for retrieving the cluster master key as needed. - hbase.crypto.keyprovider org.apache.hadoop.hbase.io.crypto.KeyStoreKeyProvider @@ -1705,7 +1705,7 @@ $ keytool -keystore /path/to/hbase/conf/hbase.jks \ ]]> By default the HBase service account name will be used to resolve the cluster master key, but you can store it with any arbitrary alias and configure HBase appropriately: - hbase.crypto.master.key.name hbase @@ -1715,14 +1715,14 @@ $ keytool -keystore /path/to/hbase/conf/hbase.jks \ should also have its permissions set to be readable only by the HBase service account. Transparent encryption is a feature of HFile version 3. Be sure to use HFile version 3 by setting this property in every server site configuration file: - hfile.format.version 3 ]]> Finally, configure the secure WAL in every server site configuration file: - hbase.regionserver.hlog.reader.impl org.apache.hadoop.hbase.regionserver.wal.SecureProtobufLogReader @@ -1769,7 +1769,7 @@ $ keytool -keystore /path/to/hbase/conf/hbase.jks \ Master key rotation can be achieved by updating the KeyStore to contain a new master key, as described above, with also the old master key added to the KeyStore under a different alias. Then, configure fallback to the old master key in the HBase site file: - hbase.crypto.master.alternate.key.name hbase.old diff --git src/main/docbkx/thrift_filter_language.xml src/main/docbkx/thrift_filter_language.xml index a0535a4..74da600 100644 --- src/main/docbkx/thrift_filter_language.xml +++ src/main/docbkx/thrift_filter_language.xml @@ -348,13 +348,13 @@ is evaluated as Syntax - KeyOnlyFilter () + KeyOnlyFilter () Example - KeyOnlyFilter ()" + KeyOnlyFilter ()" @@ -368,13 +368,13 @@ is evaluated as Syntax - FirstKeyOnlyFilter () + FirstKeyOnlyFilter () Example - FirstKeyOnlyFilter () + FirstKeyOnlyFilter () @@ -388,13 +388,13 @@ is evaluated as Syntax - PrefixFilter (‘<row_prefix>’) + PrefixFilter (‘<row_prefix>’) Example - PrefixFilter (‘Row’) + PrefixFilter (‘Row’) @@ -409,13 +409,13 @@ is evaluated as Syntax - ColumnPrefixFilter(‘<column_prefix>’) + ColumnPrefixFilter(‘<column_prefix>’) Example - ColumnPrefixFilter(‘Col’) + ColumnPrefixFilter(‘Col’) @@ -430,13 +430,13 @@ is evaluated as Syntax - MultipleColumnPrefixFilter(‘<column_prefix>’, ‘<column_prefix>’, …, ‘<column_prefix>’) + MultipleColumnPrefixFilter(‘<column_prefix>’, ‘<column_prefix>’, …, ‘<column_prefix>’) Example - MultipleColumnPrefixFilter(‘Col1’, ‘Col2’) + MultipleColumnPrefixFilter(‘Col1’, ‘Col2’) @@ -449,14 +449,14 @@ is evaluated as Syntax - ColumnCountGetFilter + ColumnCountGetFilter (‘<limit>’) Example - ColumnCountGetFilter (4) + ColumnCountGetFilter (4) @@ -469,13 +469,13 @@ is evaluated as Syntax - PageFilter (‘<page_size>’) + PageFilter (‘<page_size>’) Example - PageFilter (2) + PageFilter (2) @@ -489,13 +489,13 @@ is evaluated as Syntax - ColumnPaginationFilter(‘<limit>’, ‘<offset>’) + ColumnPaginationFilter(‘<limit>’, ‘<offset>’) Example - ColumnPaginationFilter (3, 5) + ColumnPaginationFilter (3, 5) @@ -509,13 +509,13 @@ is evaluated as Syntax - InclusiveStopFilter(‘<stop_row_key>’) + InclusiveStopFilter(‘<stop_row_key>’) Example - InclusiveStopFilter ('Row2') + InclusiveStopFilter ('Row2') @@ -528,13 
+528,13 @@ is evaluated as Syntax - TimeStampsFilter (<timestamp>, <timestamp>, ... ,<timestamp>) + TimeStampsFilter (<timestamp>, <timestamp>, ... ,<timestamp>) Example - TimeStampsFilter (5985489, 48895495, 58489845945) + TimeStampsFilter (5985489, 48895495, 58489845945) @@ -549,13 +549,13 @@ is evaluated as Syntax - RowFilter (<compareOp>, ‘<row_comparator>’) + RowFilter (<compareOp>, ‘<row_comparator>’) Example - RowFilter (<=, ‘xyz) + RowFilter (<=, ‘xyz) @@ -570,13 +570,13 @@ is evaluated as Syntax - QualifierFilter (<compareOp>, ‘<qualifier_comparator>’) + QualifierFilter (<compareOp>, ‘<qualifier_comparator>’) Example - QualifierFilter (=, ‘Column1’) + QualifierFilter (=, ‘Column1’) @@ -591,13 +591,13 @@ is evaluated as Syntax - QualifierFilter (<compareOp>,‘<qualifier_comparator>’) + QualifierFilter (<compareOp>,‘<qualifier_comparator>’) Example - QualifierFilter (=,‘Column1’) + QualifierFilter (=,‘Column1’) @@ -611,13 +611,13 @@ is evaluated as Syntax - ValueFilter (<compareOp>,‘<value_comparator>’) + ValueFilter (<compareOp>,‘<value_comparator>’) Example - ValueFilter (!=, ‘Value’) + ValueFilter (!=, ‘Value’) @@ -640,26 +640,26 @@ is evaluated as Syntax - ’,‘’, , , ‘’,‘’, , , ‘ - ’,‘’, )]]> + ’,‘’, )]]> - DependentColumnFilter (‘<family>’,‘<qualifier>’) + DependentColumnFilter (‘<family>’,‘<qualifier>’) Example - DependentColumnFilter (‘conf’, ‘blacklist’, false, >=, ‘zebra’) + DependentColumnFilter (‘conf’, ‘blacklist’, false, >=, ‘zebra’) - DependentColumnFilter (‘conf’, 'blacklist', true) + DependentColumnFilter (‘conf’, 'blacklist', true) - DependentColumnFilter (‘conf’, 'blacklist') + DependentColumnFilter (‘conf’, 'blacklist') @@ -683,16 +683,16 @@ is evaluated as Syntax - SingleColumnValueFilter(‘<family>’,‘<qualifier>’, <compare operator>, ‘<comparator>’, <filterIfColumnMissing_boolean>, <latest_version_boolean>) + SingleColumnValueFilter(‘<family>’,‘<qualifier>’, <compare operator>, ‘<comparator>’, <filterIfColumnMissing_boolean>, <latest_version_boolean>) - SingleColumnValueFilter(‘<family>’, ‘<qualifier>, <compare operator>, ‘<comparator>’) + SingleColumnValueFilter(‘<family>’, ‘<qualifier>, <compare operator>, ‘<comparator>’) Example - SingleColumnValueFilter (‘FamilyA’, ‘Column1’, <=, ‘abc’, true, false) + SingleColumnValueFilter (‘FamilyA’, ‘Column1’, <=, ‘abc’, true, false) SingleColumnValueFilter (‘FamilyA’, ‘Column1’, <=, ‘abc’) @@ -710,19 +710,19 @@ is evaluated as Syntax - SingleColumnValueExcludeFilter('<family>', '<qualifier>', <compare operator>, '<comparator>', <latest_version_boolean>, <filterIfColumnMissing_boolean>) + SingleColumnValueExcludeFilter('<family>', '<qualifier>', <compare operator>, '<comparator>', <latest_version_boolean>, <filterIfColumnMissing_boolean>) - SingleColumnValueExcludeFilter('<family>', '<qualifier>', <compare operator>, '<comparator>') + SingleColumnValueExcludeFilter('<family>', '<qualifier>', <compare operator>, '<comparator>') Example - SingleColumnValueExcludeFilter (‘FamilyA’, ‘Column1’, ‘<=’, ‘abc’, ‘false’, ‘true’) + SingleColumnValueExcludeFilter (‘FamilyA’, ‘Column1’, ‘<=’, ‘abc’, ‘false’, ‘true’) - SingleColumnValueExcludeFilter (‘FamilyA’, ‘Column1’, ‘<=’, ‘abc’) + SingleColumnValueExcludeFilter (‘FamilyA’, ‘Column1’, ‘<=’, ‘abc’) @@ -739,13 +739,13 @@ is evaluated as Syntax - ColumnRangeFilter (‘<minColumn>’, <minColumnInclusive_bool>, ‘<maxColumn>’, <maxColumnInclusive_bool>) + ColumnRangeFilter (‘<minColumn>’, <minColumnInclusive_bool>, ‘<maxColumn>’, <maxColumnInclusive_bool>) Example - ColumnRangeFilter (‘abc’, 
true, ‘xyz’, false) + ColumnRangeFilter (‘abc’, true, ‘xyz’, false) diff --git src/main/docbkx/tracing.xml src/main/docbkx/tracing.xml index 220cc79..b5dfd35 100644 --- src/main/docbkx/tracing.xml +++ src/main/docbkx/tracing.xml @@ -81,7 +81,7 @@ public void receiveSpan(Span span); change your config to use zipkin receiver, distribute the new configuration and then (rolling) restart. Here is the example of manual setup procedure. - hbase.zipkin.collector-hostname and hbase.zipkin.collector-port property with a value describing the Zipkin collector server to which span information are sent. - hbase.trace.spanreceiver.classes org.htrace.impl.ZipkinSpanReceiver @@ -118,7 +118,7 @@ $ cp target/htrace-zipkin-*-jar-with-dependencies.jar $HBASE_HOME/lib/ Client Modifications In order to turn on tracing in your client code, you must initialize the module sending spans to receiver once per client process. - Then you simply start tracing span before requests you think are interesting, and close it when the request is done. For example, if you wanted to trace all of your get operations, you change this: - into: - If you wanted to trace half of your 'get' operations, you would pass in: - in lieu of Sampler.ALWAYS to Trace.startSpan(). diff --git src/main/docbkx/troubleshooting.xml src/main/docbkx/troubleshooting.xml index ffe0816..01ad5dc 100644 --- src/main/docbkx/troubleshooting.xml +++ src/main/docbkx/troubleshooting.xml @@ -128,7 +128,7 @@ this or confirm this is happening GC logging can be turned on in the Java virtual machine. To enable, in hbase-env.sh, uncomment one of the below lines : - + # This enables basic gc logging to the .out file. # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" @@ -194,13 +194,13 @@ collections take but if its too small, objects are promoted to old gen too quickly). In the below we constrain new gen size to 64m. Add the below line in hbase-env.sh: - + export SERVER_GC_OPTS="$SERVER_GC_OPTS -XX:NewSize=64m -XX:MaxNewSize=64m" Similarly, to enable GC logging for client processes, uncomment one of the below lines in hbase-env.sh: - + # This enables basic gc logging to the .out file. # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps" @@ -293,7 +293,7 @@ export SERVER_GC_OPTS="$SERVER_GC_OPTS -XX:NewSize=64m -XX:MaxNewSize=64m" zkcli zkcli is a very useful tool for investigating ZooKeeper-related issues. To invoke: - + ./hbase zkcli -server host:port <cmd> <args> The commands (and arguments) are: @@ -377,7 +377,7 @@ Swap: 16008732k total, 14348k used, 15994384k free, 11106908k cached jps is shipped with every JDK and gives the java process ids for the current user (if root, then it gives the ids for all users). Example: - + hadoop@sv4borg12:~$ jps 1322 TaskTracker 17789 HRegionServer @@ -421,7 +421,7 @@ hadoop@sv4borg12:~$ jps You can then do stuff like checking out the full command line that started the process: - + hadoop@sv4borg12:~$ ps aux | grep HRegionServer hadoop 17789 155 35.2 9067824 8604364 ? 
S<l Mar04 9855:48 /usr/java/jdk1.6.0_14/bin/java -Xmx8000m -XX:+DoEscapeAnalysis -XX:+AggressiveOpts -XX:+UseConcMarkSweepGC -XX:NewSize=64m -XX:MaxNewSize=64m -XX:CMSInitiatingOccupancyFraction=88 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Xloggc:/export1/hadoop/logs/gc-hbase.log -Dcom.sun.management.jmxremote.port=10102 -Dcom.sun.management.jmxremote.authenticate=true -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.password.file=/home/hadoop/hbase/conf/jmxremote.password -Dcom.sun.management.jmxremote -Dhbase.log.dir=/export1/hadoop/logs -Dhbase.log.file=hbase-hadoop-regionserver-sv4borg12.log -Dhbase.home.dir=/home/hadoop/hbase -Dhbase.id.str=hadoop -Dhbase.root.logger=INFO,DRFA -Djava.library.path=/home/hadoop/hbase/lib/native/Linux-amd64-64 -classpath /home/hadoop/hbase/bin/../conf:[many jars]:/home/hadoop/hadoop/conf org.apache.hadoop.hbase.regionserver.HRegionServer start @@ -791,7 +791,7 @@ at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:210) HADOOP_CLASSPATH set to include the HBase dependencies. The "hbase classpath" utility can be used to do this easily. For example (substitute VERSION with your HBase version): - + HADOOP_CLASSPATH=`hbase classpath` hadoop jar $HBASE_HOME/hbase-VERSION.jar rowcounter usertable See HDFS Utilization of Tables and Regions To determine how much space HBase is using on HDFS use the hadoop shell commands from the NameNode. For example... - hadoop fs -dus /hbase/ ...returns the summarized disk + hadoop fs -dus /hbase/ ...returns the summarized disk utilization for all HBase objects. - hadoop fs -dus /hbase/myTable ...returns the summarized + hadoop fs -dus /hbase/myTable ...returns the summarized disk utilization for the HBase table 'myTable'. - hadoop fs -du /hbase/myTable ...returns a list of the + hadoop fs -du /hbase/myTable ...returns a list of the regions under the HBase table 'myTable' and their disk utilization. For more information on HDFS shell commands, see the HDFS @@ -1061,7 +1061,7 @@ ERROR org.apache.hadoop.hbase.regionserver.HRegionServer: ZooKeeper session expi If you wish to increase the session timeout, add the following to your hbase-site.xml to increase the timeout from the default of 60 seconds to 120 seconds. - + zookeeper.session.timeout 1200000 @@ -1345,13 +1345,13 @@ security.provider.1=sun.security.pkcs11.SunPKCS11 ${java.home}/lib/security/nss. detail at . To find the current value on your system, run the following command: - [user@host]# cat /proc/sys/vm/min_free_kbytes + [user@host]# cat /proc/sys/vm/min_free_kbytes Next, raise the value. Try doubling, then quadrupling the value. Note that setting the value too low or too high could have detrimental effects on your system. Consult your operating system vendor for specific recommendations. Use the following command to modify the value of min_free_kbytes, substituting <value> with your intended value: - [user@host]# echo <value> > /proc/sys/vm/min_free_kbytes + [user@host]# echo <value> > /proc/sys/vm/min_free_kbytes diff --git src/main/docbkx/upgrading.xml src/main/docbkx/upgrading.xml index 6d31c81..23c3636 100644 --- src/main/docbkx/upgrading.xml +++ src/main/docbkx/upgrading.xml @@ -158,7 +158,7 @@ HDFS and ZooKeeper should be up and running during the upgrade process. hbase-0.96.0 comes with an upgrade script. Run - $ bin/hbase upgrade to see its usage. The script + $ bin/hbase upgrade to see its usage. The script has two main modes: -check, and -execute.
check @@ -205,7 +205,7 @@ There are some HFileV1, or corrupt files (files with incorrect major version) By default, the check step scans the hbase root directory (defined as hbase.rootdir in the configuration). To scan a specific directory only, pass the -dir option. - $ bin/hbase upgrade -check -dir /myHBase/testTable + $ bin/hbase upgrade -check -dir /myHBase/testTable The above command would detect HFileV1s in the /myHBase/testTable directory. Once the check step reports all the HFileV1 files have been rewritten, it is safe to proceed with the upgrade. @@ -246,7 +246,7 @@ There are some HFileV1, or corrupt files (files with incorrect major version) To run the execute step, make sure that first you have copied hbase-0.96.0 binaries everywhere under servers and under clients. Make sure the 0.94.0 cluster is down. Then do as follows: - $ bin/hbase upgrade -execute + $ bin/hbase upgrade -execute Here is some sample output. Starting Namespace upgrade @@ -265,7 +265,7 @@ Successfully completed Log splitting If the output from the execute step looks good, stop the zookeeper instance you started to do the upgrade: - $ ./hbase/bin/hbase-daemon.sh stop zookeeper + $ ./hbase/bin/hbase-daemon.sh stop zookeeper Now start up hbase-0.96.0.
/tmp which is often cleared on system restart. In the example below we have ZooKeeper persist to /user/local/zookeeper. - ... @@ -146,7 +146,7 @@ To point HBase at an existing ZooKeeper cluster, one that is not managed by HBase, set HBASE_MANAGES_ZK in conf/hbase-env.sh to false - + ... # Tell HBase whether it should manage its own instance of Zookeeper or not. export HBASE_MANAGES_ZK=false @@ -160,7 +160,7 @@ regular start/stop scripts. If you would like to run ZooKeeper yourself, independent of HBase start/stop, you would do the following - + ${HBASE_HOME}/bin/hbase-daemons.sh {start,stop} zookeeper @@ -225,7 +225,7 @@ ${HBASE_HOME}/bin/hbase-daemons.sh {start,stop} zookeeper On each host that will run an HBase client (e.g. hbase shell), add the following file to the HBase home directory's conf directory: - + Client { com.sun.security.auth.module.Krb5LoginModule required useKeyTab=false @@ -244,7 +244,7 @@ Client { configuration file in the conf directory of the node's HBASE_HOME directory that looks like the following: - + Server { com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true @@ -276,7 +276,7 @@ Client { Modify your hbase-env.sh to include the following: - + export HBASE_OPTS="-Djava.security.auth.login.config=$CLIENT_CONF" export HBASE_MANAGES_ZK=true export HBASE_ZOOKEEPER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF" @@ -290,7 +290,7 @@ export HBASE_REGIONSERVER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_ Modify your hbase-site.xml on each node that will run zookeeper, master or regionserver to contain: - hbase.zookeeper.quorum @@ -332,7 +332,7 @@ bin/hbase regionserver start
External Zookeeper Configuration Add a JAAS configuration file that looks like: - + Client { com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true @@ -348,7 +348,7 @@ Client { Modify your hbase-env.sh to include the following: - + export HBASE_OPTS="-Djava.security.auth.login.config=$CLIENT_CONF" export HBASE_MANAGES_ZK=false export HBASE_MASTER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_CONF" @@ -359,7 +359,7 @@ export HBASE_REGIONSERVER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_ Modify your hbase-site.xml on each node that will run a master or regionserver to contain: - hbase.zookeeper.quorum @@ -377,13 +377,13 @@ export HBASE_REGIONSERVER_OPTS="-Djava.security.auth.login.config=$HBASE_SERVER_ Quorum hosts. Add a zoo.cfg for each Zookeeper Quorum host containing: - + authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider kerberos.removeHostFromPrincipal=true kerberos.removeRealmFromPrincipal=true Also on each of these hosts, create a JAAS configuration file containing: - + Server { com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true @@ -397,7 +397,7 @@ Server { pathname of this file as $ZK_SERVER_CONF below. Start your Zookeepers on each Zookeeper Quorum host with: - + SERVER_JVMFLAGS="-Djava.security.auth.login.config=$ZK_SERVER_CONF" bin/zkServer start @@ -482,7 +482,7 @@ bin/hbase regionserver & You must override the standard hadoop-core jar file from the target/cached_classpath.txt file with the version containing the HADOOP-7070 fix. You can use the following script to do this: - + echo `find ~/.m2 -name "*hadoop-core*7070*SNAPSHOT.jar"` ':' `cat target/cached_classpath.txt` | sed 's/ //g' > target/tmp.txt mv target/tmp.txt target/cached_classpath.txt