diff --git a/hbase-annotations/src/main/asciidoc/.gitignore b/hbase-annotations/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-checkstyle/src/main/asciidoc/.gitignore b/hbase-checkstyle/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-client/src/main/asciidoc/.gitignore b/hbase-client/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-common/src/main/asciidoc/.gitignore b/hbase-common/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-examples/src/main/asciidoc/.gitignore b/hbase-examples/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-hadoop-compat/src/main/asciidoc/.gitignore b/hbase-hadoop-compat/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-hadoop2-compat/src/main/asciidoc/.gitignore b/hbase-hadoop2-compat/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-it/src/main/asciidoc/.gitignore b/hbase-it/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-prefix-tree/src/main/asciidoc/.gitignore b/hbase-prefix-tree/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-protocol/src/main/asciidoc/.gitignore b/hbase-protocol/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-rest/src/main/asciidoc/.gitignore b/hbase-rest/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-server/src/main/asciidoc/.gitignore b/hbase-server/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-shell/src/main/asciidoc/.gitignore b/hbase-shell/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-testing-util/src/main/asciidoc/.gitignore b/hbase-testing-util/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/hbase-thrift/src/main/asciidoc/.gitignore b/hbase-thrift/src/main/asciidoc/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/pom.xml b/pom.xml
index 87070ba..30eca1f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -791,15 +791,22 @@
+
${basedir}/hbase-common/src/main/resources/hbase-default.xml
- ${basedir}/src/main/xslt/configuration_to_docbook_section.xsl
- ${basedir}/target/docbkx
-
+ ${basedir}/src/main/xslt/configuration_to_asciidoc_chapter.xsl
+
+
+ ^(.*)\.xml$
+ $1.adoc
+
+
+ ${basedir}/target/asciidoc
+
@@ -835,84 +842,6 @@
hbase/checkstyle-suppressions.xml
-
-
- com.agilejava.docbkx
- docbkx-maven-plugin
- 2.0.15
- false
-
-
- org.docbook
- docbook-xml
- 4.4
- runtime
-
-
- net.sf.xslthl
- xslthl
- 2.1.0
- runtime
-
-
-
- 1
- images/
- ${basedir}/src/main/docbkx
- true
- true
- 100
- true
- css/freebsd_docbook.css
- true
- ${basedir}/src/main/docbkx/customization.xsl
- 2
- yes
- UTF-8
-
-
-
- multipage
-
- generate-html
-
- pre-site
-
- true
- true
- ${basedir}/target/docbkx/book
-
-
-
-
-
-
-
-
-
-
-
- onepage
-
- generate-html
-
- pre-site
-
- ${basedir}/target/docbkx/
- book.xml
-
-
-
-
-
-
-
-
-
-
-
- org.apache.maven.pluginsmaven-resources-plugin
@@ -938,24 +867,6 @@
- copy-docbkx
-
- copy-resources
-
- site
-
- target/site
-
-
- ${basedir}/target/docbkx
-
- **/**
-
-
-
-
-
- copy-htaccesscopy-resources
@@ -1002,6 +913,12 @@
velocity1.7
+
+
${basedir}/src/main/site
@@ -1009,6 +926,31 @@
UTF-8
+
+
+ org.asciidoctor
+ asciidoctor-maven-plugin
+ 1.5.2
+
+
+ output-html
+
+ process-asciidoc
+
+ site
+
+ ./images
+ book
+
+ hbase.css
+
+ html5
+ coderay
+ target/site
+
+
+
+ org.jacocojacoco-maven-plugin
@@ -2480,6 +2422,8 @@
maven-javadoc-plugin2.9.1
+ **/generated
+ org.apache.hadoop.hbase.generated.master2048mtrue
@@ -2512,7 +2456,7 @@
User APIThe HBase Application Programmer's API
- org.apache.hadoop.hbase.backup*:org.apache.hadoop.hbase.catalog:org.apache.hadoop.hbase.client.coprocessor:org.apache.hadoop.hbase.client.metrics:org.apache.hadoop.hbase.codec*:org.apache.hadoop.hbase.constraint:org.apache.hadoop.hbase.coprocessor.*:org.apache.hadoop.hbase.executor:org.apache.hadoop.hbase.fs:org.apache.hadoop.hbase.generated.*:org.apache.hadoop.hbase.io.hfile.*:org.apache.hadoop.hbase.mapreduce.hadoopbackport:org.apache.hadoop.hbase.mapreduce.replication:org.apache.hadoop.hbase.master.*:org.apache.hadoop.hbase.metrics*:org.apache.hadoop.hbase.migration:org.apache.hadoop.hbase.monitoring:org.apache.hadoop.hbase.p*:org.apache.hadoop.hbase.regionserver.compactions:org.apache.hadoop.hbase.regionserver.handler:org.apache.hadoop.hbase.regionserver.snapshot:org.apache.hadoop.hbase.replication.*:org.apache.hadoop.hbase.rest.filter:org.apache.hadoop.hbase.rest.model:org.apache.hadoop.hbase.rest.p*:org.apache.hadoop.hbase.security.*:org.apache.hadoop.hbase.thrift*:org.apache.hadoop.hbase.tmpl.*:org.apache.hadoop.hbase.tool:org.apache.hadoop.hbase.trace:org.apache.hadoop.hbase.util.byterange*:org.apache.hadoop.hbase.util.test:org.apache.hadoop.hbase.util.vint:org.apache.hadoop.hbase.zookeeper.lock:org.apache.hadoop.metrics2*
+ org.apache.hadoop.hbase.backup*:org.apache.hadoop.hbase.catalog:org.apache.hadoop.hbase.client.coprocessor:org.apache.hadoop.hbase.client.metrics:org.apache.hadoop.hbase.codec*:org.apache.hadoop.hbase.constraint:org.apache.hadoop.hbase.coprocessor.*:org.apache.hadoop.hbase.executor:org.apache.hadoop.hbase.fs:*.generated.*:org.apache.hadoop.hbase.io.hfile.*:org.apache.hadoop.hbase.mapreduce.hadoopbackport:org.apache.hadoop.hbase.mapreduce.replication:org.apache.hadoop.hbase.master.*:org.apache.hadoop.hbase.metrics*:org.apache.hadoop.hbase.migration:org.apache.hadoop.hbase.monitoring:org.apache.hadoop.hbase.p*:org.apache.hadoop.hbase.regionserver.compactions:org.apache.hadoop.hbase.regionserver.handler:org.apache.hadoop.hbase.regionserver.snapshot:org.apache.hadoop.hbase.replication.*:org.apache.hadoop.hbase.rest.filter:org.apache.hadoop.hbase.rest.model:org.apache.hadoop.hbase.rest.p*:org.apache.hadoop.hbase.security.*:org.apache.hadoop.hbase.thrift*:org.apache.hadoop.hbase.tmpl.*:org.apache.hadoop.hbase.tool:org.apache.hadoop.hbase.trace:org.apache.hadoop.hbase.util.byterange*:org.apache.hadoop.hbase.util.test:org.apache.hadoop.hbase.util.vint:org.apache.hadoop.hbase.zookeeper.lock:org.apache.hadoop.metrics2*
false
diff --git a/src/main/asciidoc/_chapters/appendix_acl_matrix.adoc b/src/main/asciidoc/_chapters/appendix_acl_matrix.adoc
new file mode 100644
index 0000000..e89738a
--- /dev/null
+++ b/src/main/asciidoc/_chapters/appendix_acl_matrix.adoc
@@ -0,0 +1,133 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[appendix]
+[[appendix_acl_matrix]]
+== Access Control Matrix
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+:toc: left
+:source-language: java
+
+The following matrix shows the minimum permission set required to perform operations in HBase.
+Before using the table, read through the information about how to interpret it.
+
+.Interpreting the ACL Matrix Table
+The following conventions are used in the ACL Matrix table:
+
+=== Scopes
+Permissions are evaluated starting at the widest scope and working to the narrowest scope.
+
+A scope corresponds to a level of the data model. From broadest to narrowest, the scopes are as follows:
+
+.Scopes
+* Global
+* Namespace (NS)
+* Table
+* Column Family (CF)
+* Column Qualifier (CQ)
+* Cell
+
+For instance, a permission granted at table level dominates any grants done at the Column Family, Column Qualifier, or cell level. The user can do what that grant implies at any location in the table. A permission granted at global scope dominates all: the user is always allowed to take that action everywhere.
+
+=== Permissions
+Possible permissions include the following:
+
+.Permissions
+* Superuser - a special user that belongs to group "supergroup" and has unlimited access
+* Admin (A)
+* Create \(C)
+* Write (W)
+* Read \(R)
+* Execute (X)
+
+For the most part, permissions work in an expected way, with the following caveats:
+
+Having Write permission does not imply Read permission.::
+ It is possible and sometimes desirable for a user to be able to write data that same user cannot read. One such example is a log-writing process.
+The [systemitem]+hbase:meta+ table is readable by every user, regardless of the user's other grants or restrictions.::
+ This is a requirement for HBase to function correctly.
+[code]+CheckAndPut+ and [code]+CheckAndDelete+ operations will fail if the user does not have both Write and Read permission.::
+[code]+Increment+ and [code]+Append+ operations do not require Read access.::
+
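+As a concrete illustration of the [code]+CheckAndPut+ caveat above, the following minimal client-side sketch (a hypothetical example; the table, family, and qualifier names are made up) issues a [code]+checkAndPut+, which the server can only satisfy for a user holding both Read and Write permission on the cells involved:
+
+[source,java]
+----
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class CheckAndPutExample {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = HBaseConfiguration.create();
+    HTable table = new HTable(conf, "mytable");   // hypothetical table name
+    try {
+      Put put = new Put(Bytes.toBytes("row1"));
+      put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("new-value"));
+      // The server reads the current value of cf:q and writes the Put only if it
+      // equals "old-value", which is why both Read and Write permission are needed.
+      boolean applied = table.checkAndPut(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
+          Bytes.toBytes("q"), Bytes.toBytes("old-value"), put);
+      System.out.println("applied = " + applied);
+    } finally {
+      table.close();
+    }
+  }
+}
+----
+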
+The following table is sorted by the interface that provides each operation.
+In case the table goes out of date, the unit tests which check for accuracy of permissions can be found in [path]_hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java_, and the access controls themselves can be examined in [path]_hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java_.
+
+.ACL Matrix
+[cols="1,1,1,1", frame="all", options="header"]
+|===
+| Interface | Operation | Minimum Scope | Minimum Permission
+| Master | createTable | Global | C
+| | modifyTable | Table | A\|C
+| | deleteTable | Table | A\|C
+| | truncateTable | Table | A\|C
+| | addColumn | Table | A\|C
+| | modifyColumn | Table | A\|C
+| | deleteColumn | Table | A\|C
+| | disableTable | Table | A\|C
+| | disableAclTable | None | Not allowed
+| | enableTable | Table | A\|C
+| | move | Global | A
+| | assign | Global | A
+| | unassign | Global | A
+| | regionOffline | Global | A
+| | balance | Global | A
+| | balanceSwitch | Global | A
+| | shutdown | Global | A
+| | stopMaster | Global | A
+| | snapshot | Global | A
+| | clone | Global | A
+| | restore | Global | A
+| | deleteSnapshot | Global | A
+| | createNamespace | Global | A
+| | deleteNamespace | Namespace | A
+| | modifyNamespace | Namespace | A
+| | flushTable | Table | A\|C
+| | getTableDescriptors | Global\|Table | A
+| | mergeRegions | Global | A
+| Region | openRegion | Global | A
+| | closeRegion | Global | A
+| | stopRegionServer | Global | A
+| | rollHLog | Global | A
+| | mergeRegions | Global | A
+| | flush | Global\|Table | A\|C
+| | split | Global\|Table | A
+| | compact | Global\|Table | A\|C
+| | bulkLoadHFile | Table | W
+| | prepareBulkLoad | Table |C
+| | cleanupBulkLoad | Table |W
+| | checkAndDelete | Table\|CF\|CQ | RW
+| | checkAndPut | Table\|CF\|CQ | RW
+| | incrementColumnValue | Table\|CF\|CQ | RW
+| | scannerClose | Table | R
+| | scannerNext | Table | R
+| | scannerOpen | Table\|CF\|CQ | R
+| Endpoint | invoke | Endpoint | X
+| AccessController | grant | Global\|Table\|NS | A
+| | revoke | Global\|Table\|NS | A
+| | getUserPermissions | Global\|Table\|NS | A
+| | checkPermissions | Global\|Table\|NS | A
+|===
+
+:numbered:
diff --git a/src/main/asciidoc/_chapters/appendix_contributing_to_documentation.adoc b/src/main/asciidoc/_chapters/appendix_contributing_to_documentation.adoc
new file mode 100644
index 0000000..9cf21e5
--- /dev/null
+++ b/src/main/asciidoc/_chapters/appendix_contributing_to_documentation.adoc
@@ -0,0 +1,478 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[appendix]
+[[appendix_contributing_to_documentation]]
+== Contributing to Documentation
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+:toc: left
+:source-language: java
+
+The Apache HBase project welcomes contributions to all aspects of the project, including the documentation.
+
+In HBase, documentation includes the following areas, and probably some others:
+
+* The link:http://hbase.apache.org/book.html[HBase Reference
+ Guide] (this book)
+* The link:http://hbase.apache.org/[HBase website]
+* The link:http://wiki.apache.org/hadoop/Hbase[HBase
+ Wiki]
+* API documentation
+* Command-line utility output and help text
+* Web UI strings, explicit help text, context-sensitive strings, and others
+* Log messages
+* Comments in source files, configuration files, and others
+* Localization of any of the above into target languages other than English
+
+No matter which area you want to help out with, the first step is almost always to download (typically by cloning the Git repository) and familiarize yourself with the HBase source code.
+The only exception in the list above is the HBase Wiki, which is edited online.
+For information on downloading and building the source, see <>.
+
+=== Getting Access to the Wiki
+
+The HBase Wiki is not well-maintained and much of its content has been moved into the HBase Reference Guide (this guide). However, some pages on the Wiki are well maintained, and it would be great to have some volunteers willing to help out with the Wiki.
+To request access to the Wiki, register a new account at link:https://wiki.apache.org/hadoop/Hbase?action=newaccount[https://wiki.apache.org/hadoop/Hbase?action=newaccount].
+Contact one of the HBase committers, who can either give you access or refer you to someone who can.
+
+=== Contributing to Documentation or Other Strings
+
+If you spot an error in a string in a UI, utility, script, log message, or elsewhere, or you think something could be made more clear, or you think text needs to be added where it doesn't currently exist, the first step is to file a JIRA.
+Be sure to set the component to [literal]+Documentation+ in addition to any other involved components.
+Most components have one or more default owners, who monitor new issues which come into those queues.
+Regardless of whether you feel able to fix the bug, you should still file bugs where you see them.
+
+If you want to try your hand at fixing your newly-filed bug, assign it to yourself.
+You will need to clone the HBase Git repository to your local system and work on the issue there.
+When you have developed a potential fix, submit it for review.
+If it addresses the issue and is seen as an improvement, one of the HBase committers will commit it to one or more branches, as appropriate.
+
+.Procedure: Suggested Work flow for Submitting Patches
+This procedure goes into more detail than Git pros will need, but is included in this appendix so that people unfamiliar with Git can feel confident contributing to HBase while they learn.
+
+. If you have not already done so, clone the Git repository locally.
+ You only need to do this once.
+. Fairly often, pull remote changes into your local repository by using the [code]+git pull+ command, while your master branch is checked out.
+. For each issue you work on, create a new branch.
+ One convention that works well for naming the branches is to name a given branch the same as the JIRA it relates to:
++
+----
+$ git checkout -b HBASE-123456
+----
+
+. Make your suggested changes on your branch, committing your changes to your local repository often.
+ If you need to switch to working on a different issue, remember to check out the appropriate branch.
+. When you are ready to submit your patch, first be sure that HBase builds cleanly and behaves as expected in your modified branch.
+ If you have made documentation changes, be sure the documentation and website builds.
++
+NOTE: Before you use the [literal]+site+ target the very first time, be sure you have built HBase at least once, in order to fetch all the Maven dependencies you need.
++
+----
+$ mvn clean install -DskipTests # Builds HBase
+----
++
+----
+$ mvn clean site -DskipTests # Builds the website and documentation
+----
++
+If any errors occur, address them.
+
+. If it takes you several days or weeks to implement your fix, or you know that the area of the code you are working in has had a lot of changes lately, make sure you rebase your branch against the remote master and take care of any conflicts before submitting your patch.
++
+----
+
+$ git checkout HBASE-123456
+$ git rebase origin/master
+----
+
+. Generate your patch against the remote master.
+ Run the following command from the top level of your git repository (usually called [literal]+hbase+):
++
+----
+$ git diff --no-prefix origin/master > HBASE-123456.patch
+----
++
+The name of the patch should contain the JIRA ID.
+Look over the patch file to be sure that you did not change any additional files by accident and that there are no other surprises.
+When you are satisfied, attach the patch to the JIRA and click the btn:[Patch Available] button.
+A reviewer will review your patch.
+If you need to submit a new version of the patch, leave the old one on the JIRA and add a version number to the name of the new patch.
+
+. After a change has been committed, there is no need to keep your local branch around.
+ Instead you should run +git pull+ to get the new change into your master branch.
+
+=== Editing the HBase Website
+
+The source for the HBase website is in the HBase source, in the [path]_src/main/site/_ directory.
+Within this directory, source for the individual pages is in the [path]_xdocs/_ directory, and images referenced in those pages are in the [path]_images/_ directory.
+This directory also stores images used in the HBase Reference Guide.
+
+The website's pages are written in an HTML-like XML dialect called xdoc, which has a reference guide at link:http://maven.apache.org/archives/maven-1.x/plugins/xdoc/reference/xdocs.html.
+You can edit these files in a plain-text editor, an IDE, or an XML editor such as XML Mind XML Editor (XXE) or Oxygen XML Author.
+
+To preview your changes, build the website using the +mvn clean site
+ -DskipTests+ command.
+The HTML output resides in the [path]_target/site/_ directory.
+When you are satisfied with your changes, follow the procedure in <> to submit your patch.
+
+=== HBase Reference Guide Style Guide and Cheat Sheet
+
+We may be converting the HBase Reference Guide to use link:http://asciidoctor.org[AsciiDoctor]. In case that happens, the following cheat sheet is included for your reference. More nuanced and comprehensive documentation is available at link:http://asciidoctor.org/docs/user-manual/. To skip down to the Docbook stuff, see <>.
+
+.AsciiDoc Cheat Sheet
+[cols="1,1,a",options="header"]
+|===
+| Element Type | Desired Rendering | How to do it
+| A paragraph | a paragraph | Just type some text with a blank line at the top and bottom.
+| Add line breaks within a paragraph without adding blank lines | Manual line breaks | This will break + at the plus sign. Or prefix the whole paragraph with a line containing '[%hardbreaks]'
+| Give a title to anything | Colored italic bold differently-sized text | .MyTitle (no space between the period and the words) on the line before the thing to be titled
+| In-Line Code or commands | monospace | \`text`
+| In-line literal content (things to be typed exactly as shown) | bold mono | \*\`typethis`*
+| In-line replaceable content (things to substitute with your own values) | bold italic mono | \*\_typesomething_*
+| Code blocks with highlighting | monospace, highlighted, preserve space |
+........
+[source,java]
+----
+ myAwesomeCode() {
+}
+----
+........
+| Code block included from a separate file | included just as though it were part of the main file |
+................
+[source,ruby]
+----
+include\::path/to/app.rb[]
+----
+................
+| Include only part of a separate file | Similar to Javadoc | See link:http://asciidoctor.org/docs/user-manual/#by-tagged-regions
+| Filenames, directory names, new terms | italic | \_hbase-default.xml_
+| External naked URLs | A link with the URL as link text |
+----
+link:http://www.google.com
+----
+
+| External URLs with text | A link with arbitrary link text |
+----
+link:http://www.google.com[Google]
+----
+
+| Create an internal anchor to cross-reference | not rendered |
+----
+[[anchor_name]]
+----
+| Cross-reference an existing anchor using its default title| an internal hyperlink using the element title if available, otherwise using the anchor name |
+----
+<>
+----
+| Cross-reference an existing anchor using custom text | an internal hyperlink using arbitrary text |
+----
+<>
+----
+| A block image | The image with alt text |
+----
+image::sunset.jpg[Alt Text]
+----
+(put the image in the src/main/site/resources/images directory)
+| An inline image | The image with alt text, as part of the text flow |
+----
+image:sunset.jpg[Alt Text]
+----
+(only one colon)
+| Link to a remote image | show an image hosted elsewhere |
+----
+image::http://inkscape.org/doc/examples/tux.svg[Tux,250,350]
+----
+(or `image:`)
+| Add dimensions or a URL to the image | depends | inside the brackets after the alt text, specify width, height and/or link="http://my_link.com"
+| A footnote | subscript link which takes you to the footnote |
+----
+Some text.footnote:[The footnote text.]
+----
+| A note or warning with no title | The admonition image followed by the admonition |
+----
+NOTE: My note here
+----
+
+----
+WARNING: My warning here
+----
+| A complex note | The note has a title and/or multiple paragraphs and/or code blocks or lists, etc |
+........
+.The Title
+[NOTE]
+====
+Here is the note text. Everything until the second set of four equals signs is part of the note.
+----
+some source code
+----
+====
+........
+| Bullet lists | bullet lists |
+----
+* list item 1
+----
+(see http://asciidoctor.org/docs/user-manual/#unordered-lists)
+| Numbered lists | numbered list |
+----
+. list item 2
+----
+(see http://asciidoctor.org/docs/user-manual/#ordered-lists)
+| Checklists | Checked or unchecked boxes |
+Checked:
+----
+- [*]
+----
+Unchecked:
+----
+- [ ]
+----
+| Multiple levels of lists | bulleted or numbered or combo |
+----
+. Numbered (1), at top level
+* Bullet (2), nested under 1
+* Bullet (3), nested under 1
+. Numbered (4), at top level
+* Bullet (5), nested under 4
+** Bullet (6), nested under 5
+- [x] Checked (7), at top level
+----
+| Labelled lists / variablelists | a list item title or summary followed by content |
+----
+Title:: content
+
+Title::
+ content
+----
+| Sidebars, quotes, or other blocks of text | a block of text, formatted differently from the default | Delimited using different delimiters, see link:http://asciidoctor.org/docs/user-manual/#built-in-blocks-summary. Some of the examples above use delimiters like \...., ----, and ====.
+........
+[example]
+====
+This is an example block.
+====
+
+[source]
+----
+This is a source block.
+----
+
+[note]
+====
+This is a note block.
+====
+
+[quote]
+____
+This is a quote block.
+____
+........
+
+If you want to insert literal AsciiDoc content but it keeps being interpreted, when in doubt, use eight dots as the delimiter at the top and bottom.
+| Nested Sections | chapter, section, sub-section, etc |
+----
+= Book (or chapter if the chapter can be built alone, see the leveloffset info below)
+
+== Chapter (or section if the chapter is standalone)
+
+=== Section (or subsection, etc)
+
+==== Subsection
+----
+
+and so on up to 6 levels (think carefully about going deeper than 4 levels, maybe you can just use titled paragraphs or lists instead). Note that you can include a book inside another book by adding the `:leveloffset:+1` macro directive directly before your include, and resetting it to 0 directly after. See the _book.adoc_ source for examples, as this is how this guide handles chapters. *Don't do it for prefaces, glossaries, appendixes, or other special types of chapters.*
+
+| Include one file from another | Content is included as though it were inline |
+
+----
+include::path/to/file.adoc[]
+----
+
+For plenty of examples, see _book.adoc_.
+| A table | a table | See http://asciidoctor.org/docs/user-manual/#tables. Generally rows are separated by newlines and columns by pipes
+| Comment out a single line | A line is skipped during rendering |
+----
+// This line won't show up
+----
+| Comment out a block | A section of the file is skipped during rendering |
+----
+////
+Nothing between the slashes will show up.
+////
+----
+| Highlight text for review | text shows up with yellow background |
+----
+Text between #hash marks# is highlighted yellow.
+----
+|===
+
+[[docbook.editing]]
+=== Editing the HBase Reference Guide
+
+The source for the HBase Reference Guide is in the HBase source, in the [path]_src/main/docbkx/_ directory.
+It is written in link:http://www.docbook.org/[Docbook] XML.
+Docbook can be intimidating, but you can typically follow the formatting of the surrounding file to get an idea of the mark-up.
+You can edit Docbook XML files using a plain-text editor, an XML-aware IDE, or a specialized XML editor.
+
+Docbook's syntax can be picky.
+Before submitting a patch, be sure to build the output locally using the +mvn site+ command.
+If you do not get any build errors, that means that the XML is well-formed, which means that each opening tag is balanced by a closing tag.
+Well-formedness is not exactly the same as validity.
+Check the output in [path]_target/docbkx/_ for any surprises before submitting a patch.
+
+=== Auto-Generated Content
+
+Some parts of the HBase Reference Guide, most notably <>, are generated automatically, so that this area of the documentation stays in sync with the code.
+This is done by means of an XSLT transform, which you can examine in the source at [path]_src/main/xslt/configuration_to_docbook_section.xsl_.
+This transforms the [path]_hbase-common/src/main/resources/hbase-default.xml_ file into a Docbook output which can be included in the Reference Guide.
+Sometimes, it is necessary to add configuration parameters or modify their descriptions.
+Make the modifications to the source file, and they will be included in the Reference Guide when it is rebuilt.
+
+It is possible that other types of content can and will be automatically generated from HBase source files in the future.
+
+=== Multi-Page and Single-Page Output
+
+You can examine the [literal]+site+ target in the Maven [path]_pom.xml_ file included at the top level of the HBase source for details on the process of building the website and documentation.
+The Reference Guide is built twice, once as a single-page output and once with one HTML file per chapter.
+The single-page output is located in [path]_target/docbkx/book.html_, while the multi-page output's index page is at [path]_target/docbkx/book/book.html_.
+Each of these outputs has its own [path]_images/_ and [path]_css/_ directories, which are created at build time.
+
+=== Images in the HBase Reference Guide
+
+You can include images in the HBase Reference Guide.
+For accessibility reasons, it is recommended that you use a <figure> Docbook element for an image.
+This allows screen readers to navigate to the image and also provides alternative text for the image.
+The following is an example of a <figure> element.
+
+[source,xml]
+----
+<figure>
+  <title>HFile Version 1</title>
+  <mediaobject>
+    <imageobject>
+      <imagedata fileref="hfile.png" />
+    </imageobject>
+    <textobject>
+      <phrase>HFile Version 1</phrase>
+    </textobject>
+  </mediaobject>
+</figure>
+----
+
+The <textobject> can contain a few sentences describing the image, rather than simply reiterating the title.
+You can optionally specify alignment and size options in the <imagedata> element.
+
+When doing a local build, save the image to the [path]_src/main/site/resources/images/_ directory.
+In the <imagedata> element, refer to the image as above, with no directory component.
+The image will be copied to the appropriate target location during the build of the output.
+
+When you submit a patch which includes adding an image to the HBase Reference Guide, attach the image to the JIRA.
+If the committer asks where the image should be committed, it should go into the above directory.
+
+=== Adding a New Chapter to the HBase Reference Guide
+
+If you want to add a new chapter to the HBase Reference Guide, the easiest way is to copy an existing chapter file, rename it, and change the ID and title elements near the top of the file.
+Delete the existing content and create the new content.
+Then open the [path]_book.xml_ file, which is the main file for the HBase Reference Guide, and use an <xi:include> element to include your new chapter in the appropriate location.
+Be sure to add your new file to your Git repository before creating your patch.
+Note that the [path]_book.xml_ file currently contains many chapters.
+You can only include a chapter at the same nesting levels as the other chapters in the file.
+When in doubt, check to see how other files have been included.
+
+=== Docbook Common Issues
+
+The following Docbook issues come up often.
+Some of these are preferences, but others can create mysterious build errors or other problems.
+
+[qanda]
+What can go where?::
+ There is often confusion about which child elements are valid in a given context. When in doubt, Docbook: The Definitive Guide is the best resource. It has an appendix which is indexed by element and contains all valid child and parent elements of any given element. If you edit Docbook often, a schema-aware XML editor makes things easier.
+
+Paragraphs and Admonitions::
+ It is a common pattern, and it is technically valid, to put an admonition such as a <note> inside a <para> element. Because admonitions render as block-level elements (they take the whole width of the page), it is better to mark them up as siblings to the paragraphs around them, like this:
++
+[source,xml]
+----
+<para>This is the paragraph.</para>
+
+<note>
+  <para>This is an admonition which occurs after the paragraph.</para>
+</note>
+----
+
+Wrap textual <listitem> and <entry> contents in <para> elements.::
+ Because the contents of a <listitem> (an element in an itemized, ordered, or variable list) or an <entry> (a cell in a table) can consist of things other than plain text, they need to be wrapped in some element. If they are plain text, they need to be enclosed in <para> tags. This is tedious but necessary for validity.
++
+[source,xml]
+----
+<itemizedlist>
+  <listitem>
+    <para>This is a paragraph.</para>
+  </listitem>
+  <listitem>
+    <para>
+      <screen>This is screen output.</screen>
+    </para>
+  </listitem>
+</itemizedlist>
+----
+
+When to use <command>, <code>, <screen>, <programlisting>?::
+ The first two are in-line tags, which can occur within the flow of paragraphs or titles. The second two are block elements.
++
+Use <command> to mention a command such as hbase shell in the flow of a sentence. Use <code> for other inline text referring to code. Incidentally, use <literal> to specify literal strings that should be typed or entered exactly as shown. Within a <screen> listing, it can be helpful to use the <userinput> and <computeroutput> elements to mark up the text further.
++
+Use <screen> to display input and output as the user would see it on the screen, in a log file, etc. Use <programlisting> only for blocks of code that occur within a file, such as Java or XML code, or a Bash shell script.
+
+How to escape XML elements so that they show up as XML?::
+ For one-off instances or short in-line mentions, use the `&lt;` and `&gt;` encoded characters. For longer mentions, or blocks of code, enclose it with `<![CDATA[]]>`, which is much easier to maintain and parse in the source files.
+
+Tips and tricks for making screen output look good::
+ Text within <screen> and <programlisting> elements is shown exactly as it appears in the source, including indentation, tabs, and line wrap.
++
+Indent the starting and closing XML elements, but do not indent the content. Also, to avoid having an extra blank line at the beginning of the programlisting output, do not put the CDATA element on its own line. For example:
++
+[source,xml]
+----
+<programlisting><![CDATA[case $1 in
+  --cleanZk|--cleanHdfs|--cleanAll)
+    matches="yes" ;;
+  *) ;;
+esac
+]]></programlisting>
+----
++
+After pasting code into a programlisting, fix the indentation manually, using two spaces per desired indentation. For screen output, be sure to include line breaks so that the text is no longer than 100 characters.
+
+Isolate Changes for Easy Diff Review.::
+ Be careful with pretty-printing or re-formatting an entire XML file, even if the formatting has degraded over time. If you need to reformat a file, do that in a separate JIRA where you do not change any content. Be careful because some XML editors do a bulk-reformat when you open a new file, especially if you use GUI mode in the editor.
+
+Syntax Highlighting::
+ The HBase Reference Guide uses the XSLT Syntax Highlighting Maven module for syntax highlighting. To enable syntax highlighting for a given <programlisting> or <screen> (or possibly other elements), add the attribute language=LANGUAGE_OF_CHOICE to the element, as in the following example:
+ +
+ [source,xml]
+ ----
+ <programlisting language="xml">
+     <foo>bar</foo>
+     <bar>foo</bar>
+ </programlisting>
+ ----
+ +
+ Several syntax types are supported. The most interesting ones for the HBase Reference Guide are java, xml, sql, and bourne (for BASH shell output or Linux command-line examples).
diff --git a/src/main/asciidoc/_chapters/appendix_hfile_format.adoc b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc
new file mode 100644
index 0000000..c2b0709
--- /dev/null
+++ b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc
@@ -0,0 +1,355 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[appendix]
+== HFile format
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+:toc: left
+:source-language: java
+
+This appendix describes the evolution of the HFile format.
+
+[[hfilev1]]
+=== HBase File Format (version 1)
+
+As we will be discussing changes to the HFile format, it is useful to give a short overview of the original (HFile version 1) format.
+
+[[hfilev1.overview]]
+==== Overview of Version 1
+
+An HFile in version 1 format is structured as follows:
+
+.HFile V1 Format
+image::hfile.png[HFile Version 1]
+
+==== Block index format in version 1
+
+The block index in version 1 is very straightforward.
+For each entry, it contains:
+
+. Offset (long)
+. Uncompressed size (int)
+. Key (a serialized byte array written using Bytes.writeByteArray)
+.. Key length as a variable-length integer (VInt)
+.. Key bytes
+
+
+The number of entries in the block index is stored in the fixed file trailer, and has to be passed in to the method that reads the block index.
+One of the limitations of the block index in version 1 is that it does not provide the compressed size of a block, which turns out to be necessary for decompression.
+Therefore, the HFile reader has to infer this compressed size from the offset difference between blocks.
+We fix this limitation in version 2, where we store on-disk block size instead of uncompressed size, and get uncompressed size from the block header.
+
+[[hfilev2]]
+=== HBase file format with inline blocks (version 2)
+
+Note: this feature was introduced in HBase 0.92
+
+==== Motivation
+
+We found it necessary to revise the HFile format after encountering high memory usage and slow startup times caused by large Bloom filters and block indexes in the region server.
+Bloom filters can get as large as 100 MB per HFile, which adds up to 2 GB when aggregated over 20 regions.
+Block indexes can grow as large as 6 GB in aggregate size over the same set of regions.
+A region is not considered opened until all of its block index data is loaded.
+Large Bloom filters produce a different performance problem: the first get request that requires a Bloom filter lookup will incur the latency of loading the entire Bloom filter bit array.
+
+To speed up region server startup we break Bloom filters and block indexes into multiple blocks and write those blocks out as they fill up, which also reduces the HFile writer's memory footprint.
+In the Bloom filter case, "filling up a block" means accumulating enough keys to efficiently utilize a fixed-size bit array, and in the block index case we accumulate an "index block" of the desired size.
+Bloom filter blocks and index blocks (we call these "inline blocks") become interspersed with data blocks, and as a side effect we can no longer rely on the difference between block offsets to determine data block length, as it was done in version 1.
+
+HFile is a low-level file format by design, and it should not deal with application-specific details such as Bloom filters, which are handled at StoreFile level.
+Therefore, we call Bloom filter blocks in an HFile "inline" blocks.
+We also supply HFile with an interface to write those inline blocks.
+
+Another format modification aimed at reducing the region server startup time is to use a contiguous "load-on-open" section that has to be loaded in memory at the time an HFile is being opened.
+Currently, as an HFile opens, there are separate seek operations to read the trailer, data/meta indexes, and file info.
+To read the Bloom filter, there are two more seek operations for its "data" and "meta" portions.
+In version 2, we seek once to read the trailer and seek again to read everything else we need to open the file from a contiguous block.
+
+[[hfilev2.overview]]
+==== Overview of Version 2
+
+The version of HBase introducing the above features reads both version 1 and 2 HFiles, but only writes version 2 HFiles.
+A version 2 HFile is structured as follows:
+
+.HFile Version 2 Structure
+image:hfilev2.png[HFile Version 2]
+
+==== Unified version 2 block format
+
+In version 2, every block in the data section contains the following fields:
+
+. 8 bytes: Block type, a sequence of bytes equivalent to version 1's "magic records". Supported block types are:
+.. DATA – data blocks
+.. LEAF_INDEX – leaf-level index blocks in a multi-level-block-index
+.. BLOOM_CHUNK – Bloom filter chunks
+.. META – meta blocks (not used for Bloom filters in version 2 anymore)
+.. INTERMEDIATE_INDEX – intermediate-level index blocks in a multi-level block index
+.. ROOT_INDEX – root-level index blocks in a multi-level block index
+.. FILE_INFO – the ``file info'' block, a small key-value map of metadata
+.. BLOOM_META – a Bloom filter metadata block in the load-on-open section
+.. TRAILER – a fixed-size file trailer.
+ As opposed to the above, this is not an HFile v2 block but a fixed-size (for each HFile version) data structure
+.. INDEX_V1 – this block type is only used for legacy HFile v1 blocks
+. Compressed size of the block's data, not including the header (int).
++
+Can be used for skipping the current data block when scanning HFile data.
+. Uncompressed size of the block's data, not including the header (int)
++
+This is equal to the compressed size if the compression algorithm is NONE
+. File offset of the previous block of the same type (long)
++
+Can be used for seeking to the previous data/index block
+. Compressed data (or uncompressed data if the compression algorithm is NONE).
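+
+To make the header layout above concrete, here is a hypothetical sketch (illustrative names, not the actual HBase reader) that parses the four header fields preceding a block's data:
+
+[source,java]
+----
+import java.io.DataInput;
+import java.io.IOException;
+
+class HFileV2BlockHeader {
+  byte[] blockTypeMagic = new byte[8]; // e.g. "DATABLK*" for DATA blocks
+  int onDiskSizeWithoutHeader;         // compressed size of the block's data
+  int uncompressedSizeWithoutHeader;
+  long prevBlockOffset;                // offset of the previous block of the same type
+
+  static HFileV2BlockHeader read(DataInput in) throws IOException {
+    HFileV2BlockHeader header = new HFileV2BlockHeader();
+    in.readFully(header.blockTypeMagic);            // 8-byte block type
+    header.onDiskSizeWithoutHeader = in.readInt();  // lets a scanner skip the block
+    header.uncompressedSizeWithoutHeader = in.readInt();
+    header.prevBlockOffset = in.readLong();         // lets a reader seek backwards
+    return header;
+  }
+}
+----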
+
+The above format of blocks is used in the following HFile sections:
+
+Scanned block section::
+ The section is named so because it contains all data blocks that need to be read when an HFile is scanned sequentially.
+ Also contains leaf block index and Bloom chunk blocks.
+Non-scanned block section::
+ This section still contains unified-format v2 blocks but it does not have to be read when doing a sequential scan.
+ This section contains "meta" blocks and intermediate-level index blocks.
+
+We are supporting "meta" blocks in version 2 the same way they were supported in version 1, even though we do not store Bloom filter data in these blocks anymore.
+
+==== Block index in version 2
+
+There are three types of block indexes in HFile version 2, stored in two different formats (root and non-root):
+
+. Data index -- version 2 multi-level block index, consisting of:
+.. Version 2 root index, stored in the data block index section of the file
+.. Optionally, version 2 intermediate levels, stored in the non-root format in the data index section of the file. Intermediate levels can only be present if leaf level blocks are present
+.. Optionally, version 2 leaf levels, stored in the non-root format inline with data blocks
+. Meta index -- version 2 root index format only, stored in the meta index section of the file
+. Bloom index -- version 2 root index format only, stored in the ``load-on-open'' section as part of Bloom filter metadata.
+
+==== Root block index format in version 2
+
+This format applies to:
+
+. Root level of the version 2 data index
+. Entire meta and Bloom indexes in version 2, which are always single-level.
+
+A version 2 root index block is a sequence of entries of the following format, similar to entries of a version 1 block index, but storing on-disk size instead of uncompressed size.
+
+. Offset (long)
++
+This offset may point to a data block or to a deeper-level index block.
+
+. On-disk size (int)
+. Key (a serialized byte array stored using Bytes.writeByteArray)
+.. Key length as a variable-length integer (VInt)
+.. Key bytes
+
+
+A single-level version 2 block index consists of just a single root index block.
+To read a root index block of version 2, one needs to know the number of entries.
+For the data index and the meta index the number of entries is stored in the trailer, and for the Bloom index it is stored in the compound Bloom filter metadata.
+
+For a multi-level block index we also store the following fields in the root index block in the load-on-open section of the HFile, in addition to the data structure described above:
+
+. Middle leaf index block offset
+. Middle leaf block on-disk size (meaning the leaf index block containing the reference to the ``middle'' data block of the file)
+. The index of the mid-key (defined below) in the middle leaf-level block.
+
+
+
+These additional fields are used to efficiently retrieve the mid-key of the HFile used in HFile splits, which we define as the first key of the block with a zero-based index of (n – 1) / 2, if the total number of blocks in the HFile is n.
+This definition is consistent with how the mid-key was determined in HFile version 1, and is reasonable in general, because blocks are likely to be the same size on average, but we don't have any estimates on individual key/value pair sizes.
+
+
+
+When writing a version 2 HFile, the total number of data blocks pointed to by every leaf-level index block is kept track of.
+When we finish writing and the total number of leaf-level blocks is determined, it is clear which leaf-level block contains the mid-key, and the fields listed above are computed.
+When reading the HFile and the mid-key is requested, we retrieve the middle leaf index block (potentially from the block cache) and get the mid-key value from the appropriate position inside that leaf block.
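+
+The arithmetic behind the mid-key definition above is simple; the following one-line helper (illustrative only, not HBase code) shows it:
+
+[source,java]
+----
+final class MidKeyExample {
+  /** The mid-key is the first key of the data block at zero-based index (n - 1) / 2. */
+  static int midKeyBlockIndex(int totalDataBlocks) {
+    return (totalDataBlocks - 1) / 2; // e.g. 7 data blocks -> index 3 (the fourth block)
+  }
+}
+----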
+
+==== Non-root block index format in version 2
+
+This format applies to intermediate-level and leaf index blocks of a version 2 multi-level data block index.
+Every non-root index block is structured as follows.
+
+. numEntries: the number of entries (int).
+. entryOffsets: the ``secondary index'' of offsets of entries in the block, to facilitate a quick binary search on the key (numEntries + 1 int values). The last value is the total length of all entries in this index block.
+ For example, in a non-root index block with entry sizes 60, 80, 50 the ``secondary index'' will contain the following int array: {0, 60, 140, 190}.
+. Entries.
+ Each entry contains:
++
+. Offset of the block referenced by this entry in the file (long)
+. On-disk size of the referenced block (int)
+. Key.
+ The length can be calculated from entryOffsets.
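+
+As a hypothetical illustration of how the ``secondary index'' above is used (class and variable names are made up), a single entry can be sliced out of the block without parsing the entries before it:
+
+[source,java]
+----
+import java.util.Arrays;
+
+final class NonRootIndexExample {
+  /**
+   * For entry sizes 60, 80 and 50 the offsets array is {0, 60, 140, 190}, so
+   * entry i occupies bytes [entryOffsets[i], entryOffsets[i + 1]) of the entry area.
+   */
+  static byte[] entryBytes(byte[] entriesArea, int[] entryOffsets, int i) {
+    int start = entryOffsets[i];
+    int end = entryOffsets[i + 1]; // the extra last value is the total entries length
+    return Arrays.copyOfRange(entriesArea, start, end);
+  }
+}
+----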
+
+
+==== Bloom filters in version 2
+
+In contrast with version 1, in a version 2 HFile Bloom filter metadata is stored in the load-on-open section of the HFile for quick startup.
+
+. A compound Bloom filter.
++
+. Bloom filter version = 3 (int). There used to be a DynamicByteBloomFilter class that had the Bloom filter version number 2
+. The total byte size of all compound Bloom filter chunks (long)
+. Number of hash functions (int)
+. Type of hash functions (int)
+. The total key count inserted into the Bloom filter (long)
+. The maximum total number of keys in the Bloom filter (long)
+. The number of chunks (int)
+. Comparator class used for Bloom filter keys, a UTF-8 encoded string stored using Bytes.writeByteArray
+. Bloom block index in the version 2 root block index format
+
+
+==== File Info format in versions 1 and 2
+
+The file info block is a serialized link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/io/HbaseMapWritable.html[HbaseMapWritable] (essentially a map from byte arrays to byte arrays) with the following keys, among others.
+StoreFile-level logic adds more keys to this.
+
+[cols="1,1", frame="all"]
+|===
+|hfile.LASTKEY| The last key of the file (byte array)
+|hfile.AVG_KEY_LEN| The average key length in the file (int)
+|hfile.AVG_VALUE_LEN| The average value length in the file (int)
+|===
+
+File info format did not change in version 2.
+However, we moved the file info to the final section of the file, which can be loaded as one block at the time the HFile is being opened.
+Also, we do not store comparator in the version 2 file info anymore.
+Instead, we store it in the fixed file trailer.
+This is because we need to know the comparator at the time of parsing the load-on-open section of the HFile.
+
+==== Fixed file trailer format differences between versions 1 and 2
+
+The following table shows common and different fields between fixed file trailers in versions 1 and 2.
+Note that the size of the trailer is different depending on the version, so it is ``fixed'' only within one version.
+However, the version is always stored as the last four-byte integer in the file.
+
+.Differences between HFile Versions 1 and 2
+[cols="1,1", frame="all"]
+|===
+| Version 1 | Version 2
+| |File info offset (long)
+| Data index offset (long)| loadOnOpenOffset (long) /The offset of the section that we need to load when opening the file./
+| | Number of data index entries (int)
+| metaIndexOffset (long) /This field is not being used by the version 1 reader, so we removed it from version 2./ | uncompressedDataIndexSize (long) /The total uncompressed size of the whole data block index, including root-level, intermediate-level, and leaf-level blocks./
+| | Number of meta index entries (int)
+| | Total uncompressed bytes (long)
+| numEntries (int) | numEntries (long)
+| Compression codec: 0 = LZO, 1 = GZ, 2 = NONE (int) | Compression codec: 0 = LZO, 1 = GZ, 2 = NONE (int)
+| | The number of levels in the data block index (int)
+| | firstDataBlockOffset (long) /The offset of the first data block. Used when scanning./
+| | lastDataBlockEnd (long) /The offset of the first byte after the last key/value data block. We don't need to go beyond this offset when scanning./
+| Version: 1 (int) | Version: 2 (int)
+|===
+
+
+
+==== getShortMidpointKey (an optimization for data index block)
+
+Note: this optimization was introduced in HBase 0.95+
+
+HFiles contain many blocks that contain a range of sorted Cells.
+Each cell has a key.
+To save IO when reading Cells, the HFile also has an index that maps a Cell's start key to the offset of the beginning of a particular block.
+Prior to this optimization, HBase would use the key of the first cell in each data block as the index key.
+
+In HBASE-7845, we generate a new key that is lexicographically larger than the last key of the previous block and lexicographically equal to or smaller than the start key of the current block.
+While actual keys can potentially be very long, this "fake key" or "virtual key" can be much shorter.
+For example, if the stop key of the previous block is "the quick brown fox" and the start key of the current block is "the who", we could use "the r" as our virtual key in our hfile index.
+
+There are two benefits to this:
+
+* having shorter keys reduces the hfile index size, (allowing us to keep more indexes in memory), and
+* using something closer to the end key of the previous block allows us to avoid a potential extra IO when the target key lives in between the "virtual key" and the key of the first element in the target block.
+
+This optimization (implemented by the getShortMidpointKey method) is inspired by LevelDB's ByteWiseComparatorImpl::FindShortestSeparator() and FindShortSuccessor().
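+
+The following hypothetical sketch (not the actual HBase implementation) shows the spirit of such a "shortest separator" computation; with the inputs from the example above it returns "the r":
+
+[source,java]
+----
+import java.util.Arrays;
+
+final class ShortSeparatorExample {
+  static byte[] shortestSeparator(byte[] prevBlockLastKey, byte[] nextBlockFirstKey) {
+    int minLen = Math.min(prevBlockLastKey.length, nextBlockFirstKey.length);
+    int i = 0;
+    while (i < minLen && prevBlockLastKey[i] == nextBlockFirstKey[i]) {
+      i++; // length of the common prefix
+    }
+    if (i == minLen) {
+      // One key is a prefix of the other; fall back to the next block's first key.
+      return nextBlockFirstKey;
+    }
+    int diverging = prevBlockLastKey[i] & 0xFF;
+    if (diverging + 1 < (nextBlockFirstKey[i] & 0xFF)) {
+      // Bump the first differing byte: larger than the previous block's last key,
+      // still smaller than the next block's first key, and much shorter.
+      byte[] separator = Arrays.copyOf(prevBlockLastKey, i + 1);
+      separator[i] = (byte) (diverging + 1);
+      return separator; // "the quick brown fox" vs "the who" -> "the r"
+    }
+    return nextBlockFirstKey;
+  }
+}
+----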
+
+[[hfilev3]]
+=== HBase File Format with Security Enhancements (version 3)
+
+Note: this feature was introduced in HBase 0.98
+
+[[hfilev3.motivation]]
+==== Motivation
+
+Version 3 of HFile makes changes needed to ease management of encryption at rest and cell-level metadata (which in turn is needed for cell-level ACLs and cell-level visibility labels). For more information see <>, <>, <>, and <>.
+
+[[hfilev3.overview]]
+==== Overview
+
+The version of HBase introducing the above features reads HFiles in versions 1, 2, and 3 but only writes version 3 HFiles.
+Version 3 HFiles are structured the same as version 2 HFiles.
+For more information see <>.
+
+[[hvilev3.infoblock]]
+==== File Info Block in Version 3
+
+Version 3 added two additional pieces of information to the reserved keys in the file info block.
+
+[cols="1,1", frame="all"]
+|===
+| hfile.MAX_TAGS_LEN | The maximum number of bytes needed to store the serialized tags for any single cell in this hfile (int)
+| hfile.TAGS_COMPRESSED | Does the block encoder for this hfile compress tags? (boolean). Should only be present if hfile.MAX_TAGS_LEN is also present.
+|===
+
+When reading a Version 3 HFile the presence of [class]+MAX_TAGS_LEN+ is used to determine how to deserialize the cells within a data block.
+Therefore, consumers must read the file's info block prior to reading any data blocks.
+
+When writing a Version 3 HFile, HBase will always include [class]+MAX_TAGS_LEN+ when flushing the memstore to the underlying filesystem and when using prefix tree encoding for data blocks, as described in <>.
+
+When compacting extant files, the default writer will omit [class]+MAX_TAGS_LEN+ if all of the files selected do not themselves contain any cells with tags.
+
+See <> for details on the compaction file selection algorithm.
+
+[[hfilev3.datablock]]
+==== Data Blocks in Version 3
+
+Within an HFile, HBase cells are stored in data blocks as a sequence of KeyValues (see <>, or link:http://www.larsgeorge.com/2009/10/hbase-architecture-101-storage.html[Lars George's
+ excellent introduction to HBase Storage]). In version 3, these KeyValues will optionally include a set of 0 or more tags:
+
+[cols="1,1", frame="all"]
+|===
+| Version 1 & 2, Version 3 without MAX_TAGS_LEN | Version 3 with MAX_TAGS_LEN
+2+| Key Length (4 bytes)
+2+| Value Length (4 bytes)
+2+| Key bytes (variable)
+2+| Value bytes (variable)
+| | Tags Length (2 bytes)
+| | Tags bytes (variable)
+|===
+
+If the info block for a given HFile contains an entry for [class]+MAX_TAGS_LEN+ each cell will have the length of that cell's tags included, even if that length is zero.
+The actual tags are stored as a sequence of tag length (2 bytes), tag type (1 byte), and tag bytes (variable). The format of an individual tag's bytes depends on the tag type.
+
+Note that the dependence on the contents of the info block implies that prior to reading any data blocks you must first process a file's info block.
+It also implies that prior to writing a data block you must know if the file's info block will include [class]+MAX_TAGS_LEN+.
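+
+A hypothetical sketch of reading one cell from a version 3 data block (illustrative names, not the HBase codec itself), showing how the presence of [class]+MAX_TAGS_LEN+ in the info block decides whether the trailing tag section exists:
+
+[source,java]
+----
+import java.io.DataInput;
+import java.io.IOException;
+
+final class V3CellReaderExample {
+  static void readCell(DataInput in, boolean fileHasTags) throws IOException {
+    int keyLength = in.readInt();     // Key Length (4 bytes)
+    int valueLength = in.readInt();   // Value Length (4 bytes)
+    byte[] key = new byte[keyLength];
+    in.readFully(key);                // Key bytes (variable)
+    byte[] value = new byte[valueLength];
+    in.readFully(value);              // Value bytes (variable)
+    if (fileHasTags) {
+      int tagsLength = in.readUnsignedShort(); // Tags Length (2 bytes), may be zero
+      byte[] tags = new byte[tagsLength];
+      in.readFully(tags);             // sequence of (length, type, bytes) tags
+    }
+  }
+}
+----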
+
+[[hfilev3.fixedtrailer]]
+==== Fixed File Trailer in Version 3
+
+The fixed file trailers written with HFile version 3 are always serialized with protocol buffers.
+Additionally, it adds an optional field to the version 2 protocol buffer named encryption_key.
+If HBase is configured to encrypt HFiles this field will store a data encryption key for this particular HFile, encrypted with the current cluster master key using AES.
+For more information see <>.
+
+:numbered:
diff --git a/src/main/asciidoc/_chapters/architecture.adoc b/src/main/asciidoc/_chapters/architecture.adoc
new file mode 100644
index 0000000..e501591
--- /dev/null
+++ b/src/main/asciidoc/_chapters/architecture.adoc
@@ -0,0 +1,2522 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+= Architecture
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+:toc: left
+:source-language: java
+
+[[arch.overview]]
+== Overview
+
+[[arch.overview.nosql]]
+=== NoSQL?
+
+HBase is a type of "NoSQL" database.
+"NoSQL" is a general term meaning that the database isn't an RDBMS which supports SQL as its primary access language, but there are many types of NoSQL databases: BerkeleyDB is an example of a local NoSQL database, whereas HBase is very much a distributed database.
+Technically speaking, HBase is really more a "Data Store" than "Data Base" because it lacks many of the features you find in an RDBMS, such as typed columns, secondary indexes, triggers, and advanced query languages, etc.
+
+However, HBase has many features which support both linear and modular scaling.
+HBase clusters expand by adding RegionServers that are hosted on commodity class servers.
+If a cluster expands from 10 to 20 RegionServers, for example, it doubles both in terms of storage and processing capacity.
+An RDBMS can scale well, but only up to a point - specifically, the size of a single database server - and for the best performance requires specialized hardware and storage devices.
+HBase features of note are:
+
+* Strongly consistent reads/writes: HBase is not an "eventually consistent" DataStore.
+ This makes it very suitable for tasks such as high-speed counter aggregation.
+* Automatic sharding: HBase tables are distributed on the cluster via regions, and regions are automatically split and re-distributed as your data grows.
+* Automatic RegionServer failover
+* Hadoop/HDFS Integration: HBase supports HDFS out of the box as its distributed file system.
+* MapReduce: HBase supports massively parallelized processing via MapReduce for using HBase as both source and sink.
+* Java Client API: HBase supports an easy to use Java API for programmatic access.
+* Thrift/REST API: HBase also supports Thrift and REST for non-Java front-ends.
+* Block Cache and Bloom Filters: HBase supports a Block Cache and Bloom Filters for high volume query optimization.
+* Operational Management: HBase provides built-in web pages for operational insight as well as JMX metrics.
+
+[[arch.overview.when]]
+=== When Should I Use HBase?
+
+HBase isn't suitable for every problem.
+
+First, make sure you have enough data.
+If you have hundreds of millions or billions of rows, then HBase is a good candidate.
+If you only have a few thousand/million rows, then using a traditional RDBMS might be a better choice, because all of your data might wind up on a single node (or two) while the rest of the cluster sits idle.
+
+Second, make sure you can live without all the extra features that an RDBMS provides (e.g., typed columns, secondary indexes, transactions, advanced query languages, etc.) An application built against an RDBMS cannot be "ported" to HBase by simply changing a JDBC driver, for example.
+Consider moving from an RDBMS to HBase as a complete redesign as opposed to a port.
+
+Third, make sure you have enough hardware.
+Even HDFS doesn't do well with anything less than 5 DataNodes (due to things such as HDFS block replication which has a default of 3), plus a NameNode.
+
+HBase can run quite well stand-alone on a laptop - but this should be considered a development configuration only.
+
+[[arch.overview.hbasehdfs]]
+=== What Is The Difference Between HBase and Hadoop/HDFS?
+
+link:http://hadoop.apache.org/hdfs/[HDFS] is a distributed file system that is well suited for the storage of large files.
+Its documentation states that it is not, however, a general purpose file system, and does not provide fast individual record lookups in files.
+HBase, on the other hand, is built on top of HDFS and provides fast record lookups (and updates) for large tables.
+This can sometimes be a point of conceptual confusion.
+HBase internally puts your data in indexed "StoreFiles" that exist on HDFS for high-speed lookups.
+See the <> and the rest of this chapter for more information on how HBase achieves its goals.
+
+[[arch.catalog]]
+== Catalog Tables
+
+The catalog table [code]+hbase:meta+ exists as an HBase table and is filtered out of the HBase shell's [code]+list+ command, but is in fact a table just like any other.
+
+[[arch.catalog.root]]
+=== -ROOT-
+
+NOTE: The [code]+-ROOT-+ table was removed in HBase 0.96.0.
+Information here should be considered historical.
+
+The [code]+-ROOT-+ table kept track of the location of the [code]+.META+ table (the previous name for the table now called [code]+hbase:meta+) prior to HBase 0.96.
+The [code]+-ROOT-+ table structure was as follows:
+
+.Key
+* [code]+.META.+ region key ([code]+.META.,,1+)
+
+.Values
+* [code]+info:regioninfo+ (serialized link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HRegionInfo.html[HRegionInfo] instance of hbase:meta)
+* [code]+info:server+ (server:port of the RegionServer holding hbase:meta)
+* [code]+info:serverstartcode+ (start-time of the RegionServer process holding hbase:meta)
+
+[[arch.catalog.meta]]
+=== hbase:meta
+
+The [code]+hbase:meta+ table (previously called [code]+.META.+) keeps a list of all regions in the system.
+The location of [code]+hbase:meta+ was previously tracked within the [code]+-ROOT-+ table, but is now stored in ZooKeeper.
+
+The [code]+hbase:meta+ table structure is as follows:
+
+.Key
+* Region key of the format ([code]+[table],[region start key],[region id]+)
+
+.Values
+* [code]+info:regioninfo+ (serialized link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HRegionInfo.html[HRegionInfo] instance for this region)
+* [code]+info:server+ (server:port of the RegionServer containing this region)
+* [code]+info:serverstartcode+ (start-time of the RegionServer process containing this region)
+
+When a table is in the process of splitting, two other columns will be created, called [code]+info:splitA+ and [code]+info:splitB+.
+These columns represent the two daughter regions.
+The values for these columns are also serialized HRegionInfo instances.
+After the region has been split, eventually this row will be deleted.
+
+.Note on HRegionInfo
+[NOTE]
+====
+The empty key is used to denote table start and table end.
+A region with an empty start key is the first region in a table.
+If a region has both an empty start and an empty end key, it is the only region in the table.
+====
+
+In the (hopefully unlikely) event that programmatic processing of catalog metadata is required, see the link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/util/Writables.html#getHRegionInfo%28byte[]%29[Writables] utility.
+
+[[arch.catalog.startup]]
+=== Startup Sequencing
+
+First, the location of [code]+hbase:meta+ is looked up in ZooKeeper.
+Next, [code]+hbase:meta+ is updated with server and startcode values.
+
+For information on region-RegionServer assignment, see <>.
+
+[[architecture.client]]
+== Client
+
+The HBase client finds the RegionServers that are serving the particular row range of interest.
+It does this by querying the [code]+hbase:meta+ table.
+See <> for details.
+After locating the required region(s), the client contacts the RegionServer serving that region, rather than going through the master, and issues the read or write request.
+This information is cached in the client so that subsequent requests need not go through the lookup process.
+Should a region be reassigned either by the master load balancer or because a RegionServer has died, the client will requery the catalog tables to determine the new location of the user region.
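+
+The HBase 1.0 [code]+RegionLocator+ interface exposes this lookup directly.
+The following is a hedged sketch, not the client's internal code path; the table name and row key are hypothetical, and [code]+connection+ is assumed to be an open [code]+Connection+ (see the Cluster Connections section below).
+
+[source,java]
+----
+// Hedged sketch: asking the client where a particular row lives.
+// "myTable" and "row-0001" are hypothetical examples.
+try (RegionLocator locator = connection.getRegionLocator(TableName.valueOf("myTable"))) {
+  HRegionLocation location = locator.getRegionLocation(Bytes.toBytes("row-0001"));
+  System.out.println("Row is served by " + location.getServerName());
+}
+----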
+
+See <> for more information about the impact of the Master on HBase Client communication.
+
+Administrative functions are done via an instance of link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Admin.html[Admin]
+
+[[client.connections]]
+=== Cluster Connections
+
+The API changed in HBase 1.0.
+It has been cleaned up, and users are returned interfaces to work against rather than particular types.
+In HBase 1.0, obtain a cluster Connection from ConnectionFactory and thereafter get from it instances of Table, Admin, and RegionLocator on an as-needed basis.
+When done, close obtained instances.
+Finally, be sure to cleanup your Connection instance before exiting.
+Connections are heavyweight objects.
+Create once and keep an instance around.
+Table, Admin and RegionLocator instances are lightweight.
+Create as you go and then let go as soon as you are done by closing them.
+See the link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/package-summary.html[Client Package Javadoc Description] for example usage of the new HBase 1.0 API.
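+
+The following is a minimal sketch of that lifecycle, assuming a hypothetical table named [code]+myTable+:
+
+[source,java]
+----
+// Minimal sketch of the HBase 1.0 connection lifecycle.
+// "myTable" is a hypothetical table name.
+Configuration conf = HBaseConfiguration.create();
+try (Connection connection = ConnectionFactory.createConnection(conf)) {
+  // Table, Admin and RegionLocator are lightweight; create and close them as needed.
+  try (Table table = connection.getTable(TableName.valueOf("myTable"));
+       Admin admin = connection.getAdmin()) {
+    // ... use table and admin ...
+  }
+} // closing the Connection releases the shared ZooKeeper and socket resources
+----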
+
+For connection configuration information, see <>.
+
+_link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html[Table]
+ instances are not thread-safe_.
+Only one thread can use an instance of Table at any given time.
+When creating Table instances, it is advisable to use the same link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HBaseConfiguration[HBaseConfiguration] instance.
+This will ensure sharing of ZooKeeper and socket instances to the RegionServers which is usually what you want.
+For example, this is preferred:
+
+[source,java]
+----
+Configuration conf = HBaseConfiguration.create();
+HTable table1 = new HTable(conf, "myTable");
+HTable table2 = new HTable(conf, "myTable");
+----
+
+as opposed to this:
+
+[source,java]
+----
+Configuration conf1 = HBaseConfiguration.create();
+HTable table1 = new HTable(conf1, "myTable");
+Configuration conf2 = HBaseConfiguration.create();
+HTable table2 = new HTable(conf2, "myTable");
+----
+
+For more information about how connections are handled in the HBase client, see link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HConnectionManager.html[HConnectionManager].
+
+[[client.connection.pooling]]
+==== Connection Pooling
+
+For applications which require high-end multithreaded access (e.g., web-servers or application servers that may serve many application threads in a single JVM), you can pre-create an [class]+HConnection+, as shown in the following example:
+
+.Pre-Creating a [code]+HConnection+
+====
+[source,java]
+----
+// Create a connection to the cluster.
+Configuration conf = HBaseConfiguration.create();
+HConnection connection = HConnectionManager.createConnection(conf);
+HTableInterface table = connection.getTable("myTable");
+// use table as needed, the table returned is lightweight
+table.close();
+// use the connection for other access to the cluster
+connection.close();
+----
+====
+
+Constructing an HTableInterface implementation is very lightweight, and resources are controlled.
+
+.[code]+HTablePool+ is Deprecated
+[WARNING]
+====
+Previous versions of this guide discussed [code]+HTablePool+, which was deprecated in HBase 0.94, 0.95, and 0.96, and removed in 0.98.1, by link:https://issues.apache.org/jira/browse/HBASE-6580[HBASE-6580].
+Please use link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HConnection.html[HConnection] instead.
+====
+
+[[client.writebuffer]]
+=== WriteBuffer and Batch Methods
+
+If <> is turned off on link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html[HTable], [class]+Put+s are sent to RegionServers when the writebuffer is filled.
+The writebuffer is 2MB by default.
+Before an HTable instance is discarded, either [method]+close()+ or [method]+flushCommits()+ should be invoked so Puts will not be lost.
+
+Note: [code]+htable.delete(Delete);+ does not go in the writebuffer! This only applies to Puts.
+
+For additional information on write durability, review the link:../acid-semantics.html[ACID semantics] page.
+
+For fine-grained control of batching of [class]+Put+s or [class]+Delete+s, see the link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html#batch%28java.util.List%29[batch] methods on HTable.
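+
+The following is a hedged sketch of buffered writes with the pre-1.0 [class]+HTable+ API; the table, family, and qualifier names are hypothetical, and [code]+conf+ is assumed to be a [code]+Configuration+ as in the examples above.
+
+[source,java]
+----
+// Hedged sketch: buffering Puts client-side and flushing explicitly.
+HTable table = new HTable(conf, "myTable");
+table.setAutoFlush(false);                  // Puts accumulate in the write buffer
+for (int i = 0; i < 1000; i++) {
+  Put put = new Put(Bytes.toBytes("row-" + i));
+  put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("value-" + i));
+  table.put(put);                           // actually sent only when the 2MB buffer fills
+}
+table.flushCommits();                       // push any remaining buffered Puts
+table.close();                              // close() also flushes outstanding Puts
+----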
+
+[[client.external]]
+=== External Clients
+
+Information on non-Java clients and custom protocols is covered in <>
+
+[[client.filter]]
+== Client Request Filters
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Get.html[Get] and link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html[Scan] instances can be optionally configured with link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/Filter.html[filters] which are applied on the RegionServer.
+
+Filters can be confusing because there are many different types, and it is best to approach them by understanding the groups of Filter functionality.
+
+[[client.filter.structural]]
+=== Structural
+
+Structural Filters contain other Filters.
+
+[[client.filter.structural.fl]]
+==== FilterList
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/FilterList.html[FilterList] represents a list of Filters with a relationship of [code]+FilterList.Operator.MUST_PASS_ALL+ or [code]+FilterList.Operator.MUST_PASS_ONE+ between the Filters.
+The following example shows an 'or' between two Filters (checking for either 'my value' or 'my other value' on the same attribute).
+
+[source,java]
+----
+
+FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ONE);
+SingleColumnValueFilter filter1 = new SingleColumnValueFilter(
+ cf,
+ column,
+ CompareOp.EQUAL,
+ Bytes.toBytes("my value")
+ );
+list.add(filter1);
+SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
+ cf,
+ column,
+ CompareOp.EQUAL,
+ Bytes.toBytes("my other value")
+ );
+list.add(filter2);
+scan.setFilter(list);
+----
+
+[[client.filter.cv]]
+=== Column Value
+
+[[client.filter.cv.scvf]]
+==== SingleColumnValueFilter
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/SingleColumnValueFilter.html[SingleColumnValueFilter] can be used to test column values for equivalence ([code]+CompareOp.EQUAL+), inequality ([code]+CompareOp.NOT_EQUAL+), or ranges (e.g., [code]+CompareOp.GREATER+).
+The following is an example of testing a column for equivalence to the String value "my value".
+
+[source,java]
+----
+
+SingleColumnValueFilter filter = new SingleColumnValueFilter(
+ cf,
+ column,
+ CompareOp.EQUAL,
+ Bytes.toBytes("my value")
+ );
+scan.setFilter(filter);
+----
+
+[[client.filter.cvp]]
+=== Column Value Comparators
+
+There are several Comparator classes in the Filter package that deserve special mention.
+These Comparators are used in concert with other Filters, such as <>.
+
+[[client.filter.cvp.rcs]]
+==== RegexStringComparator
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/RegexStringComparator.html[RegexStringComparator] supports regular expressions for value comparisons.
+
+[source,java]
+----
+
+RegexStringComparator comp = new RegexStringComparator("my."); // any value that starts with 'my'
+SingleColumnValueFilter filter = new SingleColumnValueFilter(
+ cf,
+ column,
+ CompareOp.EQUAL,
+ comp
+ );
+scan.setFilter(filter);
+----
+
+See the Oracle JavaDoc for link:http://download.oracle.com/javase/6/docs/api/java/util/regex/Pattern.html[supported
+ RegEx patterns in Java].
+
+[[client.filter.cvp.substringcomparator]]
+==== SubstringComparator
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/SubstringComparator.html[SubstringComparator] can be used to determine if a given substring exists in a value.
+The comparison is case-insensitive.
+
+[source,java]
+----
+
+SubstringComparator comp = new SubstringComparator("y val"); // looking for 'my value'
+SingleColumnValueFilter filter = new SingleColumnValueFilter(
+ cf,
+ column,
+ CompareOp.EQUAL,
+ comp
+ );
+scan.setFilter(filter);
+----
+
+[[client.filter.cvp.bfp]]
+==== BinaryPrefixComparator
+
+See link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryPrefixComparator.html[BinaryPrefixComparator].
+
+[[client.filter.cvp.bc]]
+==== BinaryComparator
+
+See link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/BinaryComparator.html[BinaryComparator].
+
+[[client.filter.kvm]]
+=== KeyValue Metadata
+
+As HBase stores data internally as KeyValue pairs, KeyValue Metadata Filters evaluate the existence of keys (i.e., ColumnFamily:Column qualifiers) for a row, as opposed to the values in the previous section.
+
+[[client.filter.kvm.ff]]
+==== FamilyFilter
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/FamilyFilter.html[FamilyFilter] can be used to filter on the ColumnFamily.
+It is generally a better idea to select ColumnFamilies in the Scan than to do it with a Filter.
+
+[[client.filter.kvm.qf]]
+==== QualifierFilter
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/QualifierFilter.html[QualifierFilter] can be used to filter based on Column (aka Qualifier) name.
+
+[[client.filter.kvm.cpf]]
+==== ColumnPrefixFilter
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/ColumnPrefixFilter.html[ColumnPrefixFilter] can be used to filter based on the lead portion of Column (aka Qualifier) names.
+
+A ColumnPrefixFilter seeks ahead to the first column matching the prefix in each row and for each involved column family.
+It can be used to efficiently get a subset of the columns in very wide rows.
+
+Note: The same column qualifier can be used in different column families.
+This filter returns all matching columns.
+
+Example: Find all columns in a row and family that start with "abc"
+
+[source,java]
+----
+
+HTableInterface t = ...;
+byte[] row = ...;
+byte[] family = ...;
+byte[] prefix = Bytes.toBytes("abc");
+Scan scan = new Scan(row, row); // (optional) limit to one row
+scan.addFamily(family); // (optional) limit to one family
+Filter f = new ColumnPrefixFilter(prefix);
+scan.setFilter(f);
+scan.setBatch(10); // set this if there could be many columns returned
+ResultScanner rs = t.getScanner(scan);
+for (Result r = rs.next(); r != null; r = rs.next()) {
+ for (KeyValue kv : r.raw()) {
+ // each kv represents a column
+ }
+}
+rs.close();
+----
+
+[[client.filter.kvm.mcpf]]
+==== MultipleColumnPrefixFilter
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/MultipleColumnPrefixFilter.html[MultipleColumnPrefixFilter] behaves like ColumnPrefixFilter but allows specifying multiple prefixes.
+
+Like ColumnPrefixFilter, MultipleColumnPrefixFilter efficiently seeks ahead to the first column matching the lowest prefix and also seeks past ranges of columns between prefixes.
+It can be used to efficiently get discontinuous sets of columns from very wide rows.
+
+Example: Find all columns in a row and family that start with "abc" or "xyz"
+
+[source,java]
+----
+HTableInterface t = ...;
+byte[] row = ...;
+byte[] family = ...;
+byte[][] prefixes = new byte[][] {Bytes.toBytes("abc"), Bytes.toBytes("xyz")};
+Scan scan = new Scan(row, row); // (optional) limit to one row
+scan.addFamily(family); // (optional) limit to one family
+Filter f = new MultipleColumnPrefixFilter(prefixes);
+scan.setFilter(f);
+scan.setBatch(10); // set this if there could be many columns returned
+ResultScanner rs = t.getScanner(scan);
+for (Result r = rs.next(); r != null; r = rs.next()) {
+ for (KeyValue kv : r.raw()) {
+ // each kv represents a column
+ }
+}
+rs.close();
+----
+
+[[client.filter.kvm.crf]]
+==== ColumnRangeFilter
+
+A link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/ColumnRangeFilter.html[ColumnRangeFilter] allows efficient intra row scanning.
+
+A ColumnRangeFilter can seek ahead to the first matching column for each involved column family.
+It can be used to efficiently get a 'slice' of the columns of a very wide row.
+For example, you may have a million columns in a row but only want to look at columns bbbb-bbdd.
+
+Note: The same column qualifier can be used in different column families.
+This filter returns all matching columns.
+
+Example: Find all columns in a row and family between "bbbb" (inclusive) and "bbdd" (inclusive)
+
+[source,java]
+----
+
+HTableInterface t = ...;
+byte[] row = ...;
+byte[] family = ...;
+byte[] startColumn = Bytes.toBytes("bbbb");
+byte[] endColumn = Bytes.toBytes("bbdd");
+Scan scan = new Scan(row, row); // (optional) limit to one row
+scan.addFamily(family); // (optional) limit to one family
+Filter f = new ColumnRangeFilter(startColumn, true, endColumn, true);
+scan.setFilter(f);
+scan.setBatch(10); // set this if there could be many columns returned
+ResultScanner rs = t.getScanner(scan);
+for (Result r = rs.next(); r != null; r = rs.next()) {
+ for (KeyValue kv : r.raw()) {
+ // each kv represents a column
+ }
+}
+rs.close();
+----
+
+Note: Introduced in HBase 0.92
+
+[[client.filter.row]]
+=== RowKey
+
+[[client.filter.row.rf]]
+==== RowFilter
+
+It is generally a better idea to use the startRow/stopRow methods on Scan for row selection, however link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/RowFilter.html[RowFilter] can also be used.
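+
+The following is a hedged sketch of the preferred approach, with a [code]+RowFilter+ alternative shown for comparison; the row keys are hypothetical.
+
+[source,java]
+----
+// Preferred: restrict the scan by start and stop row.
+Scan scan = new Scan(Bytes.toBytes("row-0100"), Bytes.toBytes("row-0200"));
+
+// Alternative using RowFilter; every row is still examined server-side.
+// Scan scan = new Scan();
+// scan.setFilter(new RowFilter(CompareOp.GREATER_OR_EQUAL,
+//     new BinaryComparator(Bytes.toBytes("row-0100"))));
+----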
+
+[[client.filter.utility]]
+=== Utility
+
+[[client.filter.utility.fkof]]
+==== FirstKeyOnlyFilter
+
+This is primarily used for rowcount jobs.
+See link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/FirstKeyOnlyFilter.html[FirstKeyOnlyFilter].
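+
+A minimal sketch of its typical use in a row-counting scan:
+
+[source,java]
+----
+// Return only the first KeyValue of each row; enough to count rows cheaply.
+Scan scan = new Scan();
+scan.setFilter(new FirstKeyOnlyFilter());
+----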
+
+== Master
+
+[code]+HMaster+ is the implementation of the Master Server.
+The Master server is responsible for monitoring all RegionServer instances in the cluster, and is the interface for all metadata changes.
+In a distributed cluster, the Master typically runs on the <>.
+J Mohamed Zahoor goes into some more detail on the Master Architecture in this blog posting, link:http://blog.zahoor.in/2012/08/hbase-hmaster-architecture/[HBase HMaster
+ Architecture ].
+
+[[master.startup]]
+=== Startup Behavior
+
+If run in a multi-Master environment, all Masters compete to run the cluster.
+If the active Master loses its lease in ZooKeeper (or the Master shuts down), then the remaining Masters jostle to take over the Master role.
+
+[[master.runtime]]
+=== Runtime Impact
+
+A common dist-list question involves what happens to an HBase cluster when the Master goes down.
+Because the HBase client talks directly to the RegionServers, the cluster can still function in a "steady state." Additionally, per <>, [code]+hbase:meta+ exists as an HBase table and is not resident in the Master.
+However, the Master controls critical functions such as RegionServer failover and completing region splits.
+So while the cluster can still run for a short time without the Master, the Master should be restarted as soon as possible.
+
+[[master.api]]
+=== Interface
+
+The methods exposed by [code]+HMasterInterface+ are primarily metadata-oriented methods:
+
+* Table (createTable, modifyTable, removeTable, enable, disable)
+* ColumnFamily (addColumn, modifyColumn, removeColumn)
+* Region (move, assign, unassign)
+
+For example, when the [code]+HBaseAdmin+ method [code]+disableTable+ is invoked, it is serviced by the Master server.
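+
+A hedged illustration of such a Master-serviced call, using the pre-1.0 [class]+HBaseAdmin+ client; the table name is hypothetical.
+
+[source,java]
+----
+// Hedged sketch: table-level administrative calls are serviced by the Master.
+Configuration conf = HBaseConfiguration.create();
+HBaseAdmin admin = new HBaseAdmin(conf);
+admin.disableTable("myTable");   // routed to the Master, not to a RegionServer
+admin.close();
+----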
+
+[[master.processes]]
+=== Processes
+
+The Master runs several background threads:
+
+[[master.processes.loadbalancer]]
+==== LoadBalancer
+
+Periodically, and when there are no regions in transition, a load balancer will run and move regions around to balance the cluster's load.
+See <> for configuring this property.
+
+See <> for more information on region assignment.
+
+[[master.processes.catalog]]
+==== CatalogJanitor
+
+Periodically checks and cleans up the hbase:meta table.
+See <> for more information on META.
+
+[[regionserver.arch]]
+== RegionServer
+
+[code]+HRegionServer+ is the RegionServer implementation.
+It is responsible for serving and managing regions.
+In a distributed cluster, a RegionServer runs on a <>.
+
+[[regionserver.arch.api]]
+=== Interface
+
+The methods exposed by [code]+HRegionInterface+ contain both data-oriented and region-maintenance methods:
+
+* Data (get, put, delete, next, etc.)
+* Region (splitRegion, compactRegion, etc.)
+
+For example, when the [code]+HBaseAdmin+ method [code]+majorCompact+ is invoked on a table, the client is actually iterating through all regions for the specified table and requesting a major compaction directly to each region.
+
+[[regionserver.arch.processes]]
+=== Processes
+
+The RegionServer runs a variety of background threads:
+
+[[regionserver.arch.processes.compactsplit]]
+==== CompactSplitThread
+
+Checks for splits and handles minor compactions.
+
+[[regionserver.arch.processes.majorcompact]]
+==== MajorCompactionChecker
+
+Checks for major compactions.
+
+[[regionserver.arch.processes.memstore]]
+==== MemStoreFlusher
+
+Periodically flushes in-memory writes in the MemStore to StoreFiles.
+
+[[regionserver.arch.processes.log]]
+==== LogRoller
+
+Periodically checks the RegionServer's WAL.
+
+=== Coprocessors
+
+Coprocessors were added in 0.92.
+There is a thorough link:https://blogs.apache.org/hbase/entry/coprocessor_introduction[Blog Overview
+ of CoProcessors] posted.
+Documentation will eventually move to this reference guide, but the blog is the most current information available at this time.
+
+[[block.cache]]
+=== Block Cache
+
+HBase provides two different BlockCache implementations: the default onheap LruBlockCache and BucketCache, which is (usually) offheap.
+This section discusses benefits and drawbacks of each implementation, how to choose the appropriate option, and configuration options for each.
+
+.Block Cache Reporting: UI
+[NOTE]
+====
+See the RegionServer UI for detail on caching deploy.
+Since HBase-0.98.4, the Block Cache detail has been significantly extended showing configurations, sizings, current usage, time-in-the-cache, and even detail on block counts and types.
+====
+
+==== Cache Choices
+
+[class]+LruBlockCache+ is the original implementation, and is entirely within the Java heap. [class]+BucketCache+ is mainly intended for keeping blockcache data offheap, although BucketCache can also keep data onheap and serve from a file-backed cache.
+
+.BucketCache is production ready as of hbase-0.98.6
+[NOTE]
+====
+To run with BucketCache, you need HBASE-11678.
+This was included in hbase-0.98.6.
+====
+
+Fetching will always be slower when fetching from BucketCache, as compared to the native onheap LruBlockCache.
+However, latencies tend to be less erratic across time, because there is less garbage collection when you use BucketCache since it is managing BlockCache allocations, not the GC.
+If the BucketCache is deployed in offheap mode, this memory is not managed by the GC at all.
+This is why you'd use BucketCache, so your latencies are less erratic and to mitigate GCs and heap fragmentation.
+See Nick Dimiduk's link:http://www.n10k.com/blog/blockcache-101/[BlockCache 101] for comparisons running onheap vs offheap tests.
+Also see link:http://people.apache.org/~stack/bc/[Comparing BlockCache Deploys], which finds that if your dataset fits inside your LruBlockCache deploy, you should use it; otherwise, if you are experiencing cache churn (or you want your cache to exist beyond the vagaries of Java GC), use BucketCache.
+
+When you enable BucketCache, you are enabling a two tier caching system, an L1 cache which is implemented by an instance of LruBlockCache and an offheap L2 cache which is implemented by BucketCache.
+Management of these two tiers and the policy that dictates how blocks move between them is done by [class]+CombinedBlockCache+.
+It keeps all DATA blocks in the L2 BucketCache and meta blocks -- INDEX and BLOOM blocks -- onheap in the L1 [class]+LruBlockCache+.
+See <> for more detail on going offheap.
+
+[[cache.configurations]]
+==== General Cache Configurations
+
+Apart from the cache implementation itself, you can set some general configuration options to control how the cache performs.
+See link:http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/io/hfile/CacheConfig.html.
+After setting any of these options, restart or rolling restart your cluster for the configuration to take effect.
+Check logs for errors or unexpected behavior.
+
+See also <>, which discusses a new option introduced in link:https://issues.apache.org/jira/browse/HBASE-9857[HBASE-9857].
+
+[[block.cache.design]]
+==== LruBlockCache Design
+
+The LruBlockCache is an LRU cache that contains three levels of block priority to allow for scan-resistance and in-memory ColumnFamilies:
+
+* Single access priority: The first time a block is loaded from HDFS it normally has this priority and it will be part of the first group to be considered during evictions.
+ The advantage is that scanned blocks are more likely to get evicted than blocks that are getting more usage.
+* Multi access priority: If a block in the previous priority group is accessed again, it upgrades to this priority.
+ It is thus part of the second group considered during evictions.
+* In-memory access priority: If the block's family was configured to be "in-memory", it will be part of this priority disregarding the number of times it was accessed.
+ Catalog tables are configured like this.
+ This group is the last one considered during evictions.
++
+To mark a column family as in-memory, call
+
+[source,java]
+----
+HColumnDescriptor.setInMemory(true);
+----
+
+if creating a table from java, or set +IN_MEMORY => true+ when creating or altering a table in the shell: e.g.
+
+[source]
+----
+hbase(main):003:0> create 't', {NAME => 'f', IN_MEMORY => 'true'}
+----
+
+For more information, see the link:http://hbase.apache.org/xref/org/apache/hadoop/hbase/io/hfile/LruBlockCache.html[LruBlockCache
+ source]
+
+[[block.cache.usage]]
+==== LruBlockCache Usage
+
+Block caching is enabled by default for all the user tables which means that any read operation will load the LRU cache.
+This might be good for a large number of use cases, but further tunings are usually required in order to achieve better performance.
+An important concept is the link:http://en.wikipedia.org/wiki/Working_set_size[working set size], or WSS, which is: "the amount of memory needed to compute the answer to a problem". For a website, this would be the data that's needed to answer the queries over a short amount of time.
+
+The way to calculate how much memory is available in HBase for caching is:
+
+[source]
+----
+
+ number of region servers * heap size * hfile.block.cache.size * 0.99
+----
+
+The default value for the block cache is 0.25 which represents 25% of the available heap.
+The last value (99%) is the default acceptable loading factor in the LRU cache after which eviction is started.
+The reason it is included in this equation is that it would be unrealistic to say that it is possible to use 100% of the available memory, since this would cause the process to block from the point where it loads new blocks.
+Here are some examples:
+
+* One region server with the default heap size (1 GB) and the default block cache size will have 253 MB of block cache available.
+* 20 region servers with the heap size set to 8 GB and a default block cache size will have 39.6 GB of block cache.
+* 100 region servers with the heap size set to 24 GB and a block cache size of 0.5 will have about 1.16 TB of block cache.
+
+Your data is not the only resident of the block cache.
+Here are others that you may have to take into account:
+
+Catalog Tables::
+ The [code]+-ROOT-+ (prior to HBase 0.96.
+ See <>) and [code]+hbase:meta+ tables are forced into the block cache and have the in-memory priority which means that they are harder to evict.
+ The former never uses more than a few hundreds of bytes while the latter can occupy a few MBs (depending on the number of regions).
+
+HFiles Indexes::
+ An [firstterm]_hfile_ is the file format that HBase uses to store data in HDFS.
+ It contains a multi-layered index which allows HBase to seek to the data without having to read the whole file.
+ The size of those indexes is a factor of the block size (64KB by default), the size of your keys and the amount of data you are storing.
+ For big data sets it's not unusual to see numbers around 1GB per region server, although not all of it will be in cache because the LRU will evict indexes that aren't used.
+
+Keys::
+ The values that are stored are only half the picture, since each value is stored along with its keys (row key, family qualifier, and timestamp). See <>.
+
+Bloom Filters::
+ Just like the HFile indexes, those data structures (when enabled) are stored in the LRU.
+
+Currently the recommended way to measure HFile indexes and bloom filters sizes is to look at the region server web UI and check out the relevant metrics.
+For keys, sampling can be done by using the HFile command line tool and looking for the average key size metric.
+Since HBase 0.98.3, you can view detail on BlockCache stats and metrics in a special Block Cache section in the UI.
+
+It's generally bad to use block caching when the WSS doesn't fit in memory.
+This is the case when you have for example 40GB available across all your region servers' block caches but you need to process 1TB of data.
+One of the reasons is that the churn generated by the evictions will trigger more garbage collections unnecessarily.
+Here are two use cases:
+
+* Fully random reading pattern: This is a case where you almost never access the same row twice within a short amount of time such that the chance of hitting a cached block is close to 0.
+ Setting block caching on such a table is a waste of memory and CPU cycles, even more so because it will generate more garbage for the JVM to collect.
+ For more information on monitoring GC, see <>.
+* Mapping a table: In a typical MapReduce job that takes a table in input, every row will be read only once so there's no need to put them into the block cache.
+ The Scan object has the option of turning this off via the [code]+setCacheBlocks+ method (set it to false). You can still keep block caching turned on for this table if you need fast random read access.
+ An example would be counting the number of rows in a table that serves live traffic; caching every block of that table would create massive churn and would surely evict data that is currently in use.
+
+[[data.blocks.in.fscache]]
+===== Caching META blocks only (DATA blocks in fscache)
+
+An interesting setup is one where we cache META blocks only and we read DATA blocks in on each access.
+If the DATA blocks fit inside fscache, this alternative may make sense when access is completely random across a very large dataset.
+To enable this setup, alter your table and for each column family set [var]+BLOCKCACHE => 'false'+.
+You are 'disabling' the BlockCache for this column family only; you can never disable the caching of META blocks.
+Since link:https://issues.apache.org/jira/browse/HBASE-4683[HBASE-4683 Always cache index and bloom blocks], we will cache META blocks even if the BlockCache is disabled.
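+
+From the Java API, the equivalent is a hedged sketch along the following lines; the family name is hypothetical.
+
+[source,java]
+----
+// Hedged sketch: disable caching of DATA blocks for one column family.
+// INDEX and BLOOM (META) blocks are still cached regardless.
+HColumnDescriptor hcd = new HColumnDescriptor("cf");
+hcd.setBlockCacheEnabled(false);
+----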
+
+[[offheap.blockcache]]
+==== Offheap Block Cache
+
+[[enable.bucketcache]]
+===== How to Enable BucketCache
+
+The usual deploy of BucketCache is via a managing class that sets up two caching tiers: an L1 onheap cache implemented by LruBlockCache and a second L2 cache implemented with BucketCache.
+The managing class is link:http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.html[CombinedBlockCache] by default.
+The just-previous link describes the caching 'policy' implemented by CombinedBlockCache.
+In short, it works by keeping meta blocks -- INDEX and BLOOM in the L1, onheap LruBlockCache tier -- and DATA blocks are kept in the L2, BucketCache tier.
+Since HBase 1.0, it is possible to amend this behavior and ask that a column family have both its meta and DATA blocks hosted onheap in the L1 tier, by setting [var]+cacheDataInL1+ via [code]+HColumnDescriptor.setCacheDataInL1(true)+ or, in the shell, by creating or amending column families with [var]+CACHE_DATA_IN_L1+ set to true: e.g.
+[source]
+----
+hbase(main):003:0> create 't', {NAME => 't', CONFIGURATION => {CACHE_DATA_IN_L1 => 'true'}}
+----
+
+The BucketCache Block Cache can be deployed onheap, offheap, or file based.
+You set which via the [var]+hbase.bucketcache.ioengine+ setting.
+Setting it to [var]+heap+ will have BucketCache deployed inside the allocated java heap.
+Setting it to [var]+offheap+ will have BucketCache make its allocations offheap, and an ioengine setting of [var]+file:PATH_TO_FILE+ will direct BucketCache to use file caching (useful in particular if you have some fast I/O attached to the box, such as SSDs).
+
+It is possible to deploy an L1+L2 setup where we bypass the CombinedBlockCache policy and have BucketCache working as a strict L2 cache to the L1 LruBlockCache.
+For such a setup, set [var]+CacheConfig.BUCKET_CACHE_COMBINED_KEY+ to [literal]+false+.
+In this mode, on eviction from L1, blocks go to L2.
+When a block is cached, it is cached first in L1.
+When we go to look for a cached block, we look first in L1 and if none found, then search L2.
+Let us call this deploy format,
+_(((Raw L1+L2)))_.
+
+Other BucketCache configs include: specifying a location to persist cache to across restarts, how many threads to use writing the cache, etc.
+See the link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/io/hfile/CacheConfig.html[CacheConfig.html] class for configuration options and descriptions.
+
+
+
+====== BucketCache Example Configuration
+This sample provides a configuration for a 4 GB offheap BucketCache with a 1 GB onheap cache.
+
+Configuration is performed on the RegionServer.
+
+Setting [var]+hbase.bucketcache.ioengine+ and [var]+hbase.bucketcache.size+ > 0 enables CombinedBlockCache.
+Let us presume that the RegionServer has been set to run with a 5G heap: i.e.
+HBASE_HEAPSIZE=5g.
+
+
+. First, edit the RegionServer's [path]_hbase-env.sh_ and set [var]+HBASE_OFFHEAPSIZE+ to a value greater than the offheap size wanted, in this case, 4 GB (expressed as 4G). Let's set it to 5G.
+ That'll be 4G for our offheap cache and 1G for any other uses of offheap memory (there are other users of offheap memory other than BlockCache; e.g.
+ DFSClient in RegionServer can make use of offheap memory). See <>.
+ +
+[source]
+----
+HBASE_OFFHEAPSIZE=5G
+----
+
+. Next, add the following configuration to the RegionServer's [path]_hbase-site.xml_.
++
+[source,xml]
+----
+<property>
+  <name>hbase.bucketcache.ioengine</name>
+  <value>offheap</value>
+</property>
+<property>
+  <name>hfile.block.cache.size</name>
+  <value>0.2</value>
+</property>
+<property>
+  <name>hbase.bucketcache.size</name>
+  <value>4196</value>
+</property>
+----
+
+. Restart or rolling restart your cluster, and check the logs for any issues.
+
+
+In the above, we set bucketcache to be 4G.
+We configured the onheap LruBlockCache to have 0.2 of the RegionServer's heap size (0.2 * 5G = 1G).
+In other words, you configure the L1 LruBlockCache as you would normally, as you would when there is no L2 BucketCache present.
+
+link:https://issues.apache.org/jira/browse/HBASE-10641[HBASE-10641] introduced the ability to configure multiple sizes for the buckets of the bucketcache, in HBase 0.98 and newer.
+To configure multiple bucket sizes, configure the new property +hfile.block.cache.sizes+ (instead of +hfile.block.cache.size+) to a comma-separated list of block sizes, ordered from smallest to largest, with no spaces.
+The goal is to optimize the bucket sizes based on your data access patterns.
+The following example configures buckets of size 4096 and 8192.
+
+[source,xml]
+----
+<property>
+  <name>hfile.block.cache.sizes</name>
+  <value>4096,8192</value>
+</property>
+----
+
+.Direct Memory Usage In HBase
+[NOTE]
+====
+The default maximum direct memory varies by JVM.
+Traditionally it is 64M, some relation to the allocated heap size (-Xmx), or no limit at all (JDK7, apparently).
+HBase servers use direct memory; in particular, with short-circuit reading enabled, the hosted DFSClient will allocate direct memory buffers.
+If you do offheap block caching, you'll be making use of direct memory.
+When starting your JVM, make sure the [var]+-XX:MaxDirectMemorySize+ setting in [path]_conf/hbase-env.sh_ is set to some value that is higher than what you have allocated to your offheap blockcache ([var]+hbase.bucketcache.size+).
+It should be larger than your offheap block cache, and then some more for DFSClient usage (how much the DFSClient uses is not easy to quantify; it is the number of open hfiles * [var]+hbase.dfs.client.read.shortcircuit.buffer.size+, where [var]+hbase.dfs.client.read.shortcircuit.buffer.size+ is set to 128k in HBase -- see [path]_hbase-default.xml_ default configurations).
+Direct memory is part of the Java process memory but is separate from the object heap allocated by -Xmx.
+The value allocated by MaxDirectMemorySize must not exceed physical RAM, and is likely to be less than the total available RAM due to other memory requirements and system constraints.
+
+You can see how much memory -- onheap and offheap/direct -- a RegionServer is configured to use and how much it is using at any one time by looking at the _Server Metrics: Memory_ tab in the UI.
+It can also be gotten via JMX.
+In particular the direct memory currently used by the server can be found on the [var]+java.nio.type=BufferPool,name=direct+ bean.
+Terracotta has a link:http://terracotta.org/documentation/4.0/bigmemorygo/configuration/storage-options[good write up] on using offheap memory in java.
+It is for their product BigMemory, but a lot of the issues noted apply in general to any attempt at going offheap.
+Check it out.
+====
+
+.hbase.bucketcache.percentage.in.combinedcache
+[NOTE]
+====
+This is a pre-HBase 1.0 configuration removed because it was confusing.
+It was a float that you would set to some value between 0.0 and 1.0.
+Its default was 0.9.
+If the deploy was using CombinedBlockCache, then the LruBlockCache L1 size was calculated to be (1 - [var]+hbase.bucketcache.percentage.in.combinedcache+) * [var]+size-of-bucketcache+ and the BucketCache size was [var]+hbase.bucketcache.percentage.in.combinedcache+ * size-of-bucket-cache, where size-of-bucket-cache itself is EITHER the value of the configuration [var]+hbase.bucketcache.size+ IF it was specified in megabytes OR [var]+hbase.bucketcache.size+ * [var]+-XX:MaxDirectMemorySize+ if [var]+hbase.bucketcache.size+ is between 0 and 1.0.
+
+In 1.0, it should be more straightforward.
+L1 LruBlockCache size is set as a fraction of java heap using hfile.block.cache.size setting (not the best name) and L2 is set as above either in absolute megabytes or as a fraction of allocated maximum direct memory.
+====
+
+==== Compressed BlockCache
+
+link:https://issues.apache.org/jira/browse/HBASE-11331[HBASE-11331] introduced lazy blockcache decompression, more simply referred to as compressed blockcache.
+When compressed blockcache is enabled, data and encoded data blocks are cached in the blockcache in their on-disk format, rather than being decompressed and decrypted before caching.
+
+For a RegionServer hosting more data than can fit into cache, enabling this feature with SNAPPY compression has been shown to result in a 50% increase in throughput and a 30% improvement in mean latency, while increasing garbage collection by 80% and overall CPU load by 2%.
+See HBASE-11331 for more details about how performance was measured and achieved.
+For a RegionServer hosting data that can comfortably fit into cache, or if your workload is sensitive to extra CPU or garbage-collection load, you may receive less benefit.
+
+Compressed blockcache is disabled by default.
+To enable it, set [code]+hbase.block.data.cachecompressed+ to [code]+true+ in [path]_hbase-site.xml_ on all RegionServers.
+
+[[wal]]
+=== Write Ahead Log (WAL)
+
+[[purpose.wal]]
+==== Purpose
+
+The [firstterm]_Write Ahead Log (WAL)_ records all changes to data in HBase, to file-based storage.
+Under normal operations, the WAL is not needed because data changes move from the MemStore to StoreFiles.
+However, if a RegionServer crashes or becomes unavailable before the MemStore is flushed, the WAL ensures that the changes to the data can be replayed.
+If writing to the WAL fails, the entire operation to modify the data fails.
+
+HBase uses an implementation of the link:http://hbase.apache.org/devapidocs/org/apache/hadoop/hbase/wal/WAL.html[WAL] interface.
+Usually, there is only one instance of a WAL per RegionServer.
+The RegionServer records Puts and Deletes to it, before recording them to the <> for the affected <>.
+
+.The HLog
+[NOTE]
+====
+Prior to 2.0, the interface for WALs in HBase was named [class]+HLog+.
+In 0.94, HLog was the name of the implementation of the WAL.
+You will likely find references to the HLog in documentation tailored to these older versions.
+====
+
+The WAL resides in HDFS in the [path]_/hbase/WALs/_ directory (prior to HBase 0.94, they were stored in [path]_/hbase/.logs/_), with subdirectories per region.
+
+For more general information about the concept of write ahead logs, see the Wikipedia link:http://en.wikipedia.org/wiki/Write-ahead_logging[Write-Ahead Log] article.
+
+[[wal_flush]]
+==== WAL Flushing
+
+TODO (describe).
+
+==== WAL Splitting
+
+A RegionServer serves many regions.
+All of the regions in a region server share the same active WAL file.
+Each edit in the WAL file includes information about which region it belongs to.
+When a region is opened, the edits in the WAL file which belong to that region need to be replayed.
+Therefore, edits in the WAL file must be grouped by region so that particular sets can be replayed to regenerate the data in a particular region.
+The process of grouping the WAL edits by region is called [firstterm]_log
+ splitting_.
+It is a critical process for recovering data if a region server fails.
+
+Log splitting is done by the HMaster during cluster start-up or by the ServerShutdownHandler as a region server shuts down.
+So that consistency is guaranteed, affected regions are unavailable until data is restored.
+All WAL edits need to be recovered and replayed before a given region can become available again.
+As a result, regions affected by log splitting are unavailable until the process completes.
+
+.Procedure: Log Splitting, Step by Step
+. The [path]_/hbase/WALs/<host>,<port>,<startcode>_ directory is renamed.
++
+Renaming the directory is important because a RegionServer may still be up and accepting requests even if the HMaster thinks it is down.
+If the RegionServer does not respond immediately and does not heartbeat its ZooKeeper session, the HMaster may interpret this as a RegionServer failure.
+Renaming the logs directory ensures that existing, valid WAL files which are still in use by an active but busy RegionServer are not written to by accident.
++
+The new directory is named according to the following pattern:
++
+----
+/hbase/WALs/<host>,<port>,<startcode>-splitting
+----
++
+An example of such a renamed directory might look like the following:
++
+----
+/hbase/WALs/srv.example.com,60020,1254173957298-splitting
+----
+
+. Each log file is split, one at a time.
++
+The log splitter reads the log file one edit entry at a time and puts each edit entry into the buffer corresponding to the edit's region.
+At the same time, the splitter starts several writer threads.
+Writer threads pick up a corresponding buffer and write the edit entries in the buffer to a temporary recovered edit file.
+The temporary edit file is stored to disk with the following naming pattern:
++
+----
+/hbase/<table_name>/<region_id>/recovered.edits/.temp
+----
++
+This file is used to store all the edits in the WAL log for this region.
+After log splitting completes, the [path]_.temp_ file is renamed to the sequence ID of the first log written to the file.
++
+To determine whether all edits have been written, the sequence ID is compared to the sequence of the last edit that was written to the HFile.
+If the sequence of the last edit is greater than or equal to the sequence ID included in the file name, it is clear that all writes from the edit file have been completed.
+
+. After log splitting is complete, each affected region is assigned to a
+ RegionServer.
++
+When the region is opened, the [path]_recovered.edits_ folder is checked for recovered edits files.
+If any such files are present, they are replayed by reading the edits and saving them to the MemStore.
+After all edit files are replayed, the contents of the MemStore are written to disk (HFile) and the edit files are deleted.
+
+
+===== Handling of Errors During Log Splitting
+
+If you set the [var]+hbase.hlog.split.skip.errors+ option to [constant]+true+, errors are treated as follows:
+
+* Any error encountered during splitting will be logged.
+* The problematic WAL log will be moved into the [path]_.corrupt_ directory under the hbase [var]+rootdir+.
+* Processing of the WAL will continue.
+
+If the [var]+hbase.hlog.split.skip.errors+ option is set to [literal]+false+, the default, the exception will be propagated and the split will be logged as failed.
+See link:https://issues.apache.org/jira/browse/HBASE-2958[HBASE-2958 When
+ hbase.hlog.split.skip.errors is set to false, we fail the split but thats
+ it].
+We need to do more than just fail split if this flag is set.
+
+====== How EOFExceptions are treated when splitting a crashed RegionServer's WALs
+
+If an EOFException occurs while splitting logs, the split proceeds even when [var]+hbase.hlog.split.skip.errors+ is set to [literal]+false+.
+An EOFException while reading the last log in the set of files to split is likely, because the RegionServer is likely to be in the process of writing a record at the time of a crash.
+For background, see link:https://issues.apache.org/jira/browse/HBASE-2643[HBASE-2643
+ Figure how to deal with eof splitting logs]
+
+===== Performance Improvements during Log Splitting
+
+WAL log splitting and recovery can be resource intensive and take a long time, depending on the number of RegionServers involved in the crash and the size of the regions. <> and <> were developed to improve performance during log splitting.
+
+[[distributed.log.splitting]]
+====== Distributed Log Splitting
+
+[firstterm]_Distributed Log Splitting_ was added in HBase version 0.92 (link:https://issues.apache.org/jira/browse/HBASE-1364[HBASE-1364]) by Prakash Khemani from Facebook.
+It reduces the time to complete log splitting dramatically, improving the availability of regions and tables.
+For example, recovering a crashed cluster took around 9 hours with single-threaded log splitting, but only about six minutes with distributed log splitting.
+
+The information in this section is sourced from Jimmy Xiang's blog post at link:http://blog.cloudera.com/blog/2012/07/hbase-log-splitting/.
+
+.Enabling or Disabling Distributed Log Splitting
+
+Distributed log processing is enabled by default since HBase 0.92.
+The setting is controlled by the +hbase.master.distributed.log.splitting+ property, which can be set to [literal]+true+ or [literal]+false+, but defaults to [literal]+true+.
+
+[[log.splitting.step.by.step]]
+.Distributed Log Splitting, Step by Step
+
+After configuring distributed log splitting, the HMaster controls the process.
+The HMaster enrolls each RegionServer in the log splitting process, and the actual work of splitting the logs is done by the RegionServers.
+The general process for log splitting, as described in <> still applies here.
+
+. If distributed log processing is enabled, the HMaster creates a [firstterm]_split log manager_ instance when the cluster is started.
+ .. The split log manager manages all log files which need to be scanned and split.
+ .. The split log manager places all the logs into the ZooKeeper splitlog node ([path]_/hbase/splitlog_) as tasks.
+ .. You can view the contents of the splitlog by issuing the following +zkcli+ command. Example output is shown.
++
+----
+ls /hbase/splitlog
+[hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost8.sample.com%2C57020%2C1340474893275-splitting%2Fhost8.sample.com%253A57020.1340474893900,
+hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost3.sample.com%2C57020%2C1340474893299-splitting%2Fhost3.sample.com%253A57020.1340474893931,
+hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost4.sample.com%2C57020%2C1340474893287-splitting%2Fhost4.sample.com%253A57020.1340474893946]
+----
++
+The output contains some non-ASCII characters.
+When decoded, it looks much simpler:
++
+----
+[hdfs://host2.sample.com:56020/hbase/.logs
+/host8.sample.com,57020,1340474893275-splitting
+/host8.sample.com%3A57020.1340474893900,
+hdfs://host2.sample.com:56020/hbase/.logs
+/host3.sample.com,57020,1340474893299-splitting
+/host3.sample.com%3A57020.1340474893931,
+hdfs://host2.sample.com:56020/hbase/.logs
+/host4.sample.com,57020,1340474893287-splitting
+/host4.sample.com%3A57020.1340474893946]
+----
++
+The listing represents WAL file names to be scanned and split, which is a list of log splitting tasks.
+
+. The split log manager monitors the log-splitting tasks and workers.
++
+The split log manager is responsible for the following ongoing tasks:
++
+* Once the split log manager publishes all the tasks to the splitlog znode, it monitors these task nodes and waits for them to be processed.
+* Checks to see if there are any dead split log workers queued up.
+ If it finds tasks claimed by unresponsive workers, it will resubmit those tasks.
+ If the resubmit fails due to some ZooKeeper exception, the dead worker is queued up again for retry.
+* Checks to see if there are any unassigned tasks.
+ If it finds any, it creates an ephemeral rescan node so that each split log worker is notified to re-scan unassigned tasks via the [code]+nodeChildrenChanged+ ZooKeeper event.
+* Checks for tasks which are assigned but expired.
+ If any are found, they are moved back to [code]+TASK_UNASSIGNED+ state again so that they can be retried.
+ It is possible that these tasks are assigned to slow workers, or they may already be finished.
+ This is not a problem, because log splitting tasks have the property of idempotence.
+ In other words, the same log splitting task can be processed many times without causing any problem.
+* The split log manager watches the HBase split log znodes constantly.
+ If any split log task node data is changed, the split log manager retrieves the node data.
+ The node data contains the current state of the task.
+ You can use the +zkcli+ +get+ command to retrieve the current state of a task.
+ In the example output below, the first line of the output shows that the task is currently unassigned.
++
+----
+get /hbase/splitlog/hdfs%3A%2F%2Fhost2.sample.com%3A56020%2Fhbase%2F.logs%2Fhost6.sample.com%2C57020%2C1340474893287-splitting%2Fhost6.sample.com%253A57020.1340474893945
+
+unassigned host2.sample.com:57000
+cZxid = 0x7115
+ctime = Sat Jun 23 11:13:40 PDT 2012
+...
+----
++
+Based on the state of the task whose data is changed, the split log manager does one of the following:
++
+* Resubmit the task if it is unassigned
+* Heartbeat the task if it is assigned
+* Resubmit or fail the task if it is resigned (see <>)
+* Resubmit or fail the task if it is completed with errors (see <>)
+* Resubmit or fail the task if it could not complete due to errors (see <>)
+* Delete the task if it is successfully completed or failed
++
+.Reasons a Task Will Fail
+* The task has been deleted.
+* The node no longer exists.
+* The log status manager failed to move the state of the task to TASK_UNASSIGNED.
+* The number of resubmits is over the resubmit threshold.
+
+
+. Each RegionServer's split log worker performs the log-splitting tasks.
++
+Each RegionServer runs a daemon thread called the [firstterm]_split log
+ worker_, which does the work to split the logs.
+The daemon thread starts when the RegionServer starts, and registers itself to watch HBase znodes.
+If any splitlog znode children change, it notifies a sleeping worker thread to wake up and grab more tasks.
+If a worker's current task's node data is changed, the worker checks to see if the task has been taken by another worker.
+If so, the worker thread stops work on the current task.
++
+The worker monitors the splitlog znode constantly.
+When a new task appears, the split log worker retrieves the task paths and checks each one until it finds an unclaimed task, which it attempts to claim.
+If the claim was successful, it attempts to perform the task and updates the task's +state+ property based on the splitting outcome.
+At this point, the split log worker scans for another unclaimed task.
++
+.How the Split Log Worker Approaches a Task
+* It queries the task state and only takes action if the task is in [literal]+TASK_UNASSIGNED+ state.
+* If the task is in [literal]+TASK_UNASSIGNED+ state, the worker attempts to set the state to [literal]+TASK_OWNED+ by itself.
+ If it fails to set the state, another worker will try to grab it.
+ The split log manager will also ask all workers to rescan later if the task remains unassigned.
+* If the worker succeeds in taking ownership of the task, it tries to get the task state again to make sure it really gets it asynchronously.
+ In the meantime, it starts a split task executor to do the actual work:
++
+* Get the HBase root folder, create a temp folder under the root, and split the log file to the temp folder.
+* If the split was successful, the task executor sets the task to state [literal]+TASK_DONE+.
+* If the worker catches an unexpected IOException, the task is set to state [literal]+TASK_ERR+.
+* If the worker is shutting down, it sets the task to state [literal]+TASK_RESIGNED+.
+* If the task is taken by another worker, just log it.
+
+
+. The split log manager monitors for uncompleted tasks.
++
+The split log manager returns when all tasks are completed successfully.
+If all tasks are completed with some failures, the split log manager throws an exception so that the log splitting can be retried.
+Due to an asynchronous implementation, in very rare cases, the split log manager loses track of some completed tasks.
+For that reason, it periodically checks for remaining uncompleted tasks in its task map or ZooKeeper.
+If none are found, it throws an exception so that the log splitting can be retried right away instead of hanging there waiting for something that won't happen.
+
+
+[[distributed.log.replay]]
+====== Distributed Log Replay
+
+After a RegionServer fails, its failed region is assigned to another RegionServer, which is marked as "recovering" in ZooKeeper.
+A split log worker directly replays edits from the WAL of the failed region server to the region at its new location.
+When a region is in "recovering" state, it can accept writes but no reads (including Append and Increment), region splits or merges.
+
+Distributed Log Replay extends the <> framework.
+It works by directly replaying WAL edits to another RegionServer instead of creating [path]_recovered.edits_ files.
+It provides the following advantages over distributed log splitting alone:
+
+* It eliminates the overhead of writing and reading a large number of [path]_recovered.edits_ files.
+ It is not unusual for thousands of [path]_recovered.edits_ files to be created and written concurrently during a RegionServer recovery.
+ Many small random writes can degrade overall system performance.
+* It allows writes even when a region is in recovering state.
+ It only takes seconds for a recovering region to accept writes again.
+
+.Enabling Distributed Log Replay
+To enable distributed log replay, set [var]+hbase.master.distributed.log.replay+ to true.
+This will be the default for HBase 0.99 (link:https://issues.apache.org/jira/browse/HBASE-10888[HBASE-10888]).
+
+You must also enable HFile version 3 (which is the default HFile format starting in HBase 0.99; see link:https://issues.apache.org/jira/browse/HBASE-10855[HBASE-10855]).
+Distributed log replay is unsafe for rolling upgrades.
+
+[[wal.disable]]
+==== Disabling the WAL
+
+It is possible to disable the WAL, to improve performance in certain specific situations.
+However, disabling the WAL puts your data at risk.
+The only situation where this is recommended is during a bulk load.
+This is because, in the event of a problem, the bulk load can be re-run with no risk of data loss.
+
+The WAL is disabled by calling the HBase client method [code]+Mutation.writeToWAL(false)+.
+Use the [code]+Mutation.setDurability(Durability.SKIP_WAL)+ and [code]+Mutation.getDurability()+ methods to set and get the durability setting.
+There is no way to disable the WAL for only a specific table.
+
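+The following is a minimal sketch of skipping the WAL for a single Put, assuming an existing [code]+HTable+ instance named [code]+table+ and a column family [code]+cf+ (both illustrative):
+[source,java]
+----
+Put put = new Put(Bytes.toBytes("row1"));
+put.add(Bytes.toBytes("cf"), Bytes.toBytes("attr1"), Bytes.toBytes("value1"));
+// Skip the WAL for this mutation only; the edit is at risk until the MemStore is flushed
+put.setDurability(Durability.SKIP_WAL);
+table.put(put);
+----
+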
+WARNING: If you disable the WAL for anything other than bulk loads, your data is at risk.
+
+[[regions.arch]]
+== Regions
+
+Regions are the basic element of availability and distribution for tables, and are comprised of a Store per Column Family.
+The hierarchy of objects is as follows:
+
+[source]
+----
+Table (HBase table)
+ Region (Regions for the table)
+ Store (Store per ColumnFamily for each Region for the table)
+ MemStore (MemStore for each Store for each Region for the table)
+ StoreFile (StoreFiles for each Store for each Region for the table)
+ Block (Blocks within a StoreFile within a Store for each Region for the table)
+----
+
+For a description of what HBase files look like when written to HDFS, see <>.
+
+[[arch.regions.size]]
+=== Considerations for Number of Regions
+
+In general, HBase is designed to run with a small (20-200) number of relatively large (5-20Gb) regions per server.
+The considerations for this are as follows:
+
+[[too_many_regions]]
+==== Why can't I have too many regions?
+
+Typically you want to keep your region count low on HBase for numerous reasons.
+Usually right around 100 regions per RegionServer has yielded the best results.
+Here are some of the reasons below for keeping region count low:
+
+. MSLAB (MemStore-Local Allocation Buffer) requires 2 MB per MemStore (that's 2 MB per family per region). 1000 regions with 2 families each use 3.9 GB of heap, and it's not even storing data yet.
+ NB: the 2 MB value is configurable.
+. If you fill all the regions at roughly the same rate, the global memory usage forces tiny flushes when you have too many regions, which in turn generates compactions.
+ Rewriting the same data tens of times is the last thing you want.
+ For example, suppose you fill 1000 regions (with one family) equally, and consider a lower bound for global memstore usage of 5GB (the region server would have a big heap). Once usage reaches 5GB, the biggest region is force-flushed; at that point almost all regions hold about 5MB of data each, so only that amount is flushed.
+ 5MB inserted later, another region now holding a bit over 5MB of data is flushed, and so on.
+ This is currently the main limiting factor for the number of regions; see <> for the detailed formula.
+. The master, as currently implemented, is allergic to tons of regions, and will take a lot of time assigning them and moving them around in batches.
+ The reason is that it's heavy on ZooKeeper usage, and it's not very async at the moment (could really be improved -- and has been improved a bunch in HBase 0.96).
+. In older versions of HBase (pre-v2 hfile, 0.90 and previous), tons of regions on a few RSs can cause the store file index to rise, increasing heap usage and potentially creating memory pressure or OOME on the RSs.
+
+Another issue is the effect of the number of regions on mapreduce jobs; it is typical to have one mapper per HBase region.
+Thus, hosting only 5 regions per RS may not be enough to get sufficient number of tasks for a mapreduce job, while 1000 regions will generate far too many tasks.
+
+See <> for configuration guidelines.
+
+[[regions.arch.assignment]]
+=== Region-RegionServer Assignment
+
+This section describes how Regions are assigned to RegionServers.
+
+[[regions.arch.assignment.startup]]
+==== Startup
+
+When HBase starts, regions are assigned as follows (short version):
+
+. The Master invokes the [code]+AssignmentManager+ upon startup.
+. The [code]+AssignmentManager+ looks at the existing region assignments in META.
+. If the region assignment is still valid (i.e., if the RegionServer is still online) then the assignment is kept.
+. If the assignment is invalid, then the [code]+LoadBalancerFactory+ is invoked to assign the region.
+ The [code]+DefaultLoadBalancer+ will randomly assign the region to a RegionServer.
+. META is updated with the RegionServer assignment (if needed) and the RegionServer start codes (start time of the RegionServer process) upon region opening by the RegionServer.
+
+[[regions.arch.assignment.failover]]
+==== Failover
+
+When a RegionServer fails:
+
+. The regions immediately become unavailable because the RegionServer is down.
+. The Master will detect that the RegionServer has failed.
+. The region assignments will be considered invalid and will be re-assigned just like the startup sequence.
+. In-flight queries are re-tried, and not lost.
+. Operations are switched to a new RegionServer within the following amount of time:
++
+[source]
+----
+ZooKeeper session timeout + split time + assignment/replay time
+----
+
+
+[[regions.arch.balancer]]
+==== Region Load Balancing
+
+Regions can be periodically moved by the <>.
+
+[[regions.arch.states]]
+==== Region State Transition
+
+HBase maintains a state for each region and persists the state in META.
+The state of the META region itself is persisted in ZooKeeper.
+You can see the states of regions in transition in the Master web UI.
+Following is the list of possible region states.
+
+
+.Possible Region States
+* OFFLINE: the region is offline and not opening
+* OPENING: the region is in the process of being opened
+* OPEN: the region is open and the region server has notified the master
+* FAILED_OPEN: the region server failed to open the region
+* CLOSING: the region is in the process of being closed
+* CLOSED: the region server has closed the region and notified the master
+* FAILED_CLOSE: the region server failed to close the region
+* SPLITTING: the region server notified the master that the region is splitting
+* SPLIT: the region server notified the master that the region has finished splitting
+* SPLITTING_NEW: this region is being created by a split which is in progress
+* MERGING: the region server notified the master that this region is being merged with another region
+* MERGED: the region server notified the master that this region has been merged
+* MERGING_NEW: this region is being created by a merge of two regions
+
+.Region State Transitions
+image::region_states.png[]
+
+.Graph Legend
+* Brown: Offline state, a special state that can be transient (after being closed and before opening), terminal (regions of disabled tables), or initial (regions of newly created tables)
+* Palegreen: Online state in which regions can serve requests
+* Lightblue: Transient states
+* Red: Failure states that need OPS attention
+* Gold: Terminal states of regions split/merged
+* Grey: Initial states of regions created through split/merge
+
+.Transition State Descriptions
+. The master moves a region from [literal]+OFFLINE+ to [literal]+OPENING+ state and tries to assign the region to a region server.
+ The region server may or may not have received the open region request.
+ The master retries sending the open region request to the region server until the RPC goes through or the master runs out of retries.
+ After the region server receives the open region request, the region server begins opening the region.
+. If the master runs out of retries, it prevents the region server from opening the region by moving the region to [literal]+CLOSING+ state and trying to close it, even if the region server is starting to open the region.
+. After the region server opens the region, it continues to try to notify the master until the master moves the region to [literal]+OPEN+ state and notifies the region server.
+ The region is now open.
+. If the region server cannot open the region, it notifies the master.
+ The master moves the region to [literal]+CLOSED+ state and tries to open the region on a different region server.
+. If the master cannot open the region on any of a certain number of region servers, it moves the region to [literal]+FAILED_OPEN+ state, and takes no further action until an operator intervenes from the HBase shell, or the server is dead.
+. The master moves a region from [literal]+OPEN+ to [literal]+CLOSING+ state.
+ The region server holding the region may or may not have received the close region request.
+ The master retries sending the close request to the server until the RPC goes through or the master runs out of retries.
+. If the region server is not online, or throws [code]+NotServingRegionException+, the master moves the region to [literal]+OFFLINE+ state and re-assigns it to a different region server.
+. If the region server is online, but not reachable after the master runs out of retries, the master moves the region to [literal]+FAILED_CLOSE+ state and takes no further action until an operator intervenes from the HBase shell, or the server is dead.
+. If the region server gets the close region request, it closes the region and notifies the master.
+ The master moves the region to [literal]+CLOSED+ state and re-assigns it to a different region server.
+. Before assigning a region, the master moves the region to [literal]+OFFLINE+ state automatically if it is in [literal]+CLOSED+ state.
+. When a region server is about to split a region, it notifies the master.
+ The master moves the region to be split from [literal]+OPEN+ to [literal]+SPLITTING+ state and adds the two new regions to be created to the region server.
+ These two regions are in [literal]+SPLITTING_NEW+ state initially.
+. After notifying the master, the region server starts to split the region.
+ Once past the point of no return, the region server notifies the master again so the master can update the META.
+ However, the master does not update the region states until it is notified by the server that the split is done.
+ If the split is successful, the splitting region is moved from [literal]+SPLITTING+ to [literal]+SPLIT+ state and the two new regions are moved from [literal]+SPLITTING_NEW+ to [literal]+OPEN+ state.
+. If the split fails, the splitting region is moved from [literal]+SPLITTING+ back to [literal]+OPEN+ state, and the two new regions which were created are moved from [literal]+SPLITTING_NEW+ to [literal]+OFFLINE+ state.
+. When a region server is about to merge two regions, it notifies the master first.
+ The master moves the two regions to be merged from [literal]+OPEN+ to [literal]+MERGING+ state, and adds the new region which will hold the contents of the merged regions to the region server.
+ The new region is in [literal]+MERGING_NEW+ state initially.
+. After notifying the master, the region server starts to merge the two regions.
+ Once past the point of no return, the region server notifies the master again so the master can update the META.
+ However, the master does not update the region states until it is notified by the region server that the merge has completed.
+ If the merge is successful, the two merging regions are moved from [literal]+MERGING+ to [literal]+MERGED+ state and the new region is moved from [literal]+MERGING_NEW+ to [literal]+OPEN+ state.
+. If the merge fails, the two merging regions are moved from [literal]+MERGING+ back to [literal]+OPEN+ state, and the new region which was created to hold the contents of the merged regions is moved from [literal]+MERGING_NEW+ to [literal]+OFFLINE+ state.
+. For regions in [literal]+FAILED_OPEN+ or [literal]+FAILED_CLOSE+ states, the master tries to close them again when they are reassigned by an operator via the HBase shell.
+
+[[regions.arch.locality]]
+=== Region-RegionServer Locality
+
+Over time, Region-RegionServer locality is achieved via HDFS block replication.
+The HDFS client does the following by default when choosing locations to write replicas:
+
+. First replica is written to local node
+. Second replica is written to a random node on another rack
+. Third replica is written on the same rack as the second, but on a different node chosen randomly
+. Subsequent replicas are written on random nodes on the cluster.
+ See _Replica Placement: The First Baby Steps_ on this page: link:http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html[HDFS Architecture]
+
+Thus, HBase eventually achieves locality for a region after a flush or a compaction.
+In a RegionServer failover situation a RegionServer may be assigned regions with non-local StoreFiles (because none of the replicas are local), however as new data is written in the region, or the table is compacted and StoreFiles are re-written, they will become "local" to the RegionServer.
+
+For more information, see _Replica Placement: The First Baby Steps_ on this page: link:http://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html[HDFS Architecture] and also Lars George's blog on link:http://www.larsgeorge.com/2010/05/hbase-file-locality-in-hdfs.html[HBase and HDFS locality].
+
+[[arch.region.splits]]
+=== Region Splits
+
+Regions split when they reach a configured threshold.
+Below we treat the topic in short.
+For a longer exposition, see link:http://hortonworks.com/blog/apache-hbase-region-splitting-and-merging/[Apache HBase Region Splitting and Merging] by our Enis Soztutar.
+
+Splits run unaided on the RegionServer; that is, the Master does not participate.
+The RegionServer splits a region, offlines the split region, adds the daughter regions to META, opens the daughters on the parent's hosting RegionServer, and then reports the split to the Master.
+See <> for how to manually manage splits (and for why you might do this).
+
+==== Custom Split Policies
+
+The default split policy can be overwritten using a custom link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.html[RegionSplitPolicy] (HBase 0.94+). Typically a custom split policy should extend HBase's default split policy: link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.html[ConstantSizeRegionSplitPolicy].
+
+The policy can be set globally through the HBaseConfiguration used, or on a per-table basis:
+[source,java]
+----
+
+HTableDescriptor myHtd = ...;
+myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName());
+----
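+
+To set the policy globally instead, as a minimal sketch you can name the class in the configuration via the [var]+hbase.regionserver.region.split.policy+ property; [code]+MyCustomSplitPolicy+ is the same hypothetical class as above:
+[source,java]
+----
+Configuration conf = HBaseConfiguration.create();
+conf.set("hbase.regionserver.region.split.policy",
+         MyCustomSplitPolicy.class.getName());
+----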
+
+[[manual_region_splitting_decisions]]
+=== Manual Region Splitting
+
+It is possible to manually split your table, either at table creation (pre-splitting), or at a later time as an administrative action.
+You might choose to split your region for one or more of the following reasons.
+There may be other valid reasons, but the need to manually split your table might also point to problems with your schema design.
+
+.Reasons to Manually Split Your Table
+* Your data is sorted by timeseries or another similar algorithm that sorts new data at the end of the table.
+ This means that the Region Server holding the last region is always under load, and the other Region Servers are idle, or mostly idle.
+ See also <>.
+* You have developed an unexpected hotspot in one region of your table.
+ For instance, an application which tracks web searches might be inundated by a lot of searches for a celebrity in the event of news about that celebrity.
+ See <> for more discussion about this particular scenario.
+* After a big increase to the number of Region Servers in your cluster, to get the load spread out quickly.
+* Before a bulk-load which is likely to cause unusual and uneven load across regions.
+
+See <> for a discussion about the dangers and possible benefits of managing splitting completely manually.
+
+==== Determining Split Points
+
+The goal of splitting your table manually is to improve the chances of balancing the load across the cluster in situations where good rowkey design alone won't get you there.
+Keeping that in mind, the way you split your regions is very dependent upon the characteristics of your data.
+It may be that you already know the best way to split your table.
+If not, the way you split your table depends on what your keys are like.
+
+Alphanumeric Rowkeys::
+ If your rowkeys start with a letter or number, you can split your table at letter or number boundaries.
+ For instance, you can create a table with regions that split at each vowel, so the first region has A-D, the second region has E-H, the third region has I-N, the fourth region has O-T, and the fifth region has U-Z (a sketch follows this list).
+
+Using a Custom Algorithm::
+ The RegionSplitter tool is provided with HBase, and uses a [firstterm]_SplitAlgorithm_ to determine split points for you.
+ As parameters, you give it the algorithm, desired number of regions, and column families.
+ It includes two split algorithms.
+ The first is the [code]+HexStringSplit+ algorithm, which assumes the row keys are hexadecimal strings.
+ The second, link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/util/RegionSplitter.UniformSplit.html[UniformSplit], assumes the row keys are random byte arrays.
+ You will probably need to develop your own SplitAlgorithm, using the provided ones as models.
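+
+As a minimal sketch of pre-splitting at each vowel through the Java client API (the table name [code]+test_table+ and column family [code]+f+ are illustrative, and an existing [code]+Configuration+ is assumed):
+[source,java]
+----
+HBaseAdmin admin = new HBaseAdmin(conf);
+HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("test_table"));
+desc.addFamily(new HColumnDescriptor("f"));
+// Split points at the vowels e, i, o, u yield five regions:
+// [start, e), [e, i), [i, o), [o, u), [u, end)
+byte[][] splitPoints = {
+  Bytes.toBytes("e"), Bytes.toBytes("i"), Bytes.toBytes("o"), Bytes.toBytes("u")
+};
+admin.createTable(desc, splitPoints);
+admin.close();
+----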
+
+=== Online Region Merges
+
+Both the Master and the RegionServer participate in the event of online region merges.
+The client sends a merge RPC to the Master; the Master then moves the regions to be merged to the RegionServer hosting the more heavily loaded region, and finally sends the merge request to that RegionServer, which runs the merge.
+Similar to the region split process, the region merge runs as a local transaction on the RegionServer: it offlines the regions, merges the two regions on the file system, atomically deletes the merging regions from META and adds the merged region to META, opens the merged region on the RegionServer, and reports the merge to the Master.
+
+An example of region merges in the HBase shell:
+[source,bourne]
+----
+$ hbase> merge_region 'ENCODED_REGIONNAME', 'ENCODED_REGIONNAME'
+ hbase> merge_region 'ENCODED_REGIONNAME', 'ENCODED_REGIONNAME', true
+----
+This is an asynchronous operation, and the call returns immediately without waiting for the merge to complete.
+Passing 'true' as the optional third parameter will force the merge: normally only adjacent regions can be merged, and a non-forced merge of non-adjacent regions will fail.
+The 'force' option is for expert use only.
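+
+The same merge can be requested through the Java client API; the following is a minimal sketch, assuming an existing [code]+HBaseAdmin+ instance named [code]+admin+ and two encoded region names taken from the web UI or [code]+hbase:meta+:
+[source,java]
+----
+String encodedNameA = "...";  // encoded name of the first region (placeholder)
+String encodedNameB = "...";  // encoded name of the second region (placeholder)
+// The third argument is the 'force' flag; false requires the regions to be adjacent
+admin.mergeRegions(Bytes.toBytes(encodedNameA), Bytes.toBytes(encodedNameB), false);
+----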
+
+=== Store
+
+A Store hosts a MemStore and 0 or more StoreFiles (HFiles). A Store corresponds to a column family for a table for a given region.
+
+[[store.memstore]]
+==== MemStore
+
+The MemStore holds in-memory modifications to the Store.
+Modifications are Cells/KeyValues.
+When a flush is requested, the current memstore is moved to a snapshot and is cleared.
+HBase continues to serve edits from the new memstore and backing snapshot until the flusher reports that the flush succeeded.
+At this point, the snapshot is discarded.
+Note that when the flush happens, Memstores that belong to the same region will all be flushed.
+
+==== MemStoreFlush
+
+A MemStore flush can be triggered under any of the conditions listed below.
+The minimum flush unit is per region, not at individual MemStore level.
+
+. When a MemStore reaches the value specified by [var]+hbase.hregion.memstore.flush.size+, all MemStores that belong to its region will be flushed out to disk.
+. When overall memstore usage reaches the value specified by [var]+hbase.regionserver.global.memstore.upperLimit+, MemStores from various regions will be flushed out to disk to reduce overall MemStore usage in a Region Server.
+ The flush order is based on the descending order of a region's MemStore usage.
+ Regions will have their MemStores flushed until the overall MemStore usage drops to or slightly below [var]+hbase.regionserver.global.memstore.lowerLimit+.
+. When the number of WAL files per region server reaches the value specified in [var]+hbase.regionserver.max.logs+, MemStores from various regions will be flushed out to disk to reduce WAL count.
+ The flush order is based on time.
+ Regions with the oldest MemStores are flushed first until WAL count drops below [var]+hbase.regionserver.max.logs+.
+
+[[hregion.scans]]
+==== Scans
+
+* When a client issues a scan against a table, HBase generates [code]+RegionScanner+ objects, one per region, to serve the scan request.
+* The [code]+RegionScanner+ object contains a list of [code]+StoreScanner+ objects, one per column family.
+* Each [code]+StoreScanner+ object further contains a list of [code]+StoreFileScanner+ objects, corresponding to each StoreFile and HFile of the corresponding column family, and a list of [code]+KeyValueScanner+ objects for the MemStore.
+* The two lists are merged into one, which is sorted in ascending order with the scan object for the MemStore at the end of the list.
+* When a [code]+StoreFileScanner+ object is constructed, it is associated with a [code]+MultiVersionConsistencyControl+ read point, which is the current [code]+memstoreTS+, filtering out any new updates beyond the read point.
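+
+As a client-side illustration of what triggers this machinery, the following minimal sketch scans one column family; the table and family names are illustrative and an existing [code]+Configuration+ is assumed:
+[source,java]
+----
+HTable table = new HTable(conf, "my_table");
+Scan scan = new Scan();
+scan.addFamily(Bytes.toBytes("cf"));             // one StoreScanner per scanned column family
+ResultScanner scanner = table.getScanner(scan);  // server side: one RegionScanner per region
+try {
+  for (Result result : scanner) {
+    // process each row
+  }
+} finally {
+  scanner.close();
+  table.close();
+}
+----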
+
+[[hfile]]
+==== StoreFile (HFile)
+
+StoreFiles are where your data lives.
+
+===== HFile Format
+
+The _hfile_ file format is based on the SSTable file described in the link:http://research.google.com/archive/bigtable.html[BigTable [2006]] paper and on Hadoop's link:http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/io/file/tfile/TFile.html[tfile] (The unit test suite and the compression harness were taken directly from tfile). Schubert Zhang's blog post on link:http://cloudepr.blogspot.com/2009/09/hfile-block-indexed-file-format-to.html[HFile: A Block-Indexed File Format to Store Sorted Key-Value Pairs] makes for a thorough introduction to HBase's hfile.
+Matteo Bertozzi has also put up a helpful description, link:http://th30z.blogspot.com/2011/02/hbase-io-hfile.html?spref=tw[HBase I/O: HFile].
+
+For more information, see the link:http://hbase.apache.org/xref/org/apache/hadoop/hbase/io/hfile/HFile.html[HFile source code].
+Also see <> for information about the HFile v2 format that was included in 0.92.
+
+===== HFile Tool
+
+To view a textualized version of HFile content, you can use the [class]+org.apache.hadoop.hbase.io.hfile.HFile+ tool.
+Type the following to see usage:
+[source,bourne]
+----
+$ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile
+----
+For example, to view the content of the file [path]_hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475_, type the following:
+[source,bourne]
+----
+ $ ${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.io.hfile.HFile -v -f hdfs://10.81.47.41:8020/hbase/TEST/1418428042/DSMP/4759508618286845475
+----
+If you leave off the -v option, you see just a summary of the HFile.
+See usage for other things to do with the [class]+HFile+ tool.
+
+[[store.file.dir]]
+===== StoreFile Directory Structure on HDFS
+
+For more information of what StoreFiles look like on HDFS with respect to the directory structure, see <>.
+
+[[hfile.blocks]]
+==== Blocks
+
+StoreFiles are composed of blocks.
+The blocksize is configured on a per-ColumnFamily basis.
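+
+As a minimal sketch, the block size can be set on the column family descriptor when creating or altering a table; the family name and the 64 KB value are only illustrative:
+[source,java]
+----
+HColumnDescriptor cf = new HColumnDescriptor("cf");
+cf.setBlocksize(64 * 1024);  // block size in bytes
+----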
+
+Compression happens at the block level within StoreFiles.
+For more information on compression, see <>.
+
+For more information on blocks, see the link:http://hbase.apache.org/xref/org/apache/hadoop/hbase/io/hfile/HFileBlock.html[HFileBlock source code].
+
+==== KeyValue
+
+The KeyValue class is the heart of data storage in HBase.
+KeyValue wraps a byte array, together with offsets and lengths into the passed array which specify where to start interpreting the content as KeyValue.
+
+The KeyValue format inside a byte array is:
+
+* keylength
+* valuelength
+* key
+* value
+
+The Key is further decomposed as:
+
+* rowlength
+* row (i.e., the rowkey)
+* columnfamilylength
+* columnfamily
+* columnqualifier
+* timestamp
+* keytype (e.g., Put, Delete, DeleteColumn, DeleteFamily)
+
+KeyValue instances are _not_ split across blocks.
+For example, if there is an 8 MB KeyValue, even if the block size is 64 KB, this KeyValue will be read in as a coherent block.
+For more information, see the link:http://hbase.apache.org/xref/org/apache/hadoop/hbase/KeyValue.html[KeyValue source code].
+
+[[keyvalue.example]]
+===== Example
+
+To emphasize the points above, examine what happens with two Puts for two different columns for the same row:
+
+* Put #1: [code]+rowkey=row1, cf:attr1=value1+
+* Put #2: [code]+rowkey=row1, cf:attr2=value2+
+
+Even though these are for the same row, a KeyValue is created for each column:
+
+Key portion for Put #1:
+
+* rowlength [code]+------------> 4+
+* row [code]+-----------------> row1+
+* columnfamilylength [code]+---> 2+
+* columnfamily [code]+--------> cf+
+* columnqualifier [code]+------> attr1+
+* timestamp [code]+-----------> server time of Put+
+* keytype [code]+-------------> Put+
+
+Key portion for Put #2:
+
+* rowlength [code]+------------> 4+
+* row [code]+-----------------> row1+
+* columnfamilylength [code]+---> 2+
+* columnfamily [code]+--------> cf+
+* columnqualifier [code]+------> attr2+
+* timestamp [code]+-----------> server time of Put+
+* keytype [code]+-------------> Put+
+
+It is critical to understand that the rowkey, ColumnFamily, and column (aka columnqualifier) are embedded within the KeyValue instance.
+The longer these identifiers are, the bigger the KeyValue is.
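+
+For reference, the two Puts above correspond to client code along the lines of the following sketch; an existing table handle named [code]+table+ and a column family [code]+cf+ are assumed:
+[source,java]
+----
+Put put1 = new Put(Bytes.toBytes("row1"));
+put1.add(Bytes.toBytes("cf"), Bytes.toBytes("attr1"), Bytes.toBytes("value1"));
+Put put2 = new Put(Bytes.toBytes("row1"));
+put2.add(Bytes.toBytes("cf"), Bytes.toBytes("attr2"), Bytes.toBytes("value2"));
+// Each Put results in its own KeyValue, even though both target the same row
+table.put(put1);
+table.put(put2);
+----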
+
+==== Compaction
+
+.Ambiguous Terminology
+* A [firstterm]_StoreFile_ is a facade of HFile.
+ In terms of compaction, use of StoreFile seems to have prevailed in the past.
+* A [firstterm]_Store_ is the same thing as a ColumnFamily.
+ StoreFiles are related to a Store, or ColumnFamily.
+* If you want to read more about StoreFiles versus HFiles and Stores versus ColumnFamilies, see link:https://issues.apache.org/jira/browse/HBASE-11316[HBASE-11316].
+
+When the MemStore reaches a given size ([code]+hbase.hregion.memstore.flush.size+), it flushes its contents to a StoreFile.
+The number of StoreFiles in a Store increases over time. [firstterm]_Compaction_ is an operation which reduces the number of StoreFiles in a Store, by merging them together, in order to increase performance on read operations.
+Compactions can be resource-intensive to perform, and can either help or hinder performance depending on many factors.
+
+Compactions fall into two categories: minor and major.
+Minor and major compactions differ in the following ways.
+
+[firstterm]_Minor compactions_ usually select a small number of small, adjacent StoreFiles and rewrite them as a single StoreFile.
+Minor compactions do not drop (filter out) deletes or expired versions, because of potential side effects.
+See <> and <> for information on how deletes and versions are handled in relation to compactions.
+The end result of a minor compaction is fewer, larger StoreFiles for a given Store.
+
+The end result of a [firstterm]_major compaction_ is a single StoreFile per Store.
+Major compactions also process delete markers and max versions.
+See <> and <> for information on how deletes and versions are handled in relation to compactions.
+
+.Compaction and Deletions
+When an explicit deletion occurs in HBase, the data is not actually deleted.
+Instead, a [firstterm]_tombstone_ marker is written.
+The tombstone marker prevents the data from being returned with queries.
+During a major compaction, the data is actually deleted, and the tombstone marker is removed from the StoreFile.
+If the deletion happens because of an expired TTL, no tombstone is created.
+Instead, the expired data is filtered out and is not written back to the compacted StoreFile.
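+
+As a small illustration, an explicit delete issued through the client API writes a tombstone rather than removing data immediately; an existing table handle named [code]+table+ is assumed:
+[source,java]
+----
+Delete delete = new Delete(Bytes.toBytes("row1"));
+// This writes a tombstone marker; the underlying data is only removed
+// when a major compaction rewrites the StoreFiles.
+table.delete(delete);
+----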
+
+.Compaction and Versions
+When you create a Column Family, you can specify the maximum number of versions to keep, by specifying [var]+HColumnDescriptor.setMaxVersions(int versions)+.
+The default value is [literal]+3+.
+If more versions than the specified maximum exist, the excess versions are filtered out and not written back to the compacted StoreFile.
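+
+The following is a minimal sketch of setting the maximum number of versions at table creation time; the table and family names are illustrative and an existing [code]+HBaseAdmin+ named [code]+admin+ is assumed:
+[source,java]
+----
+HColumnDescriptor cf = new HColumnDescriptor("cf");
+cf.setMaxVersions(3);  // keep at most 3 versions per cell (the default)
+HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("my_table"));
+desc.addFamily(cf);
+admin.createTable(desc);
+----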
+
+.Major Compactions Can Impact Query Results
+[NOTE]
+====
+In some situations, older versions can be inadvertently resurrected if a newer version is explicitly deleted.
+See <> for a more in-depth explanation.
+This situation is only possible before the compaction finishes.
+====
+
+In theory, major compactions improve performance.
+However, on a highly loaded system, major compactions can require an inappropriate number of resources and adversely affect performance.
+In a default configuration, major compactions are scheduled automatically to run once in a 7-day period.
+This is sometimes inappropriate for systems in production.
+You can manage major compactions manually.
+See <>.
+
+Compactions do not perform region merges.
+See <> for more information on region merging.
+
+[[compaction.file.selection]]
+===== Compaction Policy - HBase 0.96.x and newer
+
+Compacting large StoreFiles, or too many StoreFiles at once, can cause more IO load than your cluster is able to handle without causing performance problems.
+The method by which HBase selects which StoreFiles to include in a compaction (and whether the compaction is a minor or major compaction) is called the [firstterm]_compaction
+ policy_.
+
+Prior to HBase 0.96.x, there was only one compaction policy.
+That original compaction policy is still available as [systemitem]+RatioBasedCompactionPolicy+. The new default compaction policy, called [systemitem]+ExploringCompactionPolicy+, was subsequently backported to HBase 0.94 and HBase 0.95, and is the default in HBase 0.96 and newer.
+It was implemented in link:https://issues.apache.org/jira/browse/HBASE-7842[HBASE-7842].
+In short, [systemitem]+ExploringCompactionPolicy+ attempts to select the best possible set of StoreFiles to compact with the least amount of work, while the [systemitem]+RatioBasedCompactionPolicy+ selects the first set that meets the criteria.
+
+Regardless of the compaction policy used, file selection is controlled by several configurable parameters and happens in a multi-step approach.
+These parameters will be explained in context, and then will be given in a table which shows their descriptions and the implications of changing them.
+
+[[compaction.being.stuck]]
+====== Being Stuck
+
+When the MemStore gets too large, it needs to flush its contents to a StoreFile.
+However, a Store can only have up to [var]+hbase.hstore.blockingStoreFiles+ files, so when that limit is reached, the MemStore needs to wait for the number of StoreFiles to be reduced by one or more compactions.
+In the meantime, the MemStore may grow well beyond [var]+hbase.hregion.memstore.flush.size+ without being able to flush its contents to a StoreFile.
+If the MemStore is too large and the number of StoreFiles is also too high, the algorithm is said to be "stuck". The compaction algorithm checks for this "stuck" situation and provides mechanisms to alleviate it.
+
+[[exploringcompaction.policy]]
+====== The ExploringCompactionPolicy Algorithm
+
+The ExploringCompactionPolicy algorithm considers each possible set of adjacent StoreFiles before choosing the set where compaction will have the most benefit.
+
+One situation where the ExploringCompactionPolicy works especially well is when you are bulk-loading data and the bulk loads create larger StoreFiles than the StoreFiles which are holding data older than the bulk-loaded data.
+This can "trick" HBase into choosing to perform a major compaction each time a compaction is needed, and cause a lot of extra overhead.
+With the ExploringCompactionPolicy, major compactions happen much less frequently because minor compactions are more efficient.
+
+In general, ExploringCompactionPolicy is the right choice for most situations, and thus is the default compaction policy.
+You can also use ExploringCompactionPolicy along with <>.
+
+The logic of this policy can be examined in [path]_hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/ExploringCompactionPolicy.java_.
+The following is a walk-through of the logic of the ExploringCompactionPolicy.
+
+
+. Make a list of all existing StoreFiles in the Store.
+ The rest of the algorithm filters this list to come up with the subset of HFiles which will be chosen for compaction.
+. If this was a user-requested compaction, attempt to perform the requested compaction type, regardless of what would normally be chosen.
+ Note that even if the user requests a major compaction, it may not be possible to perform a major compaction.
+ This may be because not all StoreFiles in the Column Family are available to compact or because there are too many StoreFiles in the Column Family.
+. Some StoreFiles are automatically excluded from consideration.
+ These include:
++
+* StoreFiles that are larger than [var]+hbase.hstore.compaction.max.size+
+* StoreFiles that were created by a bulk-load operation which explicitly excluded compaction.
+ You may decide to exclude StoreFiles resulting from bulk loads, from compaction.
+ To do this, specify the [var]+hbase.mapreduce.hfileoutputformat.compaction.exclude+ parameter during the bulk load operation.
+
+. Iterate through the list from step 1, and make a list of all potential sets of StoreFiles to compact together.
+ A potential set is a grouping of [var]+hbase.hstore.compaction.min+ contiguous StoreFiles in the list.
+ For each set, perform some sanity-checking and figure out whether this is the best compaction that could be done:
++
+* If the number of StoreFiles in this set (not the size of the StoreFiles) is fewer than [var]+hbase.hstore.compaction.min+ or more than [var]+hbase.hstore.compaction.max+, take it out of consideration.
+* Compare the size of this set of StoreFiles with the size of the smallest possible compaction that has been found in the list so far.
+ If the size of this set of StoreFiles represents the smallest compaction that could be done, store it to be used as a fall-back if the algorithm is "stuck" and no StoreFiles would otherwise be chosen.
+ See <>.
+* Do size-based sanity checks against each StoreFile in this set of StoreFiles.
++
+* If the size of this StoreFile is larger than [var]+hbase.hstore.compaction.max.size+, take it out of consideration.
+* If the size is greater than or equal to [var]+hbase.hstore.compaction.min.size+, sanity-check it against the file-based ratio to see whether it is too large to be considered.
+ The sanity-checking is successful if:
++
+* There is only one StoreFile in this set, or
+* For each StoreFile, its size multiplied by [var]+hbase.hstore.compaction.ratio+ (or [var]+hbase.hstore.compaction.ratio.offpeak+ if off-peak hours are configured and it is during off-peak hours) is less than the sum of the sizes of the other HFiles in the set.
+
+
+
+. If this set of StoreFiles is still in consideration, compare it to the previously-selected best compaction.
+ If it is better, replace the previously-selected best compaction with this one.
+. When the entire list of potential compactions has been processed, perform the best compaction that was found.
+ If no StoreFiles were selected for compaction, but there are multiple StoreFiles, assume the algorithm is stuck (see <>) and if so, perform the smallest compaction that was found in step 3.
+
+[[compaction.ratiobasedcompactionpolicy.algorithm]]
+====== RatioBasedCompactionPolicy Algorithm
+
+The RatioBasedCompactionPolicy was the only compaction policy prior to HBase 0.96, though ExploringCompactionPolicy has now been backported to HBase 0.94 and 0.95.
+To use the RatioBasedCompactionPolicy rather than the ExploringCompactionPolicy, set [var]+hbase.hstore.defaultengine.compactionpolicy.class+ to [literal]+RatioBasedCompactionPolicy+ in the [path]_hbase-site.xml_ file.
+To switch back to the ExploringCompactionPolicy, remove the setting from the [path]_hbase-site.xml_.
+
+The following section walks you through the algorithm used to select StoreFiles for compaction in the RatioBasedCompactionPolicy.
+
+
+. The first phase is to create a list of all candidates for compaction.
+ A list is created of all StoreFiles not already in the compaction queue, and all StoreFiles newer than the newest file that is currently being compacted.
+ This list of StoreFiles is ordered by the sequence ID.
+ The sequence ID is generated when a Put is appended to the write-ahead log (WAL), and is stored in the metadata of the HFile.
+. Check to see if the algorithm is stuck (see <>), and if so, a major compaction is forced.
+ This is a key area where <> is often a better choice than the RatioBasedCompactionPolicy.
+. If the compaction was user-requested, try to perform the type of compaction that was requested.
+ Note that a major compaction may not be possible if all HFiles are not available for compaction or if too many StoreFiles exist (more than [var]+hbase.hstore.compaction.max+).
+. Some StoreFiles are automatically excluded from consideration.
+ These include:
++
+* StoreFiles that are larger than [var]+hbase.hstore.compaction.max.size+
+* StoreFiles that were created by a bulk-load operation which explicitly excluded compaction.
+ You may decide to exclude StoreFiles resulting from bulk loads, from compaction.
+ To do this, specify the [var]+hbase.mapreduce.hfileoutputformat.compaction.exclude+ parameter during the bulk load operation.
+
+. The maximum number of StoreFiles allowed in a major compaction is controlled by the [var]+hbase.hstore.compaction.max+ parameter.
+ If the list contains more than this number of StoreFiles, a minor compaction is performed even if a major compaction would otherwise have been done.
+ However, a user-requested major compaction still occurs even if there are more than [var]+hbase.hstore.compaction.max+ StoreFiles to compact.
+. If the list contains fewer than [var]+hbase.hstore.compaction.min+ StoreFiles to compact, a minor compaction is aborted.
+ Note that a major compaction can be performed on a single HFile.
+ Its function is to remove deletes and expired versions, and reset locality on the StoreFile.
+. The value of the [var]+hbase.hstore.compaction.ratio+ parameter is multiplied by the sum of StoreFiles smaller than a given file, to determine whether that StoreFile is selected for compaction during a minor compaction.
+ For instance, if hbase.hstore.compaction.ratio is 1.2, FileX is 5 mb, FileY is 2 mb, and FileZ is 3 mb:
++
+----
+5 <= 1.2 x (2 + 3) or 5 <= 6
+----
++
+In this scenario, FileX is eligible for minor compaction.
+If FileX were 7 mb, it would not be eligible for minor compaction.
+This ratio favors smaller StoreFiles.
+You can configure a different ratio for use in off-peak hours, using the parameter [var]+hbase.hstore.compaction.ratio.offpeak+, if you also configure [var]+hbase.offpeak.start.hour+ and [var]+hbase.offpeak.end.hour+.
+
+. If the last major compaction was too long ago and there is more than one StoreFile to be compacted, a major compaction is run, even if it would otherwise have been minor.
+ By default, the maximum time between major compactions is 7 days, plus or minus a 4.8 hour period, and determined randomly within those parameters.
+ Prior to HBase 0.96, the major compaction period was 24 hours.
+ See [var]+hbase.hregion.majorcompaction+ in the table below to tune or disable time-based major compactions.
+
+[[compaction.parameters]]
+====== Parameters Used by Compaction Algorithm
+
+This table contains the main configuration parameters for compaction.
+This list is not exhaustive.
+To tune these parameters from the defaults, edit the [path]_hbase-site.xml_ file.
+For a full list of all configuration parameters available, see <>
+
+[cols="1,1,1", options="header"]
+|===
+| Parameter
+| Description
+| Default
+
+| The minimum number of StoreFiles which must be eligible for
+ compaction before compaction can run.
+ The goal of tuning hbase.hstore.compaction.min
+ is to avoid ending up with too many tiny StoreFiles to compact. Setting
+ this value to 2 would cause a minor compaction each
+ time you have two StoreFiles in a Store, and this is probably not
+ appropriate. If you set this value too high, all the other values will
+ need to be adjusted accordingly. For most cases, the default value is
+ appropriate.
+ In previous versions of HBase, the parameter
+ hbase.hstore.compaction.min was called
+ hbase.hstore.compactionThreshold.
+
+
+| The maximum number of StoreFiles which will be selected for a
+ single minor compaction, regardless of the number of eligible
+ StoreFiles.
+ Effectively, the value of
+ hbase.hstore.compaction.max controls the length of
+ time it takes a single compaction to complete. Setting it larger means
+ that more StoreFiles are included in a compaction. For most cases, the
+ default value is appropriate.
+
+
+| A StoreFile smaller than this size will always be eligible for
+ minor compaction. StoreFiles this size or larger are evaluated by
+ hbase.hstore.compaction.ratio to determine if they are
+ eligible.
+ Because this limit represents the "automatic include" limit for
+ all StoreFiles smaller than this value, this value may need to be reduced
+ in write-heavy environments where many files in the 1-2 MB range are being
+ flushed, because every StoreFile will be targeted for compaction and the
+ resulting StoreFiles may still be under the minimum size and require
+ further compaction.
+ If this parameter is lowered, the ratio check is triggered more
+ quickly. This addressed some issues seen in earlier versions of HBase but
+ changing this parameter is no longer necessary in most situations.
+
+
+| An StoreFile larger than this size will be excluded from
+ compaction. The effect of raising
+ hbase.hstore.compaction.max.size is fewer, larger
+ StoreFiles that do not get compacted often. If you feel that compaction is
+ happening too often without much benefit, you can try raising this
+ value.
+
+| For minor compaction, this ratio is used to determine whether a
+ given StoreFile which is larger than
+ hbase.hstore.compaction.min.size is eligible for
+ compaction. Its effect is to limit compaction of large StoreFile. The
+ value of hbase.hstore.compaction.ratio is expressed as
+ a floating-point decimal.
+ A large ratio, such as 10, will produce a
+ single giant StoreFile. Conversely, a value of .25,
+ will produce behavior similar to the BigTable compaction algorithm,
+ producing four StoreFiles.
+ A moderate value of between 1.0 and 1.4 is recommended. When
+ tuning this value, you are balancing write costs with read costs. Raising
+ the value (to something like 1.4) will have more write costs, because you
+ will compact larger StoreFiles. However, during reads, HBase will need to seek
+ through fewer StpreFo;es to accomplish the read. Consider this approach if you
+ cannot take advantage of .
+ Alternatively, you can lower this value to something like 1.0 to
+ reduce the background cost of writes, and use to limit the number of StoreFiles touched
+ during reads.
+ For most cases, the default value is appropriate.
+
+
+| The compaction ratio used during off-peak compactions, if off-peak
+ hours are also configured (see below). Expressed as a floating-point
+ decimal. This allows for more aggressive (or less aggressive, if you set it
+ lower than hbase.hstore.compaction.ratio) compaction
+ during a set time period. Ignored if off-peak is disabled (default). This
+ works the same as hbase.hstore.compaction.ratio.
+
+| The start of off-peak hours, expressed as an integer between 0 and 23,
+ inclusive. Set to -1 to disable off-peak.
+
+| The end of off-peak hours, expressed as an integer between 0 and 23,
+ inclusive. Set to -1 to disable off-peak.
+
+| There are two different thread pools for compactions, one for
+ large compactions and the other for small compactions. This helps to keep
+ compaction of lean tables (such as hbase:meta)
+ fast. If a compaction is larger than this threshold, it goes into the
+ large compaction pool. In most cases, the default value is
+ appropriate.
+
+| Time between major compactions, expressed in milliseconds. Set to
+ 0 to disable time-based automatic major compactions. User-requested and
+ size-based major compactions will still run. This value is multiplied by
+ hbase.hregion.majorcompaction.jitter to cause
+ compaction to start at a somewhat-random time during a given window of
+ time.
+
+| A multiplier applied to
+ hbase.hregion.majorcompaction to cause compaction to
+ occur a given amount of time either side of
+ hbase.hregion.majorcompaction. The smaller the
+ number, the closer the compactions will happen to the
+ hbase.hregion.majorcompaction interval. Expressed as
+ a floating-point decimal.
+|===
+
+[[compaction.file.selection.old]]
+===== Compaction File Selection
+
+.Legacy Information
+[NOTE]
+====
+This section has been preserved for historical reasons and refers to the way compaction worked prior to HBase 0.96.x.
+You can still use this behavior if you enable <>. For information on the way that compactions work in HBase 0.96.x and later, see <>.
+====
+
+To understand the core algorithm for StoreFile selection, there is some ASCII-art in the link:http://hbase.apache.org/xref/org/apache/hadoop/hbase/regionserver/Store.html#836[Store
+ source code] that will serve as useful reference.
+It has been copied below:
+[source]
+----
+/* normal skew:
+ *
+ * older ----> newer
+ * _
+ * | | _
+ * | | | | _
+ * --|-|- |-|- |-|---_-------_------- minCompactSize
+ * | | | | | | | | _ | |
+ * | | | | | | | | | | | |
+ * | | | | | | | | | | | |
+ */
+----
+.Important knobs:
+* [code]+hbase.hstore.compaction.ratio+ Ratio used in compaction file selection algorithm (default 1.2f).
+* [code]+hbase.hstore.compaction.min+ (.90 hbase.hstore.compactionThreshold) (files) Minimum number of StoreFiles per Store to be selected for a compaction to occur (default 2).
+* [code]+hbase.hstore.compaction.max+ (files) Maximum number of StoreFiles to compact per minor compaction (default 10).
+* [code]+hbase.hstore.compaction.min.size+ (bytes) Any StoreFile smaller than this setting will automatically be a candidate for compaction.
+ Defaults to [code]+hbase.hregion.memstore.flush.size+ (128 MB).
+* [code]+hbase.hstore.compaction.max.size+ (.92) (bytes) Any StoreFile larger than this setting will automatically be excluded from compaction (default Long.MAX_VALUE).
+
+The minor compaction StoreFile selection logic is size based, and selects a file for compaction when the file <= sum(smaller_files) * [code]+hbase.hstore.compaction.ratio+.
+
+[[compaction.file.selection.example1]]
+====== Minor Compaction File Selection - Example #1 (Basic Example)
+
+This example mirrors an example from the unit test [code]+TestCompactSelection+.
+
+* [code]+hbase.hstore.compaction.ratio+ = 1.0f
+* [code]+hbase.hstore.compaction.min+ = 3 (files)
+* [code]+hbase.hstore.compaction.max+ = 5 (files)
+* [code]+hbase.hstore.compaction.min.size+ = 10 (bytes)
+* [code]+hbase.hstore.compaction.max.size+ = 1000 (bytes)
+
+The following StoreFiles exist: 100, 50, 23, 12, and 12 bytes apiece (oldest to newest). With the above parameters, the files that would be selected for minor compaction are 23, 12, and 12.
+
+Why?
+
+* 100 --> No, because sum(50, 23, 12, 12) * 1.0 = 97.
+* 50 --> No, because sum(23, 12, 12) * 1.0 = 47.
+* 23 --> Yes, because sum(12, 12) * 1.0 = 24.
+* 12 --> Yes, because the previous file has been included, and because this does not exceed the max-file limit of 5
+* 12 --> Yes, because the previous file had been included, and because this does not exceed the max-file limit of 5.
+
+[[compaction.file.selection.example2]]
+====== Minor Compaction File Selection - Example #2 (Not Enough Files To Compact)
+
+This example mirrors an example from the unit test [code]+TestCompactSelection+.
+
+* [code]+hbase.hstore.compaction.ratio+ = 1.0f
+* [code]+hbase.hstore.compaction.min+ = 3 (files)
+* [code]+hbase.hstore.compaction.max+ = 5 (files)
+* [code]+hbase.hstore.compaction.min.size+ = 10 (bytes)
+* [code]+hbase.hstore.compaction.max.size+ = 1000 (bytes)
+
+The following StoreFiles exist: 100, 25, 12, and 12 bytes apiece (oldest to newest). With the above parameters, no compaction will be started.
+
+Why?
+
+* 100 --> No, because sum(25, 12, 12) * 1.0 = 47
+* 25 --> No, because sum(12, 12) * 1.0 = 24
+* 12 --> No.
+ Candidate because sum(12) * 1.0 = 12, there are only 2 files to compact and that is less than the threshold of 3
+* 12 --> No.
+ Candidate because the previous StoreFile was, but there are not enough files to compact
+
+[[compaction.file.selection.example3]]
+====== Minor Compaction File Selection - Example #3 (Limiting Files To Compact)
+
+This example mirrors an example from the unit test [code]+TestCompactSelection+.
+
+* [code]+hbase.hstore.compaction.ratio+ = 1.0f
+* [code]+hbase.hstore.compaction.min+ = 3 (files)
+* [code]+hbase.hstore.compaction.max+ = 5 (files)
+* [code]+hbase.hstore.compaction.min.size+ = 10 (bytes)
+* [code]+hbase.hstore.compaction.max.size+ = 1000 (bytes)
+
+The following StoreFiles exist: 7, 6, 5, 4, 3, 2, and 1 bytes apiece (oldest to newest). With the above parameters, the files that would be selected for minor compaction are 7, 6, 5, 4, and 3.
+
+Why?
+
+* 7 -> Yes, because sum(6, 5, 4, 3, 2, 1) * 1.0 = 21.
+ Also, 7 is less than the min-size
+* 6 -> Yes, because sum(5, 4, 3, 2, 1) * 1.0 = 15.
+ Also, 6 is less than the min-size.
+* 5 -> Yes, because sum(4, 3, 2, 1) * 1.0 = 10.
+ Also, 5 is less than the min-size.
+* 4 -> Yes, because sum(3, 2, 1) * 1.0 = 6.
+ Also, 4 is less than the min-size.
+* 3 -> Yes, because sum(2, 1) * 1.0 = 3.
+ Also, 3 is less than the min-size.
+* 2 -> No.
+ Candidate because previous file was selected and 2 is less than the min-size, but the max-number of files to compact has been reached.
+* 1 -> No.
+ Candidate because previous file was selected and 1 is less than the min-size, but max-number of files to compact has been reached.
+
+[[compaction.config.impact]]
+.Impact of Key Configuration Options
+
+NOTE: This information is now included in the configuration parameter table in <>.
+
+[[ops.stripe]]
+===== Experimental: Stripe Compactions
+
+Stripe compactions is an experimental feature added in HBase 0.98 which aims to improve compactions for large regions or non-uniformly distributed row keys.
+In order to achieve smaller and/or more granular compactions, the StoreFiles within a region are maintained separately for several row-key sub-ranges, or "stripes", of the region.
+The stripes are transparent to the rest of HBase, so other operations on the HFiles or data work without modification.
+
+Stripe compactions change the HFile layout, creating sub-regions within regions.
+These sub-regions are easier to compact, and should result in fewer major compactions.
+This approach alleviates some of the challenges of larger regions.
+
+Stripe compaction is fully compatible with <> and works in conjunction with either the ExploringCompactionPolicy or RatioBasedCompactionPolicy.
+It can be enabled for existing tables, and the table will continue to operate normally if it is disabled later.
+
+[[ops.stripe.when]]
+===== When To Use Stripe Compactions
+
+Consider using stripe compaction if you have either of the following:
+
+* Large regions.
+ You can get the positive effects of smaller regions without the additional MemStore and region management overhead.
+* Non-uniform keys, such as time dimension in a key.
+ Only the stripes receiving the new keys will need to compact.
+ Old data will not compact as often, if at all.
+
+.Performance Improvements
+Performance testing has shown that the performance of reads improves somewhat, and variability of performance of reads and writes is greatly reduced.
+An overall long-term performance improvement is seen on large non-uniform-row key regions, such as a hash-prefixed timestamp key.
+These performance gains are the most dramatic on a table which is already large.
+It is possible that the performance improvement might extend to region splits.
+
+[[ops.stripe.enable]]
+====== Enabling Stripe Compaction
+
+You can enable stripe compaction for a table or a column family, by setting its [var]+hbase.hstore.engine.class+ to [var]+org.apache.hadoop.hbase.regionserver.StripeStoreEngine+.
+You also need to set the [var]+hbase.hstore.blockingStoreFiles+ to a high number, such as 100 (rather than the default value of 10).
+
+.Procedure: Enable Stripe Compaction
+. If the table already exists, disable the table.
+. Run one of following commands in the HBase shell.
+ Replace the table name [literal]+orders_table+ with the name of your table.
++
+----
+
+alter 'orders_table', CONFIGURATION => {'hbase.hstore.engine.class' => 'org.apache.hadoop.hbase.regionserver.StripeStoreEngine', 'hbase.hstore.blockingStoreFiles' => '100'}
+alter 'orders_table', {NAME => 'blobs_cf', CONFIGURATION => {'hbase.hstore.engine.class' => 'org.apache.hadoop.hbase.regionserver.StripeStoreEngine', 'hbase.hstore.blockingStoreFiles' => '100'}}
+create 'orders_table', 'blobs_cf', CONFIGURATION => {'hbase.hstore.engine.class' => 'org.apache.hadoop.hbase.regionserver.StripeStoreEngine', 'hbase.hstore.blockingStoreFiles' => '100'}
+----
+
+. Configure other options if needed.
+ See <> for more information.
+. Enable the table.
+
+.Procedure: Disable Stripe Compaction
+. Disable the table.
+. Set the [var]+hbase.hstore.engine.class+ option to either nil or [literal]+org.apache.hadoop.hbase.regionserver.DefaultStoreEngine+.
+ Either option has the same effect.
++
+----
+
+alter 'orders_table', CONFIGURATION => {'hbase.hstore.engine.class' => ''}
+----
+
+. Enable the table.
+
+When you enable a large table after changing the store engine either way, a major compaction will likely be performed on most regions.
+This is not necessary on new tables.
+
+[[ops.stripe.config]]
+====== Configuring Stripe Compaction
+
+Each of the settings for stripe compaction should be configured at the table or column family, after disabling the table.
+If you use HBase shell, the general command pattern is as follows:
+
+[source,sql]
+----
+
+alter 'orders_table', CONFIGURATION => {'key' => 'value', ..., 'key' => 'value'}}
+----
+
+[[ops.stripe.config.sizing]]
+.Region and stripe sizing
+
+You can configure your stripe sizing based upon your region sizing.
+By default, your new regions will start with one stripe.
+On the next compaction after the stripe has grown too large (16 x the MemStore flush size), it is split into two stripes.
+Stripe splitting continues as the region grows, until the region is large enough to split.
+
+You can improve this pattern for your own data.
+A good rule is to aim for a stripe size of at least 1 GB, and about 8-12 stripes for uniform row keys.
+For example, if your regions are 30 GB, 12 x 2.5 GB stripes might be a good starting point.
+
+.Stripe Sizing Settings
+[cols="1,2", frame="all", options="header"]
+|===
+| Setting
+| Notes
+
+| hbase.store.stripe.initialStripeCount
+| The number of stripes to create when stripe compaction is enabled.
+  You can use it as follows:
+  For relatively uniform row keys, if you know the approximate target number of stripes from the above, you can avoid some splitting overhead by starting with several stripes (2, 5, 10...). If the early data is not representative of the overall row key distribution, this will not be as efficient.
+  For existing tables with a large amount of data, this setting will effectively pre-split your stripes.
+  For keys such as hash-prefixed sequential keys, with more than one hash prefix per region, pre-splitting may make sense.
+
+| hbase.store.stripe.sizeToSplit
+| The maximum size a stripe grows before splitting.
+  Use this in conjunction with hbase.store.stripe.splitPartCount to control the target stripe size (sizeToSplit = splitPartsCount * target stripe size), according to the above sizing considerations.
+
+| hbase.store.stripe.splitPartCount
+|
+|===
+
+[[ops.stripe.config.memstore]]
+.MemStore Size Settings
+
+By default, the flush creates several files from one MemStore, according to existing stripe boundaries and row keys to flush.
+This approach minimizes write amplification, but can be undesirable if the MemStore is small and there are many stripes, because the files will be too small.
+
+In this type of situation, you can set [var]+hbase.store.stripe.compaction.flushToL0+ to [literal]+true+.
+This will cause a MemStore flush to create a single file instead.
+When at least [var]+hbase.store.stripe.compaction.minFilesL0+ such files (by default, 4) accumulate, they will be compacted into striped files.
+
+[[ops.stripe.config.compact]]
+.Normal Compaction Configuration and Stripe Compaction
+
+All the settings that apply to normal compactions (see <>) apply to stripe compactions.
+The exceptions are the minimum and maximum number of files, which are set to higher values by default because the files in stripes are smaller.
+To control these for stripe compactions, use [var]+hbase.store.stripe.compaction.minFiles+ and [var]+hbase.store.stripe.compaction.maxFiles+, rather than [var]+hbase.hstore.compaction.min+ and [var]+hbase.hstore.compaction.max+.
+
+[[arch.bulk.load]]
+== Bulk Loading
+
+[[arch.bulk.load.overview]]
+=== Overview
+
+HBase includes several methods of loading data into tables.
+The most straightforward method is to either use the [code]+TableOutputFormat+ class from a MapReduce job, or use the normal client APIs; however, these are not always the most efficient methods.
+
+The bulk load feature uses a MapReduce job to output table data in HBase's internal data format, and then directly loads the generated StoreFiles into a running cluster.
+Using bulk load will use less CPU and network resources than simply using the HBase API.
+
+[[arch.bulk.load.limitations]]
+=== Bulk Load Limitations
+
+As bulk loading bypasses the write path, the WAL doesn't get written to as part of the process.
+Replication works by reading the WAL files, so it won't see the bulk-loaded data, and the same goes for edits that use Put.setWriteToWAL(false). One way to handle this is to ship the raw files or the HFiles to the other cluster and do the processing there.
+
+[[arch.bulk.load.arch]]
+=== Bulk Load Architecture
+
+The HBase bulk load process consists of two main steps.
+
+[[arch.bulk.load.prep]]
+==== Preparing data via a MapReduce job
+
+The first step of a bulk load is to generate HBase data files (StoreFiles) from a MapReduce job using [code]+HFileOutputFormat+.
+This output format writes out data in HBase's internal storage format so that they can be later loaded very efficiently into the cluster.
+
+In order to function efficiently, [code]+HFileOutputFormat+ must be configured such that each output HFile fits within a single region.
+In order to do this, jobs whose output will be bulk loaded into HBase use Hadoop's [code]+TotalOrderPartitioner+ class to partition the map output into disjoint ranges of the key space, corresponding to the key ranges of the regions in the table.
+
+[code]+HFileOutputFormat+ includes a convenience function, [code]+configureIncrementalLoad()+, which automatically sets up a [code]+TotalOrderPartitioner+ based on the current region boundaries of a table.
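+
+For illustration, the following is a hedged sketch of a job driver that wires this together.
+It assumes an existing table named [literal]+mytable+ and a hypothetical user-supplied [code]+MyMapper+ that emits [code]+ImmutableBytesWritable+/[code]+Put+ pairs; the input and output paths are examples only.
+
+[source,java]
+----
+Configuration conf = HBaseConfiguration.create();
+Job job = Job.getInstance(conf, "prepare-bulk-load");
+job.setJarByClass(MyMapper.class);           // MyMapper is user-supplied
+job.setMapperClass(MyMapper.class);
+job.setMapOutputKeyClass(ImmutableBytesWritable.class);
+job.setMapOutputValueClass(Put.class);
+job.setInputFormatClass(TextInputFormat.class);
+
+FileInputFormat.addInputPath(job, new Path("/user/todd/input"));
+FileOutputFormat.setOutputPath(job, new Path("/user/todd/myoutput"));
+
+// Sets up the TotalOrderPartitioner, the reducer, and the output format
+// based on the table's current region boundaries.
+HTable table = new HTable(conf, "mytable");
+HFileOutputFormat.configureIncrementalLoad(job, table);
+
+job.waitForCompletion(true);
+----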
+
+[[arch.bulk.load.complete]]
+==== Completing the data load
+
+After the data has been prepared using [code]+HFileOutputFormat+, it is loaded into the cluster using [code]+completebulkload+.
+This command line tool iterates through the prepared data files, and for each one determines the region the file belongs to.
+It then contacts the appropriate Region Server which adopts the HFile, moving it into its storage directory and making the data available to clients.
+
+If the region boundaries have changed during the course of bulk load preparation, or between the preparation and completion steps, the [code]+completebulkload+ utility will automatically split the data files into pieces corresponding to the new boundaries.
+This process is not optimally efficient, so users should take care to minimize the delay between preparing a bulk load and importing it into the cluster, especially if other clients are simultaneously loading data through other means.
+
+[[arch.bulk.load.import]]
+=== Importing the prepared data using the completebulkload tool
+
+After a data import has been prepared, either by using the [code]+importtsv+ tool with the "[code]+importtsv.bulk.output+" option or by some other MapReduce job using the [code]+HFileOutputFormat+, the [code]+completebulkload+ tool is used to import the data into the running cluster.
+
+The [code]+completebulkload+ tool simply takes the output path where [code]+importtsv+ or your MapReduce job put its results, and the table name to import into.
+For example:
+
+----
+$ hadoop jar hbase-server-VERSION.jar completebulkload [-c /path/to/hbase/config/hbase-site.xml] /user/todd/myoutput mytable
+----
+
+The [code]+-c config-file+ option can be used to specify a file containing the appropriate hbase parameters (e.g., hbase-site.xml) if not supplied already on the CLASSPATH (In addition, the CLASSPATH must contain the directory that has the zookeeper configuration file if zookeeper is NOT managed by HBase).
+
+Note: If the target table does not already exist in HBase, this tool will create the table automatically.
+
+This tool will run quickly, after which point the new data will be visible in the cluster.
+
+[[arch.bulk.load.also]]
+=== See Also
+
+For more information about the referenced utilities, see <> and <>.
+
+See link:http://blog.cloudera.com/blog/2013/09/how-to-use-hbase-bulk-loading-and-why/[How-to: Use HBase Bulk Loading, and Why] for a blog post on the state of bulk loading.
+
+[[arch.bulk.load.adv]]
+=== Advanced Usage
+
+Although the [code]+importtsv+ tool is useful in many cases, advanced users may want to generate data programmatically, or import data from other formats.
+To get started doing so, dig into [code]+ImportTsv.java+ and check the JavaDoc for HFileOutputFormat.
+
+The import step of the bulk load can also be done programmatically.
+See the [code]+LoadIncrementalHFiles+ class for more information.
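+
+As a rough illustration, a programmatic import might look like the following sketch; it assumes the prepared HFiles are under [path]_/user/todd/myoutput_ and that the target table already exists.
+
+[source,java]
+----
+Configuration conf = HBaseConfiguration.create();
+LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
+HTable table = new HTable(conf, "mytable");
+// Walks the HFiles under the output directory and hands each one to the
+// region server that owns its key range, splitting files if necessary.
+loader.doBulkLoad(new Path("/user/todd/myoutput"), table);
+table.close();
+----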
+
+[[arch.hdfs]]
+== HDFS
+
+As HBase runs on HDFS (and each StoreFile is written as a file on HDFS), it is important to have an understanding of the HDFS Architecture especially in terms of how it stores files, handles failovers, and replicates blocks.
+
+See the Hadoop documentation on link:http://hadoop.apache.org/common/docs/current/hdfs_design.html[HDFS Architecture] for more information.
+
+[[arch.hdfs.nn]]
+=== NameNode
+
+The NameNode is responsible for maintaining the filesystem metadata.
+See the above HDFS Architecture link for more information.
+
+[[arch.hdfs.dn]]
+=== DataNode
+
+The DataNodes are responsible for storing HDFS blocks.
+See the above HDFS Architecture link for more information.
+
+[[arch.timelineconsistent.reads]]
+== Timeline-consistent Highly Available Reads
+
+[[casestudies.timelineconsistent.intro]]
+=== Introduction
+
+Architecturally, HBase has always had a strong consistency guarantee.
+All reads and writes are routed through a single region server, which guarantees that all writes happen in order, and all reads see the most recent committed data.
+
+
+However, because reads are single-homed to one location, if that server becomes unavailable, the regions of the table that were hosted on that region server become unavailable for some time.
+There are three phases in the region recovery process: detection, assignment, and recovery.
+Of these, detection is usually the longest, currently on the order of 20-30 seconds depending on the ZooKeeper session timeout.
+During this time, and before the recovery is complete, clients will not be able to read the region data.
+
+However, for some use cases, either the data may be read-only, or doing reads against somewhat stale data is acceptable.
+With timeline-consistent highly available reads, HBase can be used for these kinds of latency-sensitive use cases where the application can expect to have a time bound on read completion.
+
+
+For achieving high availability for reads, HBase provides a feature called ``region replication''. In this model, for each region of a table, there will be multiple replicas that are opened in different region servers.
+By default, the region replication is set to 1, so only a single region replica is deployed and there will not be any changes from the original model.
+If region replication is set to 2 or more, then the master will assign replicas of the regions of the table.
+The Load Balancer ensures that the region replicas are not co-hosted on the same region server, and also, if possible, not in the same rack.
+
+All of the replicas for a single region will have a unique replica_id, starting from 0.
+The region replica having replica_id==0 is called the primary region, and the others ``secondary regions'' or secondaries.
+Only the primary can accept writes from the client, and the primary will always contain the latest changes.
+Since all writes still have to go through the primary region, the writes are not highly-available (meaning they might block for some time if the region becomes unavailable).
+
+The writes are asynchronously sent to the secondary region replicas using an ``Async WAL replication'' feature.
+This works similarly to HBase's multi-datacenter replication, but instead the data from a region is replicated to the secondary regions.
+Each secondary replica always receives and observes the writes in the same order that the primary region committed them.
+This ensures that the secondaries won't diverge from the primary region's data, but since the log replication is async, the data might be stale in secondary regions.
+In some sense, this design can be thought of as ``in-cluster replication'', where instead of replicating to a different datacenter, the data goes to a secondary region to keep secondary region's in-memory state up to date.
+The data files are shared between the primary region and the other replicas, so that there is no extra storage overhead.
+However, the secondary regions will have recent non-flushed data in their memstores, which increases the memory overhead.
+
+
+The Async WAL replication feature is being implemented in Phase 2 of issue HBASE-10070.
+Before this is complete, region replicas will only be updated with flushed data files from the primary (see hbase.regionserver.storefile.refresh.period below). For read-only tables, it is also possible to use this feature without setting storefile.refresh.period at all.
+
+
+=== Timeline Consistency
+
+With this feature, HBase introduces a Consistency definition, which can be provided per read operation (get or scan).
+[source,java]
+----
+public enum Consistency {
+ STRONG,
+ TIMELINE
+}
+----
+[code]+Consistency.STRONG+ is the default consistency model provided by HBase.
+If the table has region replication = 1, or if a table with region replicas is read with this consistency, the read is always performed by the primary regions, so there is no change from the previous behaviour, and the client always observes the latest data.
+
+
+If a read is performed with [code]+Consistency.TIMELINE+, the read RPC is sent to the primary region server first.
+After a short interval ([code]+hbase.client.primaryCallTimeout.get+, 10ms by default), if the primary has not responded, parallel RPCs are also sent to the secondary region replicas.
+The result is then returned from whichever RPC finishes first.
+If the response came back from the primary region replica, the data is known to be the latest.
+To inspect staleness, the Result.isStale() API has been added.
+If the result is from a secondary region, Result.isStale() will be set to true.
+The user can then inspect this field to reason about the data.
+
+
+In terms of semantics, TIMELINE consistency as implemented by HBase differs from pure eventual consistency in these respects:
+
+* Single homed and ordered updates: Region replication or not, on the write side, there is still only 1 defined replica (primary) which can accept writes.
+ This replica is responsible for ordering the edits and preventing conflicts.
+ This guarantees that two different writes are not committed at the same time by different replicas, so the data cannot diverge.
+ With this, there is no need to do read-repair or last-timestamp-wins kind of conflict resolution.
+* The secondaries also apply the edits in the order that the primary committed them.
+ This way the secondaries will contain a snapshot of the primary's data at any point in time.
+ This is similar to RDBMS replications and even HBase's own multi-datacenter replication, however in a single cluster.
+* On the read side, the client can detect whether the read is coming from up-to-date data or is stale data.
+ Also, the client can issue reads with different consistency requirements on a per-operation basis to ensure its own semantic guarantees.
+* The client can still observe edits out-of-order, and can go back in time, if it observes reads from one secondary replica first, then another secondary replica.
+ There is no stickiness to region replicas or a transaction-id based guarantee.
+ If required, this can be implemented later though.
+
+.Timeline Consistency
+image::timeline_consistency.png[Timeline Consistency]
+
+To better understand the TIMELINE semantics, let's look at the above diagram.
+Let's say that there are two clients, and the first one writes x=1 at first, then x=2 and x=3 later.
+As above, all writes are handled by the primary region replica.
+The writes are saved in the write ahead log (WAL), and replicated to the other replicas asynchronously.
+In the above diagram, notice that replica_id=1 received 2 updates and its data shows that x=2, while replica_id=2 only received a single update and its data shows that x=1.
+
+
+If client1 reads with STRONG consistency, it will only talk with the replica_id=0, and thus is guaranteed to observe the latest value of x=3.
+In case of a client issuing TIMELINE consistency reads, the RPC will go to all replicas (after primary timeout) and the result from the first response will be returned back.
+Thus the client can see either 1, 2 or 3 as the value of x.
+Let's say that the primary region has failed and log replication cannot continue for some time.
+If the client does multiple reads with TIMELINE consistency, she can observe x=2 first, then x=1, and so on.
+
+
+=== Tradeoffs
+
+Having secondary regions hosted for read availability comes with some tradeoffs which should be carefully evaluated per use case.
+Following are advantages and disadvantages.
+
+.Advantages
+* High availability for read-only tables
+* High availability for stale reads
+* Ability to do very low latency reads with very high percentile (99.9%+) latencies for stale reads
+
+.Disadvantages
+* Double or triple MemStore usage (depending on region replication count) for tables with region replication > 1
+* Increased block cache usage
+* Extra network traffic for log replication
+* Extra backup RPCs for replicas
+
+To serve the region data from multiple replicas, HBase opens the regions in secondary mode in the region servers.
+The regions opened in secondary mode share the same data files with the primary region replica; however, each secondary region replica has its own MemStore to keep the unflushed data (only the primary region can do flushes). Also, to serve reads from secondary regions, the blocks of the data files may be cached in the block caches of the secondary regions.
+
+=== Configuration properties
+
+To use highly available reads, you should set the following properties in the hbase-site.xml file.
+There is no specific configuration to enable or disable region replicas.
+Instead, you change the number of region replicas per table, either at table creation or with alter table.
+
+
+==== Server side properties
+
+[source,xml]
+----
+<property>
+  <name>hbase.regionserver.storefile.refresh.period</name>
+  <value>0</value>
+  <description>
+    The period (in milliseconds) for refreshing the store files for the secondary regions. 0 means this feature is disabled. Secondary regions see new files (from flushes and compactions) from the primary once the secondary region refreshes the list of files in the region. But too frequent refreshes might cause extra NameNode pressure. If the files cannot be refreshed for longer than the HFile TTL (hbase.master.hfilecleaner.ttl), the requests are rejected. Configuring the HFile TTL to a larger value is also recommended with this setting.
+  </description>
+</property>
+----
+
+Keep in mind that the region replica placement policy is only enforced by the [code]+StochasticLoadBalancer+, which is the default balancer.
+If you use a custom load balancer (the [code]+hbase.master.loadbalancer.class+ property in hbase-site.xml), replicas of regions might end up being hosted on the same server.
+
+==== Client side properties
+
+Ensure that the following properties are set for all clients (and servers) that will use region replicas.
+
+[source,xml]
+----
+<property>
+  <name>hbase.ipc.client.allowsInterrupt</name>
+  <value>true</value>
+  <description>
+    Whether to enable interruption of RPC threads at the client side. This is required for region replicas with fallback RPCs to secondary regions.
+  </description>
+</property>
+<property>
+  <name>hbase.client.primaryCallTimeout.get</name>
+  <value>10000</value>
+  <description>
+    The timeout (in microseconds) before secondary fallback RPCs are submitted for get requests with Consistency.TIMELINE to the secondary replicas of the regions. Defaults to 10ms. Setting this lower will increase the number of RPCs, but will lower the p99 latencies.
+  </description>
+</property>
+<property>
+  <name>hbase.client.primaryCallTimeout.multiget</name>
+  <value>10000</value>
+  <description>
+    The timeout (in microseconds) before secondary fallback RPCs are submitted for multi-get requests (HTable.get(List<Get>)) with Consistency.TIMELINE to the secondary replicas of the regions. Defaults to 10ms. Setting this lower will increase the number of RPCs, but will lower the p99 latencies.
+  </description>
+</property>
+<property>
+  <name>hbase.client.replicaCallTimeout.scan</name>
+  <value>1000000</value>
+  <description>
+    The timeout (in microseconds) before secondary fallback RPCs are submitted for scan requests with Consistency.TIMELINE to the secondary replicas of the regions. Defaults to 1 second. Setting this lower will increase the number of RPCs, but will lower the p99 latencies.
+  </description>
+</property>
+----
+
+=== Creating a table with region replication
+
+Region replication is a per-table property.
+All tables have REGION_REPLICATION = 1 by default, which means that there is only one replica per region.
+You can set and change the number of replicas per region of a table by supplying the REGION_REPLICATION property in the table descriptor.
+
+
+==== Shell
+
+[source]
+----
+
+create 't1', 'f1', {REGION_REPLICATION => 2}
+
+describe 't1'
+for i in 1..100
+put 't1', "r#{i}", 'f1:c1', i
+end
+flush 't1'
+----
+
+==== Java
+
+[source,java]
+----
+
+HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("test_table"));
+htd.setRegionReplication(2);
+...
+admin.createTable(htd);
+----
+
+You can also use [code]+setRegionReplication()+ and alter table to increase or decrease the region replication for a table.
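+
+A hedged sketch of doing this through the admin API (assuming an already-connected [code]+admin+ and that the usual disable/alter/enable cycle is acceptable for the table):
+
+[source,java]
+----
+TableName name = TableName.valueOf("test_table");
+admin.disableTable(name);
+HTableDescriptor htd = admin.getTableDescriptor(name);
+htd.setRegionReplication(3);      // raise (or lower) the replica count
+admin.modifyTable(name, htd);
+admin.enableTable(name);
+----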
+
+=== Region splits and merges
+
+Region splits and merges are not compatible with regions with replicas yet.
+So you have to pre-split the table, and disable region splits.
+Also, you should not execute region merges on tables with region replicas.
+To disable region splits, you can use DisabledRegionSplitPolicy as the split policy.
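+
+For example, a table intended for region replicas could be created pre-split with automatic splits disabled, roughly as in the following sketch; the [code]+admin+ instance, the column family, and the split keys are illustrative assumptions.
+
+[source,java]
+----
+HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("test_table"));
+htd.setRegionReplication(2);
+// Prevent further automatic splits on this table.
+htd.setRegionSplitPolicyClassName(
+    "org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy");
+htd.addFamily(new HColumnDescriptor("f1"));
+// Pre-split into 16 regions across an assumed key range.
+admin.createTable(htd, Bytes.toBytes("a"), Bytes.toBytes("z"), 16);
+----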
+
+=== User Interface
+
+In the Master's user interface, the region replicas of a table are shown together with the primary regions.
+You can see that the replicas of a region share the same start and end keys and the same region name prefix.
+The only differences are the appended replica_id (which is encoded as hex) and the region encoded name.
+The replica ids are also shown explicitly in the UI.
+
+=== API and Usage
+
+==== Shell
+
+You can do reads in the shell using Consistency.TIMELINE semantics as follows:
+
+[source]
+----
+
+hbase(main):001:0> get 't1','r6', {CONSISTENCY => "TIMELINE"}
+----
+
+You can simulate a region server pausing or becoming unavailable and do a read from the secondary replica:
+
+[source,bourne]
+----
+
+$ kill -STOP <pid of the region server>
+
+hbase(main):001:0> get 't1','r6', {CONSISTENCY => "TIMELINE"}
+----
+
+Using scans is similar:
+
+[source]
+----
+
+hbase> scan 't1', {CONSISTENCY => 'TIMELINE'}
+----
+
+==== Java
+
+You can set the consistency for Gets and Scans and do requests as follows.
+
+[source,java]
+----
+
+Get get = new Get(row);
+get.setConsistency(Consistency.TIMELINE);
+...
+Result result = table.get(get);
+----
+
+You can also pass multiple gets:
+
+[source,java]
+----
+
+Get get1 = new Get(row);
+get1.setConsistency(Consistency.TIMELINE);
+...
+ArrayList<Get> gets = new ArrayList<Get>();
+gets.add(get1);
+...
+Result[] results = table.get(gets);
+----
+
+And Scans:
+
+[source,java]
+----
+
+Scan scan = new Scan();
+scan.setConsistency(Consistency.TIMELINE);
+...
+ResultScanner scanner = table.getScanner(scan);
+----
+
+You can inspect whether the results are coming from primary region or not by calling the Result.isStale() method:
+
+[source,java]
+----
+
+Result result = table.get(get);
+if (result.isStale()) {
+ ...
+}
+----
+
+=== Resources
+
+. More information about the design and implementation can be found in the JIRA issue: link:https://issues.apache.org/jira/browse/HBASE-10070[HBASE-10070]
+. HBaseCon 2014 link:http://hbasecon.com/sessions/#session15[talk] also contains some details and link:http://www.slideshare.net/enissoz/hbase-high-availability-for-reads-with-time[slides].
+
+ifdef::backend-docbook[]
+[index]
+== Index
+// Generated automatically by the DocBook toolchain.
+endif::backend-docbook[]
diff --git a/src/main/asciidoc/_chapters/asf.adoc b/src/main/asciidoc/_chapters/asf.adoc
new file mode 100644
index 0000000..77eed8f
--- /dev/null
+++ b/src/main/asciidoc/_chapters/asf.adoc
@@ -0,0 +1,47 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[appendix]
+[[asf]]
+== HBase and the Apache Software Foundation
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+:toc: left
+:source-language: java
+
+HBase is a project in the Apache Software Foundation and as such there are responsibilities to the ASF to ensure a healthy project.
+
+[[asf.devprocess]]
+=== ASF Development Process
+
+See the link:http://www.apache.org/dev/#committers[Apache Development Process page] for all sorts of information on how the ASF is structured (e.g., PMC, committers, contributors), to tips on contributing and getting involved, and how open-source works at ASF.
+
+[[asf.reporting]]
+=== ASF Board Reporting
+
+Once a quarter, each project in the ASF portfolio submits a report to the ASF board.
+This is done by the HBase project lead and the committers.
+See link:http://www.apache.org/foundation/board/reporting[ASF board reporting] for more information.
+
+:numbered:
diff --git a/src/main/asciidoc/_chapters/case_studies.adoc b/src/main/asciidoc/_chapters/case_studies.adoc
new file mode 100644
index 0000000..f6a43ee
--- /dev/null
+++ b/src/main/asciidoc/_chapters/case_studies.adoc
@@ -0,0 +1,168 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[[casestudies]]
+= Apache HBase Case Studies
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+
+[[casestudies.overview]]
+== Overview
+
+This chapter will describe a variety of performance and troubleshooting case studies that can provide a useful blueprint on diagnosing Apache HBase cluster issues.
+
+For more information on Performance and Troubleshooting, see <> and <>.
+
+[[casestudies.schema]]
+== Schema Design
+
+See the schema design case studies here: <>
+
+[[casestudies.perftroub]]
+== Performance/Troubleshooting
+
+[[casestudies.slownode]]
+=== Case Study #1 (Performance Issue On A Single Node)
+
+==== Scenario
+
+Following a scheduled reboot, one data node began exhibiting unusual behavior.
+Routine MapReduce jobs run against HBase tables which regularly completed in five or six minutes began taking 30 or 40 minutes to finish.
+These jobs were consistently found to be waiting on map and reduce tasks assigned to the troubled data node (e.g., the slow map tasks all had the same Input Split). The situation came to a head during a distributed copy, when the copy was severely prolonged by the lagging node.
+
+==== Hardware
+
+.Datanodes
+* Two 12-core processors
+* Six Enterprise SATA disks
+* 24GB of RAM
+* Two bonded gigabit NICs
+
+.Network
+* 10 Gigabit top-of-rack switches
+* 20 Gigabit bonded interconnects between racks.
+
+==== Hypotheses
+
+===== HBase "Hot Spot" Region
+
+We hypothesized that we were experiencing a familiar point of pain: a "hot spot" region in an HBase table, where uneven key-space distribution can funnel a huge number of requests to a single HBase region, bombarding the RegionServer process and causing slow response times.
+Examination of the HBase Master status page showed that the number of HBase requests to the troubled node was almost zero.
+Further, examination of the HBase logs showed that there were no region splits, compactions, or other region transitions in progress.
+This effectively ruled out a "hot spot" as the root cause of the observed slowness.
+
+===== HBase Region With Non-Local Data
+
+Our next hypothesis was that one of the MapReduce tasks was requesting data from HBase that was not local to the datanode, thus forcing HDFS to request data blocks from other servers over the network.
+Examination of the datanode logs showed that there were very few blocks being requested over the network, indicating that the HBase region was correctly assigned, and that the majority of the necessary data was located on the node.
+This ruled out the possibility of non-local data causing a slowdown.
+
+===== Excessive I/O Wait Due To Swapping Or An Over-Worked Or Failing Hard Disk
+
+After concluding that Hadoop and HBase were not likely to be the culprits, we moved on to troubleshooting the datanode's hardware.
+Java, by design, will periodically scan its entire memory space to do garbage collection.
+If system memory is heavily overcommitted, the Linux kernel may enter a vicious cycle, using up all of its resources swapping Java heap back and forth from disk to RAM as Java tries to run garbage collection.
+Further, a failing hard disk will often retry reads and/or writes many times before giving up and returning an error.
+This can manifest as high iowait, as running processes wait for reads and writes to complete.
+Finally, a disk nearing the upper edge of its performance envelope will begin to cause iowait as it informs the kernel that it cannot accept any more data, and the kernel queues incoming data into the dirty write pool in memory.
+However, using [code]+vmstat(1)+ and [code]+free(1)+, we could see that no swap was being used, and the amount of disk IO was only a few kilobytes per second.
+
+===== Slowness Due To High Processor Usage
+
+Next, we checked to see whether the system was performing slowly simply due to very high computational load. [code]+top(1)+ showed that the system load was higher than normal, but [code]+vmstat(1)+ and [code]+mpstat(1)+ showed that the amount of processor being used for actual computation was low.
+
+===== Network Saturation (The Winner)
+
+Since neither the disks nor the processors were being utilized heavily, we moved on to the performance of the network interfaces.
+The datanode had two gigabit ethernet adapters, bonded to form an active-standby interface. [code]+ifconfig(8)+ showed some unusual anomalies, namely interface errors, overruns, and framing errors.
+While not unheard of, these kinds of errors are exceedingly rare on modern hardware which is operating as it should:
+
+----
+
+$ /sbin/ifconfig bond0
+bond0 Link encap:Ethernet HWaddr 00:00:00:00:00:00
+inet addr:10.x.x.x Bcast:10.x.x.255 Mask:255.255.255.0
+UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1
+RX packets:2990700159 errors:12 dropped:0 overruns:1 frame:6 <--- Look Here! Errors!
+TX packets:3443518196 errors:0 dropped:0 overruns:0 carrier:0
+collisions:0 txqueuelen:0
+RX bytes:2416328868676 (2.4 TB) TX bytes:3464991094001 (3.4 TB)
+----
+
+These errors immediately lead us to suspect that one or more of the ethernet interfaces might have negotiated the wrong line speed.
+This was confirmed both by running an ICMP ping from an external host and observing round-trip-time in excess of 700ms, and by running [code]+ethtool(8)+ on the members of the bond interface and discovering that the active interface was operating at 100Mb/s, full duplex.
+
+----
+
+$ sudo ethtool eth0
+Settings for eth0:
+Supported ports: [ TP ]
+Supported link modes: 10baseT/Half 10baseT/Full
+ 100baseT/Half 100baseT/Full
+ 1000baseT/Full
+Supports auto-negotiation: Yes
+Advertised link modes: 10baseT/Half 10baseT/Full
+ 100baseT/Half 100baseT/Full
+ 1000baseT/Full
+Advertised pause frame use: No
+Advertised auto-negotiation: Yes
+Link partner advertised link modes: Not reported
+Link partner advertised pause frame use: No
+Link partner advertised auto-negotiation: No
+Speed: 100Mb/s <--- Look Here! Should say 1000Mb/s!
+Duplex: Full
+Port: Twisted Pair
+PHYAD: 1
+Transceiver: internal
+Auto-negotiation: on
+MDI-X: Unknown
+Supports Wake-on: umbg
+Wake-on: g
+Current message level: 0x00000003 (3)
+Link detected: yes
+----
+
+In normal operation, the ICMP ping round trip time should be around 20ms, and the interface speed and duplex should read "1000Mb/s" and "Full", respectively.
+
+==== Resolution
+
+After determining that the active ethernet adapter was at the incorrect speed, we used the [code]+ifenslave(8)+ command to make the standby interface the active interface, which yielded an immediate improvement in MapReduce performance, and a 10 times improvement in network throughput.
+
+On the next trip to the datacenter, we determined that the line speed issue was ultimately caused by a bad network cable, which was replaced.
+
+[[casestudies.perf.1]]
+=== Case Study #2 (Performance Research 2012)
+
+Investigation results of a self-described "we're not sure what's wrong, but it seems slow" problem. link:http://gbif.blogspot.com/2012/03/hbase-performance-evaluation-continued.html[]
+
+[[casestudies.perf.2]]
+=== Case Study #3 (Performance Research 2010)
+
+Investigation results of general cluster performance from 2010.
+Although this research is on an older version of the codebase, this writeup is still very useful in terms of approach. link:http://hstack.org/hbase-performance-testing/[]
+
+[[casestudies.max.transfer.threads]]
+=== Case Study #4 (max.transfer.threads Config)
+
+Case study of configuring [code]+max.transfer.threads+ (previously known as [code]+xcievers+) and diagnosing errors from misconfigurations. link:http://www.larsgeorge.com/2012/03/hadoop-hbase-and-xceivers.html[]
+
+See also <>.
diff --git a/src/main/asciidoc/_chapters/community.adoc b/src/main/asciidoc/_chapters/community.adoc
new file mode 100644
index 0000000..4b91b0d
--- /dev/null
+++ b/src/main/asciidoc/_chapters/community.adoc
@@ -0,0 +1,111 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[[community]]
+= Community
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+
+== Decisions
+
+.Feature Branches
+
+Feature Branches are easy to make.
+You do not have to be a committer to make one.
+Just request on the developer's mailing list that the name of your branch be added to JIRA, and a committer will add it for you.
+Thereafter you can file issues against your feature branch in Apache HBase JIRA.
+Keep your code elsewhere -- it should be public so it can be observed -- and you can update the dev mailing list on progress.
+When the feature is ready for commit, 3 +1s from committers will get your feature merged.
+See link:http://search-hadoop.com/m/asM982C5FkS1[HBase, mail # dev - Thoughts
+ about large feature dev branches]
+
+[[patchplusonepolicy]]
+.Patch +1 Policy
+
+The below policy is something we put in place 09/2012.
+It is a suggested policy rather than a hard requirement.
+We want to try it first to see if it works before we cast it in stone.
+
+Apache HBase is made of link:https://issues.apache.org/jira/browse/HBASE#selectedTab=com.atlassian.jira.plugin.system.project%3Acomponents-panel[components].
+Components have one or more <>s.
+See the 'Description' field on the link:https://issues.apache.org/jira/browse/HBASE#selectedTab=com.atlassian.jira.plugin.system.project%3Acomponents-panel[components] JIRA page for who the current owners are by component.
+
+Patches that fit within the scope of a single Apache HBase component require, at least, a +1 by one of the component's owners before commit.
+If owners are absent -- busy or otherwise -- two +1s by non-owners will suffice.
+
+Patches that span components need at least two +1s before they can be committed, preferably +1s by owners of components touched by the x-component patch (TODO: This needs tightening up but I think fine for first pass).
+
+Any -1 on a patch by anyone vetoes a patch; it cannot be committed until the justification for the -1 is addressed.
+
+[[hbase.fix.version.in.jira]]
+.How to set fix version in JIRA on issue resolve
+
+Here is how link:http://search-hadoop.com/m/azemIi5RCJ1[we agreed] to set versions in JIRA when we resolve an issue.
+If trunk is going to be 0.98.0 then:
+
+* Commit only to trunk: Mark with 0.98
+* Commit to 0.95 and trunk : Mark with 0.98, and 0.95.x
+* Commit to 0.94.x and 0.95, and trunk: Mark with 0.98, 0.95.x, and 0.94.x
+* Commit to 89-fb: Mark with 89-fb.
+* Commit site fixes: no version
+
+[[hbase.when.to.close.jira]]
+.Policy on when to set a RESOLVED JIRA as CLOSED
+
+We link:http://search-hadoop.com/m/4cIKs1iwXMS1[agreed] that for issues that list multiple releases in their _Fix Version/s_ field, CLOSE the issue on the release of any of the versions listed; subsequent change to the issue must happen in a new JIRA.
+
+[[no.permanent.state.in.zk]]
+.Only transient state in ZooKeeper!
+
+You should be able to kill the data in zookeeper and hbase should ride over it recreating the zk content as it goes.
+This is an old adage around these parts.
+We just made note of it now.
+We also are currently in violation of this basic tenet -- replication at least keeps permanent state in zk -- but we are working to undo this breaking of a golden rule.
+
+[[community.roles]]
+== Community Roles
+
+[[owner]]
+.Component Owner/Lieutenant
+
+Component owners are listed in the description field on this Apache HBase JIRA link:https://issues.apache.org/jira/browse/HBASE#selectedTab=com.atlassian.jira.plugin.system.project%3Acomponents-panel[components] page.
+The owners are listed in the 'Description' field rather than in the 'Component Lead' field because the latter only allows us to list one individual, whereas it is encouraged that components have multiple owners.
+
+Owners or component lieutenants are volunteers who are (usually, but not necessarily) expert in their component domain and may have an agenda on how they think their Apache HBase component should evolve.
+
+. Owners will try and review patches that land within their component's scope.
+. If applicable, if an owner has an agenda, they will publish their goals or the design toward which they are driving their component
+
+If you would like to volunteer as a component owner, just write the dev list and we'll sign you up.
+Owners do not need to be committers.
+
+[[hbase.commit.msg.format]]
+== Commit Message format
+
+We link:http://search-hadoop.com/m/Gwxwl10cFHa1[agreed] to the following SVN commit message format:
+[source]
+----
+HBASE-xxxxx <title>. (<contributor>)
+----
+If the person making the commit is the contributor, leave off the '(<contributor>)' element.
diff --git a/src/main/asciidoc/_chapters/compression.adoc b/src/main/asciidoc/_chapters/compression.adoc
new file mode 100644
index 0000000..75a050f
--- /dev/null
+++ b/src/main/asciidoc/_chapters/compression.adoc
@@ -0,0 +1,460 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[appendix]
+[[compression]]
+== Compression and Data Block Encoding In HBase(((Compression, Data Block Encoding)))
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+
+NOTE: Codecs mentioned in this section are for encoding and decoding data blocks or row keys.
+For information about replication codecs, see <>.
+
+Some of the information in this section is pulled from a link:http://search-hadoop.com/m/lL12B1PFVhp1/v=threaded[discussion] on the HBase Development mailing list.
+
+HBase supports several different compression algorithms which can be enabled on a ColumnFamily.
+Data block encoding attempts to limit duplication of information in keys, taking advantage of some of the fundamental designs and patterns of HBase, such as sorted row keys and the schema of a given table.
+Compressors reduce the size of large, opaque byte arrays in cells, and can significantly reduce the storage space needed to store uncompressed data.
+
+Compressors and data block encoding can be used together on the same ColumnFamily.
+
+.Changes Take Effect Upon Compaction
+If you change compression or encoding for a ColumnFamily, the changes take effect during compaction.
+
+Some codecs take advantage of capabilities built into Java, such as GZip compression. Others rely on native libraries. Native libraries may be available as part of Hadoop, such as LZ4. In this case, HBase only needs access to the appropriate shared library.
+
+Other codecs, such as Google Snappy, need to be installed first.
+Some codecs are licensed in ways that conflict with HBase's license and cannot be shipped as part of HBase.
+
+This section discusses common codecs that are used and tested with HBase.
+No matter what codec you use, be sure to test that it is installed correctly and is available on all nodes in your cluster.
+Extra operational steps may be necessary to be sure that codecs are available on newly-deployed nodes.
+You can use the <> utility to check that a given codec is correctly installed.
+
+To configure HBase to use a compressor, see <>.
+To enable a compressor for a ColumnFamily, see <>.
+To enable data block encoding for a ColumnFamily, see <>.
+
+.Block Compressors
+* none
+* Snappy
+* LZO
+* LZ4
+* GZ
+
+.Data Block Encoding Types
+Prefix::
+ Often, keys are very similar. Specifically, keys often share a common prefix and only differ near the end. For instance, one key might be [literal]+RowKey:Family:Qualifier0+ and the next key might be [literal]+RowKey:Family:Qualifier1+.
+ +
+In Prefix encoding, an extra column is added which holds the length of the prefix shared between the current key and the previous key.
+Assuming the first key here is totally different from the key before, its prefix length is 0.
++
+The second key's prefix length is [literal]+23+, since they have the first 23 characters in common.
++
+Obviously if the keys tend to have nothing in common, Prefix will not provide much benefit.
++
+The following image shows a hypothetical ColumnFamily with no data block encoding.
++
+.ColumnFamily with No Encoding
+image::data_block_no_encoding.png[]
++
+Here is the same data with prefix data encoding.
++
+.ColumnFamily with Prefix Encoding
+image::data_block_prefix_encoding.png[]
+
+Diff::
+ Diff encoding expands upon Prefix encoding.
+ Instead of considering the key sequentially as a monolithic series of bytes, each key field is split so that each part of the key can be compressed more efficiently.
++
+Two new fields are added: timestamp and type.
++
+If the ColumnFamily is the same as the previous row, it is omitted from the current row.
++
+If the key length, value length or type are the same as the previous row, the field is omitted.
++
+In addition, for increased compression, the timestamp is stored as a Diff from the previous row's timestamp, rather than being stored in full.
+Given the two row keys in the Prefix example, and given an exact match on timestamp and the same type, neither the value length nor the type needs to be stored for the second row, and the timestamp value for the second row is just 0, rather than a full timestamp.
++
+Diff encoding is disabled by default because writing and scanning are slower but more data is cached.
++
+This image shows the same ColumnFamily from the previous images, with Diff encoding.
++
+.ColumnFamily with Diff Encoding
+image::data_block_diff_encoding.png[]
+
+Fast Diff::
+ Fast Diff works similar to Diff, but uses a faster implementation. It also adds another field which stores a single bit to track whether the data itself is the same as the previous row. If it is, the data is not stored again.
++
+Fast Diff is the recommended codec to use if you have long keys or many columns.
++
+The data format is nearly identical to Diff encoding, so there is not an image to illustrate it.
+
+
+Prefix Tree::
+ Prefix tree encoding was introduced as an experimental feature in HBase 0.96.
+ It provides similar memory savings to the Prefix, Diff, and Fast Diff encoder, but provides faster random access at a cost of slower encoding speed.
++
+Prefix Tree may be appropriate for applications that have high block cache hit ratios. It introduces new 'tree' fields for the row and column.
+The row tree field contains a list of offsets/references corresponding to the cells in that row. This allows for a good deal of compression.
+For more details about Prefix Tree encoding, see link:https://issues.apache.org/jira/browse/HBASE-4676[HBASE-4676].
++
+It is difficult to graphically illustrate a prefix tree, so no image is included. See the Wikipedia article for link:http://en.wikipedia.org/wiki/Trie[Trie] for more general information about this data structure.
+
+=== Which Compressor or Data Block Encoder To Use
+
+The compression or codec type to use depends on the characteristics of your data. Choosing the wrong type could cause your data to take more space rather than less, and can have performance implications.
+
+In general, you need to weigh your options between smaller size and faster compression/decompression. Following are some general guidelines, expanded from a discussion at link:http://search-hadoop.com/m/lL12B1PFVhp1[Documenting Guidance on compression and codecs].
+
+* If you have long keys (compared to the values) or many columns, use a prefix encoder.
+ FAST_DIFF is recommended, as more testing is needed for Prefix Tree encoding.
+* If the values are large (and not precompressed, such as images), use a data block compressor.
+* Use GZIP for [firstterm]_cold data_, which is accessed infrequently.
+ GZIP compression uses more CPU resources than Snappy or LZO, but provides a higher compression ratio.
+* Use Snappy or LZO for [firstterm]_hot data_, which is accessed frequently.
+ Snappy and LZO use fewer CPU resources than GZIP, but do not provide as high of a compression ratio.
+* In most cases, enabling Snappy or LZO by default is a good choice, because they have a low performance overhead and provide space savings.
+* Before Snappy was made available by Google in 2011, LZO was the default.
+ Snappy has similar qualities as LZO but has been shown to perform better.
+
+[[hadoop.native.lib]]
+=== Making use of Hadoop Native Libraries in HBase
+
+The Hadoop shared library has a bunch of facilities, including compression libraries and fast CRC'ing. To make this facility available to HBase, do the following. HBase/Hadoop will fall back to using alternatives if it cannot find the native library versions -- or fail outright, if you are asking for an explicit compressor and there is no alternative available.
+
+If you see the following in your HBase logs, you know that HBase was unable to locate the Hadoop native libraries:
+[source]
+----
+2014-08-07 09:26:20,139 WARN [main] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+----
+If the libraries loaded successfully, the WARN message does not show.
+
+Let's presume your Hadoop shipped with a native library that suits the platform you are running HBase on.
+To check if the Hadoop native library is available to HBase, run the following tool (available in Hadoop 2.1 and greater):
+[source]
+----
+$ ./bin/hbase --config ~/conf_hbase org.apache.hadoop.util.NativeLibraryChecker
+2014-08-26 13:15:38,717 WARN [main] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+Native library checking:
+hadoop: false
+zlib: false
+snappy: false
+lz4: false
+bzip2: false
+2014-08-26 13:15:38,863 INFO [main] util.ExitUtil: Exiting with status 1
+----
+The above shows that the native Hadoop library is not available in the HBase context.
+
+To fix the above, either copy the Hadoop native libraries locally, or symlink to them if the Hadoop and HBase installs are adjacent in the filesystem.
+You could also point at their location by setting the [var]+LD_LIBRARY_PATH+ environment variable.
+
+Where the JVM looks to find native libraries is "system dependent" (See [class]+java.lang.System#loadLibrary(name)+). On Linux, by default, it will look in [path]_lib/native/PLATFORM_ where [var]+PLATFORM+ is the label for the platform your HBase is installed on.
+On a local Linux machine, it seems to be the concatenation of the java properties [var]+os.name+ and [var]+os.arch+ followed by whether 32 or 64 bit.
+HBase on startup prints out all of the java system properties so find the os.name and os.arch in the log.
+For example:
+[source]
+----
+...
+2014-08-06 15:27:22,853 INFO [main] zookeeper.ZooKeeper: Client environment:os.name=Linux
+2014-08-06 15:27:22,853 INFO [main] zookeeper.ZooKeeper: Client environment:os.arch=amd64
+...
+----
+So in this case, the PLATFORM string is [var]+Linux-amd64-64+.
+Copying the Hadoop native libraries or symlinking at [path]_lib/native/Linux-amd64-64_ will ensure they are found.
+Check with the Hadoop [path]_NativeLibraryChecker_.
+
+
+Here is example of how to point at the Hadoop libs with [var]+LD_LIBRARY_PATH+ environment variable:
+[source]
+----
+$ LD_LIBRARY_PATH=~/hadoop-2.5.0-SNAPSHOT/lib/native ./bin/hbase --config ~/conf_hbase org.apache.hadoop.util.NativeLibraryChecker
+2014-08-26 13:42:49,332 INFO [main] bzip2.Bzip2Factory: Successfully loaded & initialized native-bzip2 library system-native
+2014-08-26 13:42:49,337 INFO [main] zlib.ZlibFactory: Successfully loaded & initialized native-zlib library
+Native library checking:
+hadoop: true /home/stack/hadoop-2.5.0-SNAPSHOT/lib/native/libhadoop.so.1.0.0
+zlib: true /lib64/libz.so.1
+snappy: true /usr/lib64/libsnappy.so.1
+lz4: true revision:99
+bzip2: true /lib64/libbz2.so.1
+----
+Set the LD_LIBRARY_PATH environment variable in [path]_hbase-env.sh_ when starting your HBase.
+
+=== Compressor Configuration, Installation, and Use
+
+[[compressor.install]]
+==== Configure HBase For Compressors
+
+Before HBase can use a given compressor, its libraries need to be available.
+Due to licensing issues, only GZ compression is available to HBase (via native Java libraries) in a default installation.
+Other compression libraries are available via the shared library bundled with your hadoop.
+The hadoop native library needs to be findable when HBase starts.
+See <>.
+
+.Compressor Support On the Master
+
+A new configuration setting was introduced in HBase 0.95, to check the Master to determine which data block encoders are installed and configured on it, and assume that the entire cluster is configured the same.
+This option, [code]+hbase.master.check.compression+, defaults to [literal]+true+.
+This prevents the situation described in link:https://issues.apache.org/jira/browse/HBASE-6370[HBASE-6370], where a table is created or modified to support a codec that a region server does not support, leading to failures that take a long time to occur and are difficult to debug.
+
+If [code]+hbase.master.check.compression+ is enabled, libraries for all desired compressors need to be installed and configured on the Master, even if the Master does not run a region server.
+
+.Install GZ Support Via Native Libraries
+
+HBase uses Java's built-in GZip support unless the native Hadoop libraries are available on the CLASSPATH.
+The recommended way to add libraries to the CLASSPATH is to set the environment variable [var]+HBASE_LIBRARY_PATH+ for the user running HBase.
+If native libraries are not available and Java's GZIP is used, [literal]+Got
+ brand-new compressor+ reports will be present in the logs.
+See <>.
+
+[[lzo.compression]]
+.Install LZO Support
+
+HBase cannot ship with LZO because of incompatibility between HBase, which uses an Apache Software License (ASL) and LZO, which uses a GPL license.
+See the link:http://wiki.apache.org/hadoop/UsingLzoCompression[Using LZO
+ Compression] wiki page for information on configuring LZO support for HBase.
+
+If you depend upon LZO compression, consider configuring your RegionServers to fail to start if LZO is not available.
+See <>.
+
+[[lz4.compression]]
+.Configure LZ4 Support
+
+LZ4 support is bundled with Hadoop.
+Make sure the hadoop shared library (libhadoop.so) is accessible when you start HBase.
+After configuring your platform (see <>), you can make a symbolic link from HBase to the native Hadoop libraries.
+This assumes the two software installs are colocated.
+For example, if my 'platform' is Linux-amd64-64:
+[source,bourne]
+----
+$ cd $HBASE_HOME
+$ mkdir lib/native
+$ ln -s $HADOOP_HOME/lib/native lib/native/Linux-amd64-64
+----
+Use the compression tool to check that LZ4 is installed on all nodes.
+Start up (or restart) HBase.
+Afterward, you can create and alter tables to enable LZ4 as a compression codec:
+----
+hbase(main):003:0> alter 'TestTable', {NAME => 'info', COMPRESSION => 'LZ4'}
+----
+
+[[snappy.compression.installation]]
+.Install Snappy Support
+
+HBase does not ship with Snappy support because of licensing issues.
+You can install Snappy binaries (for instance, by using +yum install snappy+ on CentOS) or build Snappy from source.
+After installing Snappy, search for the shared library, which will be called [path]_libsnappy.so.X_ where X is a number.
+If you built from source, copy the shared library to a known location on your system, such as [path]_/opt/snappy/lib/_.
+
+In addition to the Snappy library, HBase also needs access to the Hadoop shared library, which will be called something like [path]_libhadoop.so.X.Y_, where X and Y are both numbers.
+Make note of the location of the Hadoop library, or copy it to the same location as the Snappy library.
+
+[NOTE]
+====
+The Snappy and Hadoop libraries need to be available on each node of your cluster.
+See <> to find out how to test that this is the case.
+
+See <> to configure your RegionServers to fail to start if a given compressor is not available.
+====
+
+Each of these library locations need to be added to the environment variable [var]+HBASE_LIBRARY_PATH+ for the operating system user that runs HBase.
+You need to restart the RegionServer for the changes to take effect.
+
+[[compression.test]]
+.CompressionTest
+
+You can use the CompressionTest tool to verify that your compressor is available to HBase:
+
+----
+
+ $ hbase org.apache.hadoop.hbase.util.CompressionTest hdfs://host/path/to/hbase snappy
+----
+
+[[hbase.regionserver.codecs]]
+.Enforce Compression Settings On a RegionServer
+
+You can configure a RegionServer so that it will fail to restart if compression is configured incorrectly, by adding the option hbase.regionserver.codecs to the [path]_hbase-site.xml_, and setting its value to a comma-separated list of codecs that need to be available.
+For example, if you set this property to [literal]+lzo,gz+, the RegionServer would fail to start if either compressor were not available.
+This would prevent a new server from being added to the cluster without having codecs configured properly.
+
+[[changing.compression]]
+==== Enable Compression On a ColumnFamily
+
+To enable compression for a ColumnFamily, use an [code]+alter+ command.
+You do not need to re-create the table or copy data.
+If you are changing codecs, be sure the old codec is still available until all the old StoreFiles have been compacted.
+
+.Enabling Compression on a ColumnFamily of an Existing Table using HBaseShell
+====
+----
+
+hbase> disable 'test'
+hbase> alter 'test', {NAME => 'cf', COMPRESSION => 'GZ'}
+hbase> enable 'test'
+----
+====
+
+.Creating a New Table with Compression On a ColumnFamily
+====
+----
+
+hbase> create 'test2', { NAME => 'cf2', COMPRESSION => 'SNAPPY' }
+----
+====
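+
+The same can be done through the Java API; the following is a rough sketch assuming an already-connected [code]+admin+ and 0.98-era classes.
+
+[source,java]
+----
+HTableDescriptor htd = new HTableDescriptor(TableName.valueOf("test2"));
+HColumnDescriptor cf = new HColumnDescriptor("cf2");
+// Requires the Snappy native libraries to be available on every node.
+cf.setCompressionType(Compression.Algorithm.SNAPPY);
+htd.addFamily(cf);
+admin.createTable(htd);
+----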
+
+.Verifying a ColumnFamily's Compression Settings
+====
+----
+
+hbase> describe 'test'
+DESCRIPTION ENABLED
+ 'test', {NAME => 'cf', DATA_BLOCK_ENCODING => 'NONE false
+ ', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0',
+ VERSIONS => '1', COMPRESSION => 'GZ', MIN_VERSIONS
+ => '0', TTL => 'FOREVER', KEEP_DELETED_CELLS => 'fa
+ lse', BLOCKSIZE => '65536', IN_MEMORY => 'false', B
+ LOCKCACHE => 'true'}
+1 row(s) in 0.1070 seconds
+----
+====
+
+==== Testing Compression Performance
+
+HBase includes a tool called LoadTestTool which provides mechanisms to test your compression performance.
+You must specify either [literal]+-write+ or [literal]+-update-read+ as your first parameter, and if you do not specify another parameter, usage advice is printed for each option.
+
+.+LoadTestTool+ Usage
+====
+----
+
+$ bin/hbase org.apache.hadoop.hbase.util.LoadTestTool -h
+usage: bin/hbase org.apache.hadoop.hbase.util.LoadTestTool
+Options:
+ -batchupdate Whether to use batch as opposed to separate
+ updates for every column in a row
+ -bloom Bloom filter type, one of [NONE, ROW, ROWCOL]
+ -compression Compression type, one of [LZO, GZ, NONE, SNAPPY,
+ LZ4]
+ -data_block_encoding Encoding algorithm (e.g. prefix compression) to
+ use for data blocks in the test column family, one
+ of [NONE, PREFIX, DIFF, FAST_DIFF, PREFIX_TREE].
+ -encryption Enables transparent encryption on the test table,
+ one of [AES]
+ -generator The class which generates load for the tool. Any
+ args for this class can be passed as colon
+ separated after class name
+ -h,--help Show usage
+ -in_memory Tries to keep the HFiles of the CF inmemory as far
+ as possible. Not guaranteed that reads are always
+ served from inmemory
+ -init_only Initialize the test table only, don't do any
+ loading
+ -key_window The 'key window' to maintain between reads and
+ writes for concurrent write/read workload. The
+ default is 0.
+ -max_read_errors The maximum number of read errors to tolerate
+ before terminating all reader threads. The default
+ is 10.
+ -multiput Whether to use multi-puts as opposed to separate
+ puts for every column in a row
+ -num_keys The number of keys to read/write
+ -num_tables A positive integer number. When a number n is
+ speicfied, load test tool will load n table
+ parallely. -tn parameter value becomes table name
+ prefix. Each table name is in format
+ _1..._n
+ -read [:<#threads=20>]
+ -regions_per_server A positive integer number. When a number n is
+ specified, load test tool will create the test
+ table with n regions per server
+ -skip_init Skip the initialization; assume test table already
+ exists
+ -start_key The first key to read/write (a 0-based index). The
+ default value is 0.
+ -tn The name of the table to read or write
+ -update [:<#threads=20>][:<#whether to
+ ignore nonce collisions=0>]
+ -write :[:<#threads=20>]
+ -zk ZK quorum as comma-separated host names without
+ port numbers
+ -zk_root name of parent znode in zookeeper
+----
+====
+
+.Example Usage of LoadTestTool
+====
+----
+
+$ hbase org.apache.hadoop.hbase.util.LoadTestTool -write 1:10:100 -num_keys 1000000
+ -read 100:30 -num_tables 1 -data_block_encoding NONE -tn load_test_tool_NONE
+----
+====
+
+[[data.block.encoding.enable]]
+== Enable Data Block Encoding
+
+Codecs are built into HBase so no extra configuration is needed.
+Codecs are enabled on a table by setting the [code]+DATA_BLOCK_ENCODING+ property.
+Disable the table before altering its DATA_BLOCK_ENCODING setting.
+Following is an example using HBase Shell:
+
+.Enable Data Block Encoding On a Table
+====
+----
+
+hbase> disable 'test'
+hbase> alter 'test', { NAME => 'cf', DATA_BLOCK_ENCODING => 'FAST_DIFF' }
+Updating all regions with the new schema...
+0/1 regions updated.
+1/1 regions updated.
+Done.
+0 row(s) in 2.2820 seconds
+hbase> enable 'test'
+0 row(s) in 0.1580 seconds
+----
+====
+
+.Verifying a ColumnFamily's Data Block Encoding
+====
+----
+
+hbase> describe 'test'
+DESCRIPTION ENABLED
+ 'test', {NAME => 'cf', DATA_BLOCK_ENCODING => 'FAST true
+ _DIFF', BLOOMFILTER => 'ROW', REPLICATION_SCOPE =>
+ '0', VERSIONS => '1', COMPRESSION => 'GZ', MIN_VERS
+ IONS => '0', TTL => 'FOREVER', KEEP_DELETED_CELLS =
+ > 'false', BLOCKSIZE => '65536', IN_MEMORY => 'fals
+ e', BLOCKCACHE => 'true'}
+1 row(s) in 0.0650 seconds
+----
+====
+
+:numbered:
+
+ifdef::backend-docbook[]
+[index]
+== Index
+// Generated automatically by the DocBook toolchain.
+endif::backend-docbook[]
diff --git a/src/main/asciidoc/_chapters/configuration.adoc b/src/main/asciidoc/_chapters/configuration.adoc
new file mode 100644
index 0000000..7fb04b6
--- /dev/null
+++ b/src/main/asciidoc/_chapters/configuration.adoc
@@ -0,0 +1,1076 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[[configuration]]
+= Apache HBase Configuration
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+
+This chapter expands upon the <> chapter to further explain configuration of Apache HBase.
+Please read this chapter carefully, especially <> to ensure that your HBase testing and deployment goes smoothly, and prevent data loss.
+
+Apache HBase uses the same configuration system as Apache Hadoop.
+All configuration files are located in the [path]_conf/_ directory, which needs to be kept in sync for each node on your cluster.
+
+.HBase Configuration Files
+[path]_backup-masters_::
+ Not present by default.
+ A plain-text file which lists hosts on which the Master should start a backup Master process, one host per line.
+
+[path]_hadoop-metrics2-hbase.properties_::
+ Used to connect HBase Hadoop's Metrics2 framework.
+ See the link:http://wiki.apache.org/hadoop/HADOOP-6728-MetricsV2[Hadoop Wiki
+ entry] for more information on Metrics2.
+ Contains only commented-out examples by default.
+
+[path]_hbase-env.cmd_ and [path]_hbase-env.sh_::
+ Script for Windows and Linux / Unix environments to set up the working environment for HBase, including the location of Java, Java options, and other environment variables.
+ The file contains many commented-out examples to provide guidance.
+
+[path]_hbase-policy.xml_::
+ The default policy configuration file used by RPC servers to make authorization decisions on client requests.
+ Only used if HBase security (<>) is enabled.
+
+[path]_hbase-site.xml_::
+ The main HBase configuration file.
+ This file specifies configuration options which override HBase's default configuration.
+ You can view (but do not edit) the default configuration file at [path]_docs/hbase-default.xml_.
+ You can also view the entire effective configuration for your cluster (defaults and overrides) in the [label]#HBase Configuration# tab of the HBase Web UI.
+
+[path]_log4j.properties_::
+ Configuration file for HBase logging via [code]+log4j+.
+
+[path]_regionservers_::
+ A plain-text file containing a list of hosts which should run a RegionServer in your HBase cluster.
+ By default this file contains the single entry [literal]+localhost+.
+ It should contain a list of hostnames or IP addresses, one per line, and should only contain [literal]+localhost+ if each node in your cluster will run a RegionServer on its [literal]+localhost+ interface.
+
+.Checking XML Validity
+[TIP]
+====
+When you edit XML, it is a good idea to use an XML-aware editor to be sure that your syntax is correct and your XML is well-formed.
+You can also use the +xmllint+ utility to check that your XML is well-formed.
+By default, +xmllint+ re-flows and prints the XML to standard output.
+To check for well-formedness and only print output if errors exist, use the command +xmllint -noout filename.xml+.
+====
+
+.Keep Configuration In Sync Across the Cluster
+[WARNING]
+====
+When running in distributed mode, after you make an edit to an HBase configuration, make sure you copy the content of the [path]_conf/_ directory to all nodes of the cluster.
+HBase will not do this for you.
+Use +rsync+, +scp+, or another secure mechanism for copying the configuration files to your nodes.
+For most configurations, a restart is needed for servers to pick up changes.
+An exception is dynamic configuration, described later in this chapter.
+====
+
+[[basic.prerequisites]]
+== Basic Prerequisites
+
+This section lists required services and some required system configuration.
+
+.Java
+[cols="2", options="header"]
+|===
+| Java Version | Support
+| JDK 6 | Not Supported
+| JDK 7 | Supported
+| JDK 8 | Running with JDK 8 works but is not well tested. Building with JDK 8 would require removal of the deprecated remove() method of the PoolMap class and is under consideration. See HBASE-7608 for more information about JDK 8 support.
+|===
+
+NOTE: In HBase 0.98.5 and newer, you must set [var]+JAVA_HOME+ on each node of your cluster. [path]_hbase-env.sh_ provides a handy mechanism to do this.
+
+.Operating System Utilities
+ssh::
+ HBase uses the Secure Shell (ssh) command and utilities extensively to communicate between cluster nodes. Each server in the cluster must be running +ssh+ so that the Hadoop and HBase daemons can be managed. You must be able to connect to all nodes via SSH, including the local node, from the Master as well as any backup Master, using a shared key rather than a password. You can see the basic methodology for such a set-up in Linux or Unix systems at <>. If your cluster nodes use OS X, see the section, link:http://wiki.apache.org/hadoop/Running_Hadoop_On_OS_X_10.5_64-bit_%28Single-Node_Cluster%29[SSH: Setting up Remote Desktop and Enabling Self-Login] on the Hadoop wiki.
+
+DNS::
+ HBase uses the local hostname to self-report its IP address. Both forward and reverse DNS resolving must work in versions of HBase previous to 0.92.0. The link:https://github.com/sujee/hadoop-dns-checker[hadoop-dns-checker] tool can be used to verify DNS is working correctly on the cluster. The project README file provides detailed instructions on usage.
+
+Loopback IP::
+ Prior to hbase-0.96.0, HBase only used the IP address [systemitem]+127.0.0.1+ to refer to [code]+localhost+, and this could not be configured.
+ See <>.
+
+NTP::
+ The clocks on cluster nodes should be synchronized. A small amount of variation is acceptable, but larger amounts of skew can cause erratic and unexpected behavior. Time synchronization is one of the first things to check if you see unexplained problems in your cluster. It is recommended that you run a Network Time Protocol (NTP) service, or another time-synchronization mechanism, on your cluster, and that all nodes look to the same service for time synchronization. See the link:http://www.tldp.org/LDP/sag/html/basic-ntp-config.html[Basic NTP Configuration] at [citetitle]_The Linux Documentation Project (TLDP)_ to set up NTP.
+
+Limits on Number of Files and Processes (ulimit)::
+ Apache HBase is a database. It requires the ability to open a large number of files at once. Many Linux distributions limit the number of files a single user is allowed to open to [literal]+1024+ (or [literal]+256+ on older versions of OS X). You can check this limit on your servers by running the command +ulimit -n+ when logged in as the user which runs HBase. See <> for some of the problems you may experience if the limit is too low. You may also notice errors such as the following:
++
+----
+2010-04-06 03:04:37,542 INFO org.apache.hadoop.hdfs.DFSClient: Exception increateBlockOutputStream java.io.EOFException
+2010-04-06 03:04:37,542 INFO org.apache.hadoop.hdfs.DFSClient: Abandoning block blk_-6935524980745310745_1391901
+----
++
+It is recommended to raise the ulimit to at least 10,000, but more likely 10,240, because the value is usually expressed in multiples of 1024. Each ColumnFamily has at least one StoreFile, and possibly more than 6 StoreFiles if the region is under load. The number of open files required depends upon the number of ColumnFamilies and the number of regions. The following is a rough formula for calculating the potential number of open files on a RegionServer.
++
+.Calculate the Potential Number of Open Files
+----
+(StoreFiles per ColumnFamily) x (regions per RegionServer)
+----
++
+For example, assuming that a schema had 3 ColumnFamilies per region with an average of 3 StoreFiles per ColumnFamily, and there are 100 regions per RegionServer, the JVM will open `3 * 3 * 100 = 900` file descriptors, not counting open JAR files, configuration files, and others. Opening a file does not take many resources, and the risk of allowing a user to open too many files is minimal.
++
+Another related setting is the number of processes a user is allowed to run at once. In Linux and Unix, the number of processes is set using the +ulimit -u+ command. This should not be confused with the +nproc+ command, which reports the number of processing units available to a given user. Under load, a process limit that is too low can cause OutOfMemoryError exceptions. See Jack Levin's major HDFS issues thread on the hbase-users mailing list, from 2011.
++
+Configuring the maximum number of file descriptors and processes for the user who is running the HBase process is an operating system configuration, rather than an HBase configuration. It is also important to be sure that the settings are changed for the user that actually runs HBase. To see which user started HBase, and that user's ulimit configuration, look at the first line of the HBase log for that instance. A useful read for setting configuration on your Hadoop cluster is Aaron Kimball's Configuration Parameters: What can you just ignore?
++
+.`ulimit` Settings on Ubuntu
+====
+To configure ulimit settings on Ubuntu, edit /etc/security/limits.conf, which is a space-delimited file with four columns. Refer to the man page for limits.conf for details about the format of this file. In the following example, the first line sets both soft and hard limits for the number of open files (nofile) to 32768 for the operating system user with the username hadoop. The second line sets the number of processes to 32000 for the same user.
+----
+hadoop - nofile 32768
+hadoop - nproc 32000
+----
+The settings are only applied if the Pluggable Authentication Module (PAM) environment is directed to use them. To configure PAM to use these limits, be sure that the /etc/pam.d/common-session file contains the following line:
+----
+session required pam_limits.so
+----
+====
+
+Windows::
+ Prior to HBase 0.96, testing for running HBase on Microsoft Windows was limited.
+ Running production systems on Windows nodes is not recommended.
+
+
+[[hadoop]]
+=== link:http://hadoop.apache.org[Hadoop](((Hadoop)))
+
+The following table summarizes the versions of Hadoop supported with each version of HBase.
+Based on the version of HBase, you should select the most appropriate version of Hadoop.
+You can use Apache Hadoop, or a vendor's distribution of Hadoop.
+No distinction is made here.
+See link:http://wiki.apache.org/hadoop/Distributions%20and%20Commercial%20Support[Distributions and Commercial Support] for information about vendors of Hadoop.
+
+.Hadoop 2.x is recommended.
+[TIP]
+====
+Hadoop 2.x is faster and includes features, such as short-circuit reads, which will help improve your HBase random read profile.
+Hadoop 2.x also includes important bug fixes that will improve your overall HBase experience.
+HBase 0.98 drops support for Hadoop 1.0, deprecates use of Hadoop 1.1+, and HBase 1.0 will not support Hadoop 1.x.
+====
+
+Use the following legend to interpret this table: S = supported, X = not supported, NT = not tested.
+
+.Hadoop version support matrix
+[cols="1,1,1,1,1,1", options="header"]
+|===
+| | HBase-0.92.x | HBase-0.94.x | HBase-0.96.x | HBase-0.98.x (Support for Hadoop 1.1+ is deprecated.) | HBase-1.0.x (Hadoop 1.x is NOT supported)
+|Hadoop-0.20.205 | S | X | X | X | X
+|Hadoop-0.22.x | S | X | X | X | X
+|Hadoop-1.0.x |X | X | X | X | X
+|Hadoop-1.1.x | NT | S | S | NT | X
+|Hadoop-0.23.x | X | S | NT | X | X
+|Hadoop-2.0.x-alpha | X | NT | X | X | X
+|Hadoop-2.1.0-beta | X | NT | S | X | X
+|Hadoop-2.2.0 | X | NT | S | S | NT
+|Hadoop-2.3.x | X | NT | S | S | NT
+|Hadoop-2.4.x | X | NT | S | S | S
+|Hadoop-2.5.x | X | NT | S | S | S
+|===
+
+.Replace the Hadoop Bundled With HBase!
+[NOTE]
+====
+Because HBase depends on Hadoop, it bundles an instance of the Hadoop jar under its [path]_lib_ directory.
+The bundled jar is ONLY for use in standalone mode.
+In distributed mode, it is _critical_ that the version of Hadoop that is out on your cluster match what is under HBase.
+Replace the hadoop jar found in the HBase lib directory with the hadoop jar you are running on your cluster to avoid version mismatch issues.
+Make sure you replace the jar in HBase everywhere on your cluster.
+Hadoop version mismatch issues have various manifestations, but often everything looks like it is hung.
+====
+
+[[hadoop2.hbase_0.94]]
+==== Apache HBase 0.94 with Hadoop 2
+
+To get 0.94.x to run on Hadoop 2.2.0, you need to change the Hadoop 2 and protobuf versions in the [path]_pom.xml_. Here is a diff with the pom.xml changes:
+
+[source]
+----
+$ svn diff pom.xml
+Index: pom.xml
+===================================================================
+--- pom.xml (revision 1545157)
++++ pom.xml (working copy)
+@@ -1034,7 +1034,7 @@
+ <slf4j.version>1.4.3</slf4j.version>
+ <log4j.version>1.2.16</log4j.version>
+ <mockito-all.version>1.8.5</mockito-all.version>
+- <protobuf.version>2.4.0a</protobuf.version>
++ <protobuf.version>2.5.0</protobuf.version>
+ <stax-api.version>1.0.1</stax-api.version>
+ <thrift.version>0.8.0</thrift.version>
+ <zookeeper.version>3.4.5</zookeeper.version>
+@@ -2241,7 +2241,7 @@
+ </property>
+ </activation>
+ <properties>
+- <hadoop.version>2.0.0-alpha</hadoop.version>
++ <hadoop.version>2.2.0</hadoop.version>
+ <slf4j.version>1.6.1</slf4j.version>
+ </properties>
+ <dependencies>
+----
+
+The next step is to regenerate the Protobuf files, assuming that the Protobuf compiler (+protoc+) is installed:
+
+* Go to the hbase root folder, using the command line;
+* Type the following commands:
++
+
+[source,bourne]
+----
+$ protoc -Isrc/main/protobuf --java_out=src/main/java src/main/protobuf/hbase.proto
+----
++
+
+[source,bourne]
+----
+$ protoc -Isrc/main/protobuf --java_out=src/main/java src/main/protobuf/ErrorHandling.proto
+----
+
+
+Build against the hadoop 2 profile by running something like the following command:
+
+----
+$ mvn clean install assembly:single -Dhadoop.profile=2.0 -DskipTests
+----
+
+[[hadoop.hbase_0.94]]
+==== Apache HBase 0.92 and 0.94
+
+HBase 0.92 and 0.94 versions can work with Hadoop versions, 0.20.205, 0.22.x, 1.0.x, and 1.1.x.
+HBase-0.94 can additionally work with Hadoop-0.23.x and 2.x, but you may have to recompile the code using the specific Maven profile (see the top-level pom.xml).
+
+[[hadoop.hbase_0.96]]
+==== Apache HBase 0.96
+
+As of Apache HBase 0.96.x, Apache Hadoop 1.0.x at least is required.
+Hadoop 2 is strongly encouraged (faster but also has fixes that help MTTR). We will no longer run properly on older Hadoops such as 0.20.205 or branch-0.20-append.
+Do not move to Apache HBase 0.96.x if you cannot upgrade your Hadoop. See link:http://search-hadoop.com/m/7vFVx4EsUb2[HBase, mail # dev - DISCUSS:
+ Have hbase require at least hadoop 1.0.0 in hbase 0.96.0?]
+
+[[hadoop.older.versions]]
+==== Hadoop versions 0.20.x - 1.x
+
+HBase will lose data unless it is running on an HDFS that has a durable [code]+sync+ implementation.
+DO NOT use Hadoop 0.20.2, Hadoop 0.20.203.0, and Hadoop 0.20.204.0 which DO NOT have this attribute.
+Currently only Hadoop versions 0.20.205.x or any release in excess of this version -- this includes hadoop-1.0.0 -- have a working, durable sync.
+The Cloudera blog post link:http://www.cloudera.com/blog/2012/01/an-update-on-apache-hadoop-1-0/[An
+ update on Apache Hadoop 1.0] by Charles Zedlweski has a nice exposition on how all the Hadoop versions relate.
+It's worth checking out if you are having trouble making sense of the Hadoop version morass.
+
+Sync has to be explicitly enabled by setting [var]+dfs.support.append+ equal to true on both the client side -- in [path]_hbase-site.xml_ -- and on the server side in [path]_hdfs-site.xml_ (the sync facility HBase needs is a subset of the append code path).
+
+[source,xml]
+----
+<property>
+  <name>dfs.support.append</name>
+  <value>true</value>
+</property>
+----
+
+You will have to restart your cluster after making this edit.
+Ignore the chicken-little comment you'll find in the [path]_hdfs-default.xml_ in the description for the [var]+dfs.support.append+ configuration.
+
+[[hadoop.security]]
+==== Apache HBase on Secure Hadoop
+
+Apache HBase will run on any Hadoop 0.20.x that incorporates Hadoop security features as long as you do as suggested above and replace the Hadoop jar that ships with HBase with the secure version.
+If you want to read more about how to setup Secure HBase, see <>.
+
+[[dfs.datanode.max.transfer.threads]]
+==== [var]+dfs.datanode.max.transfer.threads+ (((dfs.datanode.max.transfer.threads)))
+
+An HDFS datanode has an upper bound on the number of files that it will serve at any one time.
+Before doing any loading, make sure you have configured Hadoop's [path]_conf/hdfs-site.xml_, setting the [var]+dfs.datanode.max.transfer.threads+ value to at least the following:
+
+[source,xml]
+----
+<property>
+  <name>dfs.datanode.max.transfer.threads</name>
+  <value>4096</value>
+</property>
+----
+
+Be sure to restart your HDFS after making the above configuration.
+
+Not having this configuration in place makes for strange-looking failures.
+One manifestation is a complaint about missing blocks.
+For example:
+
+----
+10/12/08 20:10:31 INFO hdfs.DFSClient: Could not obtain block
+ blk_XXXXXXXXXXXXXXXXXXXXXX_YYYYYYYY from any node: java.io.IOException: No live nodes
+ contain current block. Will get new block locations from namenode and retry...
+----
+
+See also <> and note that this property was previously known as [var]+dfs.datanode.max.xcievers+ (e.g. link:http://ccgtech.blogspot.com/2010/02/hadoop-hdfs-deceived-by-xciever.html[
+ Hadoop HDFS: Deceived by Xciever]).
+
+[[zookeeper.requirements]]
+=== ZooKeeper Requirements
+
+ZooKeeper 3.4.x is required as of HBase 1.0.0.
+HBase makes use of the [method]+multi+ functionality that is only available since ZooKeeper 3.4.0 (+useMulti+ defaults to true in HBase 1.0.0). See link:[HBASE-12241 The crash of regionServer when taking deadserver's replication queue breaks replication] and link:[Use ZK.multi when available for HBASE-6710 0.92/0.94 compatibility fix] for background.
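+
+For pre-1.0 releases running against a ZooKeeper 3.4.x ensemble, [method]+multi+ support is switched on with the [var]+hbase.zookeeper.useMulti+ property.
+A minimal [path]_hbase-site.xml_ sketch follows; verify the property name and default against the hbase-default.xml shipped with your version:
+
+[source,xml]
+----
+<!-- Sketch: enable ZooKeeper multi-update support on pre-1.0 HBase; verify for your release -->
+<property>
+  <name>hbase.zookeeper.useMulti</name>
+  <value>true</value>
+</property>
+----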
+
+[[standalone_dist]]
+== HBase run modes: Standalone and Distributed
+
+HBase has two run modes: <> and <>.
+Out of the box, HBase runs in standalone mode.
+Whatever your mode, you will need to configure HBase by editing files in the HBase [path]_conf_ directory.
+At a minimum, you must edit [code]+conf/hbase-env.sh+ to tell HBase which +java+ to use.
+In this file you set HBase environment variables such as the heapsize and other options for the +JVM+, the preferred location for log files, etc.
+Set [var]+JAVA_HOME+ to point at the root of your +java+ install.
+
+[[standalone]]
+=== Standalone HBase
+
+This is the default mode.
+Standalone mode is what is described in the <> section.
+In standalone mode, HBase does not use HDFS -- it uses the local filesystem instead -- and it runs all HBase daemons and a local ZooKeeper all up in the same JVM.
+Zookeeper binds to a well known port so clients may talk to HBase.
+
+=== Distributed
+
+Distributed mode can be subdivided into distributed but all daemons run on a single node -- a.k.a _pseudo-distributed_-- and _fully-distributed_ where the daemons are spread across all nodes in the cluster.
+The pseudo-distributed vs fully-distributed nomenclature comes from Hadoop.
+
+Pseudo-distributed mode can run against the local filesystem or it can run against an instance of the _Hadoop Distributed File System_ (HDFS). Fully-distributed mode can ONLY run on HDFS.
+See the Hadoop link:http://hadoop.apache.org/common/docs/r1.1.1/api/overview-summary.html#overview_description[
+ requirements and instructions] for how to set up HDFS for Hadoop 1.x.
+A good walk-through for setting up HDFS on Hadoop 2 is at link:http://www.alexjf.net/blog/distributed-systems/hadoop-yarn-installation-definitive-guide[Hadoop YARN installation - The definitive guide].
+
+Below we describe the different distributed setups.
+Starting, verification and exploration of your install, whether a _pseudo-distributed_ or _fully-distributed_ configuration is described in a section that follows, <>.
+The same verification script applies to both deploy types.
+
+[[pseudo]]
+==== Pseudo-distributed
+
+.Pseudo-Distributed Quickstart
+[NOTE]
+====
+A quickstart has been added to the <> chapter.
+See <>.
+Some of the information that was originally in this section has been moved there.
+====
+
+A pseudo-distributed mode is simply a fully-distributed mode run on a single host.
+Use this configuration for testing and prototyping HBase.
+Do not use this configuration for production nor for evaluating HBase performance.
+
+[[fully_dist]]
+=== Fully-distributed
+
+By default, HBase runs in standalone mode.
+Both standalone mode and pseudo-distributed mode are provided for the purposes of small-scale testing.
+For a production environment, distributed mode is appropriate.
+In distributed mode, multiple instances of HBase daemons run on multiple servers in the cluster.
+
+Just as in pseudo-distributed mode, a fully distributed configuration requires that you set the [code]+hbase.cluster.distributed+ property to [literal]+true+.
+Typically, the [code]+hbase.rootdir+ is configured to point to a highly-available HDFS filesystem.
+
+In addition, the cluster is configured so that multiple cluster nodes enlist as RegionServers, ZooKeeper QuorumPeers, and backup HMaster servers.
+These configuration basics are all demonstrated in <>.
+
+.Distributed RegionServers
+Typically, your cluster will contain multiple RegionServers all running on different servers, as well as primary and backup Master and Zookeeper daemons.
+The [path]_conf/regionservers_ file on the master server contains a list of hosts whose RegionServers are associated with this cluster.
+Each host is on a separate line.
+All hosts listed in this file will have their RegionServer processes started and stopped when the master server starts or stops.
+
+.ZooKeeper and HBase
+See section <> for ZooKeeper setup for HBase.
+
+.Example Distributed HBase Cluster
+====
+This is a bare-bones [path]_conf/hbase-site.xml_ for a distributed HBase cluster.
+A cluster that is used for real-world work would contain more custom configuration parameters.
+Most HBase configuration directives have default values, which are used unless the value is overridden in the [path]_hbase-site.xml_.
+See <> for more information.
+
+[source,xml]
+----
+<configuration>
+  <property>
+    <name>hbase.rootdir</name>
+    <value>hdfs://namenode.example.org:8020/hbase</value>
+  </property>
+  <property>
+    <name>hbase.cluster.distributed</name>
+    <value>true</value>
+  </property>
+  <property>
+    <name>hbase.zookeeper.quorum</name>
+    <value>node-a.example.com,node-b.example.com,node-c.example.com</value>
+  </property>
+</configuration>
+----
+
+This is an example [path]_conf/regionservers_ file, which contains a list of each node that should run a RegionServer in the cluster.
+These nodes need HBase installed and they need to use the same contents of the [path]_conf/_ directory as the Master server.
+
+[source]
+----
+
+node-a.example.com
+node-b.example.com
+node-c.example.com
+----
+
+This is an example [path]_conf/backup-masters_ file, which contains a list of each node that should run a backup Master instance.
+The backup Master instances will sit idle unless the main Master becomes unavailable.
+
+[source]
+----
+
+node-b.example.com
+node-c.example.com
+----
+====
+
+.Distributed HBase Quickstart
+See <> for a walk-through of a simple three-node cluster configuration with multiple ZooKeeper, backup HMaster, and RegionServer instances.
+
+.Procedure: HDFS Client Configuration
+. Of note, if you have made HDFS client configuration changes on your Hadoop cluster, such as configuration directives for HDFS clients, as opposed to server-side configurations, you must use one of the following methods to enable HBase to see and use these configuration changes:
++
+a. Add a pointer to your [var]+HADOOP_CONF_DIR+ to the [var]+HBASE_CLASSPATH+ environment variable in [path]_hbase-env.sh_.
+b. Add a copy of [path]_hdfs-site.xml_ (or [path]_hadoop-site.xml_) or, better, symlinks, under [path]_${HBASE_HOME}/conf_, or
+c. If only a small set of HDFS client configurations is needed, add them to [path]_hbase-site.xml_.
+
+
+An example of such an HDFS client configuration is [var]+dfs.replication+.
+If, for example, you want to run with a replication factor of 5, HBase will create files with the default replication factor of 3 unless you use one of the methods above to make the configuration available to HBase.
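+
+As an illustration of option (c), here is a hedged [path]_hbase-site.xml_ sketch carrying the HDFS client setting (the value 5 is only an example):
+
+[source,xml]
+----
+<!-- Example only: make the HDFS client used by HBase write with replication factor 5 -->
+<property>
+  <name>dfs.replication</name>
+  <value>5</value>
+</property>
+----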
+
+[[confirm]]
+== Running and Confirming Your Installation
+
+Make sure HDFS is running first.
+Start and stop the Hadoop HDFS daemons by running [path]_bin/start-dfs.sh_ over in the [var]+HADOOP_HOME+ directory.
+You can ensure it started properly by testing the +put+ and +get+ of files into the Hadoop filesystem.
+HBase does not normally use the mapreduce daemons.
+These do not need to be started.
+
+_If_ you are managing your own ZooKeeper, start it and confirm that it is running; otherwise, HBase will start up ZooKeeper for you as part of its start process.
+
+Start HBase with the following command:
+
+----
+bin/start-hbase.sh
+----
+
+Run the above from the [var]+HBASE_HOME+ directory.
+
+You should now have a running HBase instance.
+HBase logs can be found in the [path]_logs_ subdirectory.
+Check them out especially if HBase had trouble starting.
+
+HBase also puts up a UI listing vital attributes.
+By default it's deployed on the Master host at port 16010 (HBase RegionServers listen on port 16020 by default and put up an informational HTTP server at 16030). If the Master were running on a host named [var]+master.example.org+ on the default port, to see the Master's homepage you'd point your browser at [path]_http://master.example.org:16010_.
+
+Prior to HBase 0.98, the Master UI was deployed on port 60010, and the HBase RegionServers listened on port 60020 and put up an informational HTTP server at 60030.
+
+Once HBase has started, see the <> for how to create tables, add data, scan your insertions, and finally disable and drop your tables.
+
+To stop HBase after exiting the HBase shell enter
+
+----
+$ ./bin/stop-hbase.sh
+stopping hbase...............
+----
+
+Shutdown can take a moment to complete.
+It can take longer if your cluster is comprised of many machines.
+If you are running a distributed operation, be sure to wait until HBase has shut down completely before stopping the Hadoop daemons.
+
+[[config.files]]
+== Configuration Files
+
+[[hbase.site]]
+=== [path]_hbase-site.xml_ and [path]_hbase-default.xml_
+
+Just as in Hadoop where you add site-specific HDFS configuration to the [path]_hdfs-site.xml_ file, for HBase, site specific customizations go into the file [path]_conf/hbase-site.xml_.
+For the list of configurable properties, see <> below or view the raw [path]_hbase-default.xml_ source file in the HBase source code at [path]_src/main/resources_.
+
+Not all configuration options make it out to [path]_hbase-default.xml_.
+Configuration that it is thought rare anyone would change can exist only in code; the only way to turn up such configurations is via a reading of the source code itself.
+
+Currently, changes here will require a cluster restart for HBase to notice the change.
+// hbase/src/main/asciidoc
+//
+include::../../../../target/asciidoc/hbase-default.adoc[]
+
+
+[[hbase.env.sh]]
+=== [path]_hbase-env.sh_
+
+Set HBase environment variables in this file.
+Examples include options to pass the JVM on start of an HBase daemon such as heap size and garbage collector configs.
+You can also set other HBase configuration such as log directories, niceness, ssh options, where to locate process pid files, and so on.
+Open the file at [path]_conf/hbase-env.sh_ and peruse its content.
+Each option is fairly well documented.
+Add your own environment variables here if you want them read by HBase daemons on startup.
+
+Changes here will require a cluster restart for HBase to notice the change.
+
+[[log4j]]
+=== [path]_log4j.properties_
+
+Edit this file to change the rate at which HBase log files are rolled and to change the level at which HBase logs messages.
+
+Changes here will require a cluster restart for HBase to notice the change though log levels can be changed for particular daemons via the HBase UI.
+
+[[client_dependencies]]
+=== Client configuration and dependencies connecting to an HBase cluster
+
+If you are running HBase in standalone mode, you don't need to configure anything for your client to work, provided that the client and HBase are on the same machine.
+
+Since the HBase Master may move around, clients bootstrap by looking to ZooKeeper for current critical locations.
+ZooKeeper is where all these values are kept.
+Thus clients require the location of the ZooKeeper ensemble information before they can do anything else.
+Usually the ensemble location is kept out in [path]_hbase-site.xml_ and is picked up by the client from the [var]+CLASSPATH+.
+
+If you are configuring an IDE to run a HBase client, you should include the [path]_conf/_ directory on your classpath so [path]_hbase-site.xml_ settings can be found (or add [path]_src/test/resources_ to pick up the hbase-site.xml used by tests).
+
+Minimally, a client of HBase needs several libraries in its [var]+CLASSPATH+ when connecting to a cluster, including:
+[source]
+----
+
+commons-configuration (commons-configuration-1.6.jar)
+commons-lang (commons-lang-2.5.jar)
+commons-logging (commons-logging-1.1.1.jar)
+hadoop-core (hadoop-core-1.0.0.jar)
+hbase (hbase-0.92.0.jar)
+log4j (log4j-1.2.16.jar)
+slf4j-api (slf4j-api-1.5.8.jar)
+slf4j-log4j (slf4j-log4j12-1.5.8.jar)
+zookeeper (zookeeper-3.4.2.jar)
+----
+
+An example basic [path]_hbase-site.xml_ for client only might look as follows:
+[source,xml]
+----
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>hbase.zookeeper.quorum</name>
+    <value>example1,example2,example3</value>
+    <description>The directory shared by region servers.</description>
+  </property>
+</configuration>
+----
+
+[[java.client.config]]
+==== Java client configuration
+
+The configuration used by a Java client is kept in an link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HBaseConfiguration[HBaseConfiguration] instance.
+
+The factory method on HBaseConfiguration, [code]+HBaseConfiguration.create();+, on invocation, will read in the content of the first [path]_hbase-site.xml_ found on the client's [var]+CLASSPATH+, if one is present (Invocation will also factor in any [path]_hbase-default.xml_ found; an hbase-default.xml ships inside the [path]_hbase.X.X.X.jar_). It is also possible to specify configuration directly without having to read from a [path]_hbase-site.xml_.
+For example, to set the ZooKeeper ensemble for the cluster programmatically do as follows:
+
+[source,java]
+----
+Configuration config = HBaseConfiguration.create();
+config.set("hbase.zookeeper.quorum", "localhost"); // Here we are running zookeeper locally
+----
+
+If multiple ZooKeeper instances make up your ZooKeeper ensemble, they may be specified in a comma-separated list (just as in the [path]_hbase-site.xml_ file). This populated [class]+Configuration+ instance can then be passed to an link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HTable.html[HTable], and so on.
+
+[[example_config]]
+== Example Configurations
+
+=== Basic Distributed HBase Install
+
+Here is an example basic configuration for a distributed ten node cluster.
+The nodes are named [var]+example0+, [var]+example1+, etc., through node [var]+example9+ in this example.
+The HBase Master and the HDFS namenode are running on the node [var]+example0+.
+RegionServers run on nodes [var]+example1+-[var]+example9+.
+A 3-node ZooKeeper ensemble runs on [var]+example1+, [var]+example2+, and [var]+example3+ on the default ports.
+ZooKeeper data is persisted to the directory [path]_/export/zookeeper_.
+Below we show what the main configuration files -- [path]_hbase-site.xml_, [path]_regionservers_, and [path]_hbase-env.sh_ -- found in the HBase [path]_conf_ directory might look like.
+
+[[hbase_site]]
+==== [path]_hbase-site.xml_
+
+[source,xml]
+----
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>hbase.zookeeper.quorum</name>
+    <value>example1,example2,example3</value>
+    <description>The directory shared by RegionServers.</description>
+  </property>
+  <property>
+    <name>hbase.zookeeper.property.dataDir</name>
+    <value>/export/zookeeper</value>
+    <description>Property from ZooKeeper config zoo.cfg.
+    The directory where the snapshot is stored.</description>
+  </property>
+  <property>
+    <name>hbase.rootdir</name>
+    <value>hdfs://example0:8020/hbase</value>
+    <description>The directory shared by RegionServers.</description>
+  </property>
+  <property>
+    <name>hbase.cluster.distributed</name>
+    <value>true</value>
+    <description>The mode the cluster will be in. Possible values are
+      false: standalone and pseudo-distributed setups with managed Zookeeper
+      true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh)
+    </description>
+  </property>
+</configuration>
+----
+
+[[regionservers]]
+==== [path]_regionservers_
+
+In this file you list the nodes that will run RegionServers.
+In our case, these nodes are [var]+example1+-[var]+example9+.
+
+[source]
+----
+
+example1
+example2
+example3
+example4
+example5
+example6
+example7
+example8
+example9
+----
+
+[[hbase_env]]
+==== [path]_hbase-env.sh_
+
+The following lines in the [path]_hbase-env.sh_ file show how to set the [var]+JAVA_HOME+ environment variable (required for HBase 0.98.5 and newer) and set the heap to 4 GB (rather than the default value of 1 GB). If you copy and paste this example, be sure to adjust the [var]+JAVA_HOME+ to suit your environment.
+
+----
+
+# The java implementation to use.
+export JAVA_HOME=/usr/java/jdk1.7.0/
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+export HBASE_HEAPSIZE=4096
+----
+
+Use +rsync+ to copy the content of the [path]_conf_ directory to all nodes of the cluster.
+
+[[important_configurations]]
+== The Important Configurations
+
+Below we list the _important_ configurations.
+We've divided this section into required configuration and worth-a-look recommended configs.
+
+[[required_configuration]]
+=== Required Configurations
+
+Review the <> and <> sections.
+
+[[big.cluster.config]]
+==== Big Cluster Configurations
+
+In a cluster with a lot of regions, it is possible that if an eager-beaver regionserver checks in soon after master start while all the rest of the regionservers are lagging, this first server to check in will be assigned all regions.
+If there are lots of regions, this first server could buckle under the load.
+To prevent this scenario from happening, raise [var]+hbase.master.wait.on.regionservers.mintostart+ from its default value of 1.
+See link:https://issues.apache.org/jira/browse/HBASE-6389[HBASE-6389 Modify the
+ conditions to ensure that Master waits for sufficient number of Region Servers before
+ starting region assignments] for more detail.
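+
+As an illustration, here is a hedged [path]_hbase-site.xml_ sketch that makes the Master wait for at least three RegionServers before it starts assigning regions (the value 3 is only an example; size it for your cluster):
+
+[source,xml]
+----
+<!-- Example only: wait for 3 RegionServers to check in before region assignment -->
+<property>
+  <name>hbase.master.wait.on.regionservers.mintostart</name>
+  <value>3</value>
+</property>
+----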
+
+[[backup.master.fail.fast]]
+==== If a backup Master, making primary Master fail fast
+
+If the primary Master loses its connection with ZooKeeper, it will fall into a loop where it keeps trying to reconnect.
+Disable this functionality if you are running more than one Master, i.e. a backup Master.
+If you fail to do so, the dying Master may continue to receive RPCs even though another Master has assumed the role of primary.
+See the configuration <>.
+
+=== Recommended Configurations
+
+[[recommended_configurations.zk]]
+==== ZooKeeper Configuration
+
+[[sect.zookeeper.session.timeout]]
+===== [var]+zookeeper.session.timeout+
+
+The default timeout is three minutes (specified in milliseconds). This means that if a server crashes, it will be three minutes before the Master notices the crash and starts recovery.
+You might like to tune the timeout down to a minute or even less, so the Master notices failures sooner.
+Before changing this value, be sure you have your JVM garbage collection configuration under control otherwise, a long garbage collection that lasts beyond the ZooKeeper session timeout will take out your RegionServer (You might be fine with this -- you probably want recovery to start on the server if a RegionServer has been in GC for a long period of time).
+
+To change this configuration, edit [path]_hbase-site.xml_, copy the changed file around the cluster and restart.
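+
+For example, here is a hedged [path]_hbase-site.xml_ sketch that lowers the timeout to 60 seconds (the value is in milliseconds and is only an example):
+
+[source,xml]
+----
+<!-- Example only: lower the ZooKeeper session timeout to one minute -->
+<property>
+  <name>zookeeper.session.timeout</name>
+  <value>60000</value>
+</property>
+----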
+
+We set this value high to save our having to field noob questions up on the mailing lists asking why a RegionServer went down during a massive import.
+The usual cause is that their JVM is untuned and they are running into long GC pauses.
+Our thinking is that while users are getting familiar with HBase, we'd save them having to know all of its intricacies.
+Later when they've built some confidence, then they can play with configuration such as this.
+
+[[zookeeper.instances]]
+===== Number of ZooKeeper Instances
+
+See <>.
+
+[[recommended.configurations.hdfs]]
+==== HDFS Configurations
+
+[[dfs.datanode.failed.volumes.tolerated]]
+===== dfs.datanode.failed.volumes.tolerated
+
+This is the "...number of volumes that are allowed to fail before a datanode stops offering service.
+By default any volume failure will cause a datanode to shutdown" from the [path]_hdfs-default.xml_ description.
+If you have more than three or four disks, you might want to set this to 1; if you have many disks, set it to 2 or more.
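+
+As an illustration, here is a hedged [path]_hdfs-site.xml_ sketch tolerating one failed volume per DataNode (the value is an example; choose it based on your disk count):
+
+[source,xml]
+----
+<!-- Example only: let a DataNode keep serving after one volume failure -->
+<property>
+  <name>dfs.datanode.failed.volumes.tolerated</name>
+  <value>1</value>
+</property>
+----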
+
+[[hbase.regionserver.handler.count_description]]
+==== [var]+hbase.regionserver.handler.count+
+
+This setting defines the number of threads that are kept open to answer incoming requests to user tables.
+The rule of thumb is to keep this number low when the payload per request approaches the MB (big puts, scans using a large cache) and high when the payload is small (gets, small puts, ICVs, deletes). The total size of the queries in progress is limited by the setting "hbase.ipc.server.max.callqueue.size".
+
+It is safe to set that number to the maximum number of incoming clients if their payload is small, the typical example being a cluster that serves a website since puts aren't typically buffered and most of the operations are gets.
+
+The reason why it is dangerous to keep this setting high is that the aggregate size of all the puts that are currently happening in a region server may impose too much pressure on its memory, or even trigger an OutOfMemoryError.
+A region server running on low memory will trigger its JVM's garbage collector to run more frequently up to a point where GC pauses become noticeable (the reason being that all the memory used to keep all the requests' payloads cannot be trashed, no matter how hard the garbage collector tries). After some time, the overall cluster throughput is affected since every request that hits that region server will take longer, which exacerbates the problem even more.
+
+You can get a sense of whether you have too few or too many handlers by <> on an individual RegionServer and then tailing its logs (queued requests consume memory).
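+
+For reference, here is a hedged [path]_hbase-site.xml_ sketch of this setting (30 is only an illustrative value; tune it against your payload sizes as described above):
+
+[source,xml]
+----
+<!-- Example only: number of RPC handler threads kept open per RegionServer -->
+<property>
+  <name>hbase.regionserver.handler.count</name>
+  <value>30</value>
+</property>
+----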
+
+[[big_memory]]
+==== Configuration for large memory machines
+
+HBase ships with a reasonable, conservative configuration that will work on nearly all machine types that people might want to test with.
+If you have larger machines -- HBase has 8G and larger heap -- you might find the following configuration options helpful.
+TODO.
+
+[[config.compression]]
+==== Compression
+
+You should consider enabling ColumnFamily compression.
+There are several options that are near-frictionless and in almost all cases boost performance by reducing the size of StoreFiles and thus reducing I/O.
+
+See <> for more information.
+
+[[config.wals]]
+==== Configuring the size and number of WAL files
+
+HBase uses <> to recover the memstore data that has not been flushed to disk in case of an RS failure.
+These WAL files should be configured to be slightly smaller than the HDFS block size (by default, an HDFS block is 64 MB and a WAL file is ~60 MB).
+
+HBase also has a limit on number of WAL files, designed to ensure there's never too much data that needs to be replayed during recovery.
+This limit needs to be set according to memstore configuration, so that all the necessary data would fit.
+It is recommended to allocate enough WAL files to store at least that much data (when all memstores are close to full). For example, with a 16 GB RegionServer heap, the default memstore setting (0.4), and the default WAL file size (~60 MB), 16384 MB * 0.4 / 60 MB gives a starting point of ~109 WAL files.
+However, as all memstores are not expected to be full all the time, fewer WAL files can be allocated.
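+
+The property involved is not named in this section, so take the following [path]_hbase-site.xml_ sketch as an assumption to verify against your version's hbase-default.xml: [var]+hbase.regionserver.maxlogs+ is the property commonly used to cap the number of WAL files.
+
+[source,xml]
+----
+<!-- Assumption: hbase.regionserver.maxlogs caps the WAL file count; verify for your release -->
+<property>
+  <name>hbase.regionserver.maxlogs</name>
+  <value>100</value>
+</property>
+----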
+
+[[disable.splitting]]
+==== Managed Splitting
+
+HBase generally handles splitting your regions, based upon the settings in your [path]_hbase-default.xml_ and [path]_hbase-site.xml_ configuration files.
+Important settings include [var]+hbase.regionserver.region.split.policy+, [var]+hbase.hregion.max.filesize+, [var]+hbase.regionserver.regionSplitLimit+.
+A simplistic view of splitting is that when a region grows to [var]+hbase.hregion.max.filesize+, it is split.
+For most use patterns, most of the time, you should use automatic splitting.
+See <> for more information about manual region splitting.
+
+Instead of allowing HBase to split your regions automatically, you can choose to manage the splitting yourself.
+This feature was added in HBase 0.90.0.
+Manually managing splits works if you know your keyspace well; otherwise, let HBase figure out where to split for you.
+Manual splitting can mitigate region creation and movement under load.
+It also makes it so region boundaries are known and invariant (if you disable region splitting). If you use manual splits, it is easier to do staggered, time-based major compactions to spread out your network IO load.
+
+.Disable Automatic Splitting
+To disable automatic splitting, set [var]+hbase.hregion.max.filesize+ to a very large value, such as [literal]+100 GB+. It is not recommended to set it to its absolute maximum value of [literal]+Long.MAX_VALUE+.
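+
+For illustration, here is a hedged [path]_hbase-site.xml_ sketch with a 100 GB maximum file size (expressed in bytes); the constant-size split policy shown is an assumption about the available policy classes, so verify the class name for your release:
+
+[source,xml]
+----
+<!-- Example only: effectively disable automatic splits with a very large max file size -->
+<property>
+  <name>hbase.hregion.max.filesize</name>
+  <value>107374182400</value>
+</property>
+<!-- Assumption: ConstantSizeRegionSplitPolicy is available under this name; verify for your release -->
+<property>
+  <name>hbase.regionserver.region.split.policy</name>
+  <value>org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy</value>
+</property>
+----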
+
+.Automatic Splitting Is Recommended
+[NOTE]
+====
+If you disable automatic splits to diagnose a problem or during a period of fast data growth, it is recommended to re-enable them when your situation becomes more stable.
+The potential benefits of managing region splits yourself are not undisputed.
+====
+
+.Determine the Optimal Number of Pre-Split Regions
+The optimal number of pre-split regions depends on your application and environment.
+A good rule of thumb is to start with 10 pre-split regions per server and watch as data grows over time.
+It is better to err on the side of too few regions and perform rolling splits later.
+The optimal number of regions depends upon the largest StoreFile in your region.
+The size of the largest StoreFile will increase with time if the amount of data grows.
+The goal is for the largest region to be just large enough that the compaction selection algorithm only compacts it during a timed major compaction.
+Otherwise, the cluster can be prone to compaction storms, where a large number of regions are compacted at the same time.
+It is important to understand that the data growth causes compaction storms, and not the manual split decision.
+
+If the regions are split into too many large regions, you can increase the major compaction interval by configuring [var]+HConstants.MAJOR_COMPACTION_PERIOD+.
+HBase 0.90 introduced [class]+org.apache.hadoop.hbase.util.RegionSplitter+, which provides a network-IO-safe rolling split of all regions.
+
+[[managed.compactions]]
+==== Managed Compactions
+
+By default, major compactions are scheduled to run once in a 7-day period.
+Prior to HBase 0.96.x, major compactions were scheduled to happen once per day by default.
+
+If you need to control exactly when and how often major compaction runs, you can disable managed major compactions.
+See the entry for [var]+hbase.hregion.majorcompaction+ in the <> table for details.
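+
+As an illustration, here is a hedged [path]_hbase-site.xml_ sketch; setting [var]+hbase.hregion.majorcompaction+ to 0 is the commonly documented way to turn off time-based major compactions, but verify this against the entry for your release:
+
+[source,xml]
+----
+<!-- Assumption: a value of 0 disables periodic major compactions; verify for your release -->
+<property>
+  <name>hbase.hregion.majorcompaction</name>
+  <value>0</value>
+</property>
+----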
+
+.Do Not Disable Major Compactions
+[WARNING]
+====
+Major compactions are absolutely necessary for StoreFile clean-up.
+Do not disable them altogether.
+You can run major compactions manually via the HBase shell or via the link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html#majorCompact%28java.lang.String%29[HBaseAdmin
+ API].
+====
+
+For more information about compactions and the compaction file selection process, see <>
+
+[[spec.ex]]
+==== Speculative Execution
+
+Speculative Execution of MapReduce tasks is on by default, and for HBase clusters it is generally advised to turn off Speculative Execution at a system-level unless you need it for a specific case, where it can be configured per-job.
+Set the properties [var]+mapreduce.map.speculative+ and [var]+mapreduce.reduce.speculative+ to false.
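+
+For example, here is a hedged sketch of the relevant [path]_mapred-site.xml_ (or per-job) settings, using the Hadoop 2 property names given above:
+
+[source,xml]
+----
+<!-- Disable speculative execution for MapReduce jobs that read from or write to HBase -->
+<property>
+  <name>mapreduce.map.speculative</name>
+  <value>false</value>
+</property>
+<property>
+  <name>mapreduce.reduce.speculative</name>
+  <value>false</value>
+</property>
+----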
+
+[[other_configuration]]
+=== Other Configurations
+
+[[balancer_config]]
+==== Balancer
+
+The balancer is a periodic operation which is run on the master to redistribute regions on the cluster.
+It is configured via [var]+hbase.balancer.period+ and defaults to 300000 (5 minutes).
+
+See <> for more information on the LoadBalancer.
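+
+For reference, here is a hedged [path]_hbase-site.xml_ sketch that shortens the balancer period to one minute (the value is in milliseconds and is only an example):
+
+[source,xml]
+----
+<!-- Example only: run the balancer every 60 seconds instead of the 5-minute default -->
+<property>
+  <name>hbase.balancer.period</name>
+  <value>60000</value>
+</property>
+----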
+
+[[disabling.blockcache]]
+==== Disabling Blockcache
+
+Do not turn off the block cache (you'd do it by setting [var]+hbase.block.cache.size+ to zero). Currently we do not do well if you do this, because the RegionServer will spend all its time loading hfile indices over and over again.
+If your working set is such that the block cache does you no good, at least size the block cache such that hfile indices will stay up in the cache (you can get a rough idea of the size you need by surveying RegionServer UIs; you'll see the index block size accounted near the top of the webpage).
+
+[[nagles]]
+==== link:http://en.wikipedia.org/wiki/Nagle's_algorithm[Nagle's] or the small package problem
+
+If a big 40ms or so occasional delay is seen in operations against HBase, try the Nagle's setting.
+For example, see the user mailing list thread, link:http://search-hadoop.com/m/pduLg2fydtE/Inconsistent+scan+performance+with+caching+set+&subj=Re+Inconsistent+scan+performance+with+caching+set+to+1[Inconsistent scan performance with caching set to 1] and the issue cited therein where setting notcpdelay improved scan speeds.
+You might also see the graphs on the tail of link:https://issues.apache.org/jira/browse/HBASE-7008[HBASE-7008 Set scanner caching to a better default] where our Lars Hofhansl tries various data sizes w/ Nagle's on and off measuring the effect.
+
+[[mttr]]
+==== Better Mean Time to Recover (MTTR)
+
+This section is about configurations that will make servers come back faster after a fail.
+See the Devaraj Das and Nicolas Liochon blog post link:http://hortonworks.com/blog/introduction-to-hbase-mean-time-to-recover-mttr/[Introduction to HBase Mean Time to Recover (MTTR)] for a brief introduction.
+
+The issue link:https://issues.apache.org/jira/browse/HBASE-8389[HBASE-8354 forces Namenode into loop with lease recovery requests] is messy but has a bunch of good discussion toward the end on low timeouts and how to effect faster recovery including citation of fixes added to HDFS.
+Read the Varun Sharma comments.
+The below suggested configurations are Varun's suggestions distilled and tested.
+Make sure you are running on a late-version HDFS so you have the fixes he refers to and himself adds to HDFS that help HBase MTTR (e.g.
+HDFS-3703, HDFS-3712, and HDFS-4791 -- hadoop 2 for sure has them and late hadoop 1 has some). Set the following in the RegionServer.
+
+[source,xml]
+----
+<property>
+  <name>hbase.lease.recovery.dfs.timeout</name>
+  <value>23000</value>
+  <description>How much time we allow elapse between calls to recover lease.
+  Should be larger than the dfs timeout.</description>
+</property>
+<property>
+  <name>dfs.client.socket-timeout</name>
+  <value>10000</value>
+  <description>Down the DFS timeout from 60 to 10 seconds.</description>
+</property>
+----
+
+And on the namenode/datanode side, set the following to enable 'staleness' introduced in HDFS-3703, HDFS-3912.
+
+[source,xml]
+----
+<property>
+  <name>dfs.client.socket-timeout</name>
+  <value>10000</value>
+  <description>Down the DFS timeout from 60 to 10 seconds.</description>
+</property>
+<property>
+  <name>dfs.datanode.socket.write.timeout</name>
+  <value>10000</value>
+  <description>Down the DFS timeout from 8 * 60 to 10 seconds.</description>
+</property>
+<property>
+  <name>ipc.client.connect.timeout</name>
+  <value>3000</value>
+  <description>Down from 60 seconds to 3.</description>
+</property>
+<property>
+  <name>ipc.client.connect.max.retries.on.timeouts</name>
+  <value>2</value>
+  <description>Down from 45 seconds to 3 (2 == 3 retries).</description>
+</property>
+<property>
+  <name>dfs.namenode.avoid.read.stale.datanode</name>
+  <value>true</value>
+  <description>Enable stale state in hdfs</description>
+</property>
+<property>
+  <name>dfs.namenode.stale.datanode.interval</name>
+  <value>20000</value>
+  <description>Down from default 30 seconds</description>
+</property>
+<property>
+  <name>dfs.namenode.avoid.write.stale.datanode</name>
+  <value>true</value>
+  <description>Enable stale state in hdfs</description>
+</property>
+----
+
+[[jmx_config]]
+==== JMX
+
+JMX (Java Management Extensions) provides built-in instrumentation that enables you to monitor and manage the Java VM.
+To enable monitoring and management from remote systems, you need to set the system property com.sun.management.jmxremote.port (the port number through which you want to enable JMX RMI connections) when you start the Java VM.
+See the link:http://docs.oracle.com/javase/6/docs/technotes/guides/management/agent.html[official document] for more information.
+Historically, besides the above port, JMX opens two additional random TCP listening ports, which could lead to port conflict problems. (See link:https://issues.apache.org/jira/browse/HBASE-10289[HBASE-10289] for details.)
+
+As an alternative, you can use the coprocessor-based JMX implementation provided by HBase.
+To enable it in 0.99 or above, add the below property in [path]_hbase-site.xml_:
+
+[source,xml]
+----
+<property>
+  <name>hbase.coprocessor.regionserver.classes</name>
+  <value>org.apache.hadoop.hbase.JMXListener</value>
+</property>
+----
+
+NOTE: DO NOT set com.sun.management.jmxremote.port for the Java VM at the same time.
+
+Currently it supports the Master and RegionServer Java VMs.
+The reason why you only configure the coprocessor for 'regionserver' is that, starting from HBase 0.99, a Master IS also a RegionServer.
+(See link:https://issues.apache.org/jira/browse/HBASE-10569[HBASE-10569] for more information.) By default, the JMX listener binds to TCP port 10102; you can further configure the port using the properties below:
+
+[source,xml]
+----
+<property>
+  <name>regionserver.rmi.registry.port</name>
+  <value>61130</value>
+</property>
+<property>
+  <name>regionserver.rmi.connector.port</name>
+  <value>61140</value>
+</property>
+----
+
+The registry port can be shared with the connector port in most cases, so you only need to configure regionserver.rmi.registry.port.
+However, if you want to use SSL communication, the two ports must be configured to different values.
+
+By default, password authentication and SSL communication are disabled.
+To enable password authentication, you need to update [path]_hbase-env.sh_ as below:
+----
+export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.authenticate=true \
+ -Dcom.sun.management.jmxremote.password.file=your_password_file \
+ -Dcom.sun.management.jmxremote.access.file=your_access_file"
+
+export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE "
+export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE "
+----
+
+See example password/access file under $JRE_HOME/lib/management.
+
+To enable SSL communication with password authentication, follow the steps below:
+
+----
+#1. generate a key pair, stored in myKeyStore
+keytool -genkey -alias jconsole -keystore myKeyStore
+
+#2. export it to file jconsole.cert
+keytool -export -alias jconsole -keystore myKeyStore -file jconsole.cert
+
+#3. copy jconsole.cert to jconsole client machine, import it to jconsoleKeyStore
+keytool -import -alias jconsole -keystore jconsoleKeyStore -file jconsole.cert
+----
+
+And then update [path]_hbase-env.sh_ like below:
+
+----
+
+export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=true \
+ -Djavax.net.ssl.keyStore=/home/tianq/myKeyStore \
+ -Djavax.net.ssl.keyStorePassword=your_password_in_step_1 \
+ -Dcom.sun.management.jmxremote.authenticate=true \
+ -Dcom.sun.management.jmxremote.password.file=your_password_file \
+ -Dcom.sun.management.jmxremote.access.file=your_access_file"
+
+export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE "
+export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE "
+----
+
+Finally start jconsole on client using the key store:
+
+----
+jconsole -J-Djavax.net.ssl.trustStore=/home/tianq/jconsoleKeyStore
+----
+
+NOTE: For HBase 0.98, to enable the HBase JMX implementation on the Master, you also need to add the below property in [path]_hbase-site.xml_:
+
+[source,xml]
+----
+<property>
+  <name>hbase.coprocessor.master.classes</name>
+  <value>org.apache.hadoop.hbase.JMXListener</value>
+</property>
+----
+
+The corresponding properties for port configuration are master.rmi.registry.port (by default 10101) and master.rmi.connector.port (by default the same as the registry port).
+
+[[dyn_config]]
+== Dynamic Configuration
+
+Since HBase 1.0.0, it is possible to change a subset of the configuration without requiring a server restart.
+In the hbase shell, there are new operators, +update_config+ and +update_all_config+ that will prompt a server or all servers to reload configuration.
+
+Only a subset of all configurations can currently be changed in the running server.
+Here is an incomplete list: +hbase.regionserver.thread.compaction.large+, +hbase.regionserver.thread.compaction.small+, +hbase.regionserver.thread.split+, +hbase.regionserver.thread.merge+, as well as compaction policy and configurations and adjustment to offpeak hours.
+For the full list consult the patch attached to link:https://issues.apache.org/jira/browse/HBASE-12147[HBASE-12147 Porting Online Config Change from 89-fb].
+
+ifdef::backend-docbook[]
+[index]
+== Index
+// Generated automatically by the DocBook toolchain.
+endif::backend-docbook[]
diff --git a/src/main/asciidoc/_chapters/cp.adoc b/src/main/asciidoc/_chapters/cp.adoc
new file mode 100644
index 0000000..032275a
--- /dev/null
+++ b/src/main/asciidoc/_chapters/cp.adoc
@@ -0,0 +1,233 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[[cp]]
+= Apache HBase Coprocessors
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+
+HBase coprocessors are modeled after the coprocessors which are part of Google's BigTable (see link:http://www.scribd.com/doc/21631448/Dean-Keynote-Ladis2009[], pages 66-67). Coprocessors function in a similar way to Linux kernel modules.
+They provide a way to run server-level code against locally-stored data.
+The functionality they provide is very powerful, but it also carries great risk and can have adverse effects on the system, up to the level of the operating system itself.
+The information in this chapter is primarily sourced and heavily reused from Mingjie Lai's blog post at link:https://blogs.apache.org/hbase/entry/coprocessor_introduction[].
+
+Coprocessors are not designed to be used by end users of HBase, but by HBase developers who need to add specialized functionality to HBase.
+One example of the use of coprocessors is pluggable compaction and scan policies, which are provided as coprocessors in link:https://issues.apache.org/jira/browse/HBASE-6427[HBASE-6427].
+
+== Coprocessor Framework
+
+The implementation of HBase coprocessors diverges from the BigTable implementation.
+The HBase framework provides a library and runtime environment for executing user code within the HBase region server and master processes.
+
+The framework API is provided in the link:https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/coprocessor/package-summary.html[coprocessor] package.
+
+Two different types of coprocessors are provided by the framework, based on their scope.
+
+.Types of Coprocessors
+System Coprocessors::
+ System coprocessors are loaded globally on all tables and regions hosted by a region server.
+
+Table Coprocessors::
+ You can specify which coprocessors should be loaded on all regions for a table on a per-table basis.
+
+The framework provides two different aspects of extensions as well: [firstterm]_observers_ and [firstterm]_endpoints_.
+
+Observers::
+ Observers are analogous to triggers in conventional databases.
+ They allow you to insert user code by overriding upcall methods provided by the coprocessor framework.
+ Callback functions are executed from core HBase code when events occur.
+ Callbacks are handled by the framework, and the coprocessor itself only needs to insert the extended or alternate functionality.
+
+Endpoints (HBase 0.96.x and later)::
+ The implementation for endpoints changed significantly in HBase 0.96.x due to the introduction of protocol buffers (protobufs) (link:https://issues.apache.org/jira/browse/HBASE-5448[HBASE-5448]). If you created endpoints before 0.96.x, you will need to rewrite them.
+ Endpoints are now defined and callable as protobuf services, rather than endpoint invocations passed through as Writable blobs.
+
+Endpoints (HBase 0.94.x and earlier)::
+ Dynamic RPC endpoints resemble stored procedures.
+ An endpoint can be invoked at any time from the client.
+ When it is invoked, it is executed remotely at the target region or regions, and results of the executions are returned to the client.
+
+== Examples
+
+An example of an observer is included in [path]_hbase-examples/src/test/java/org/apache/hadoop/hbase/coprocessor/example/TestZooKeeperScanPolicyObserver.java_.
+Several endpoint examples are included in the same directory.
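+
+To make the observer idea concrete, the following is a minimal sketch of a RegionObserver against the 0.96/0.98-era API ([code]+BaseRegionObserver+). The class name is hypothetical; it simply logs each Put before the region applies it.
+
+[source,java]
+----
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
+import org.apache.hadoop.hbase.util.Bytes;
+
+// Hypothetical observer: the framework calls prePut before each write.
+public class LoggingRegionObserver extends BaseRegionObserver {
+  @Override
+  public void prePut(ObserverContext<RegionCoprocessorEnvironment> ctx,
+      Put put, WALEdit edit, Durability durability) throws IOException {
+    System.out.println("About to write row "
+        + Bytes.toStringBinary(put.getRow()));
+  }
+}
+----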
+
+== Building A Coprocessor
+
+Before you can use a coprocessor, it must be developed, compiled, and packaged in a JAR file.
+The next step is to configure the coprocessor framework to use your coprocessor.
+You can load the coprocessor from your HBase configuration, so that the coprocessor starts with HBase, or you can configure the coprocessor from the HBase shell, as a table attribute, so that it is loaded dynamically when the table is opened or reopened.
+
+=== Load from Configuration
+
+To configure a coprocessor to be loaded when HBase starts, modify the RegionServer's [path]_hbase-site.xml_ and configure one of the following properties, based on the type of observer you are configuring:
+
+* [code]+hbase.coprocessor.region.classes+ for RegionObservers and Endpoints
+* [code]+hbase.coprocessor.wal.classes+ for WALObservers
+* [code]+hbase.coprocessor.master.classes+ for MasterObservers
+
+.Example RegionObserver Configuration
+====
+In this example, one RegionObserver is configured for all the HBase tables.
+
+[source,xml]
+----
+<property>
+  <name>hbase.coprocessor.region.classes</name>
+  <value>org.apache.hadoop.hbase.coprocessor.AggregateImplementation</value>
+</property>
+----
+====
+
+If multiple classes are specified for loading, the class names must be comma-separated.
+The framework attempts to load all the configured classes using the default class loader.
+Therefore, the jar file must reside on the server-side HBase classpath.
+
+Coprocessors which are loaded in this way will be active on all regions of all tables.
+These are the system coprocessors introduced earlier.
+The first listed coprocessor will be assigned the priority [literal]+Coprocessor.Priority.SYSTEM+.
+Each subsequent coprocessor in the list will have its priority value incremented by one (which reduces its priority, because priorities have the natural sort order of Integers).
+
+When calling out to registered observers, the framework executes their callback methods in the sorted order of their priority.
+Ties are broken arbitrarily.
+
+=== Load from the HBase Shell
+
+You can load a coprocessor on a specific table via a table attribute.
+The following example will load the [systemitem]+FooRegionObserver+ observer when table [systemitem]+t1+ is read or re-read.
+
+.Load a Coprocessor On a Table Using HBase Shell
+====
+----
+
+hbase(main):005:0> alter 't1', METHOD => 'table_att',
+ 'coprocessor'=>'hdfs:///foo.jar|com.foo.FooRegionObserver|1001|arg1=1,arg2=2'
+Updating all regions with the new schema...
+1/1 regions updated.
+Done.
+0 row(s) in 1.0730 seconds
+
+hbase(main):006:0> describe 't1'
+DESCRIPTION ENABLED
+ {NAME => 't1', coprocessor$1 => 'hdfs:///foo.jar|com.foo.FooRegio false
+ nObserver|1001|arg1=1,arg2=2', FAMILIES => [{NAME => 'c1', DATA_B
+ LOCK_ENCODING => 'NONE', BLOOMFILTER => 'NONE', REPLICATION_SCOPE
+ => '0', VERSIONS => '3', COMPRESSION => 'NONE', MIN_VERSIONS =>
+ '0', TTL => '2147483647', KEEP_DELETED_CELLS => 'false', BLOCKSIZ
+ E => '65536', IN_MEMORY => 'false', ENCODE_ON_DISK => 'true', BLO
+ CKCACHE => 'true'}, {NAME => 'f1', DATA_BLOCK_ENCODING => 'NONE',
+ BLOOMFILTER => 'NONE', REPLICATION_SCOPE => '0', VERSIONS => '3'
+ , COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => '2147483647'
+ , KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY
+ => 'false', ENCODE_ON_DISK => 'true', BLOCKCACHE => 'true'}]}
+1 row(s) in 0.0190 seconds
+----
+====
+
+The coprocessor framework will try to read the class information from the coprocessor table attribute value.
+The value contains four pieces of information which are separated by the [literal]+|+ character.
+
+* File path: The jar file containing the coprocessor implementation must be in a location where all region servers can read it.
+ You could copy the file onto the local disk on each region server, but it is recommended to store it in HDFS.
+* Class name: The full class name of the coprocessor.
+* Priority: An integer.
+ The framework will determine the execution sequence of all configured observers registered at the same hook using priorities.
+ This field can be left blank.
+ In that case the framework will assign a default priority value.
+* Arguments: This field is passed to the coprocessor implementation.
+
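+The same table attribute can also be set from the Java client instead of the shell. The sketch below uses [code]+HTableDescriptor.addCoprocessor+ and mirrors the jar location and class name from the shell example above; it disables and re-enables the table around the schema change, which is the conservative approach.
+
+[source,java]
+----
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Coprocessor;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+
+public class LoadCoprocessorSketch {
+  public static void main(String[] args) throws Exception {
+    TableName tableName = TableName.valueOf("t1");
+    Map<String, String> coprocArgs = new HashMap<String, String>();
+    coprocArgs.put("arg1", "1");
+    coprocArgs.put("arg2", "2");
+
+    try (Connection connection =
+             ConnectionFactory.createConnection(HBaseConfiguration.create());
+         Admin admin = connection.getAdmin()) {
+      HTableDescriptor htd = admin.getTableDescriptor(tableName);
+      // Writes the same coprocessor$N table attribute the shell command sets.
+      htd.addCoprocessor("com.foo.FooRegionObserver",
+          new Path("hdfs:///foo.jar"), Coprocessor.PRIORITY_USER, coprocArgs);
+      admin.disableTable(tableName);
+      admin.modifyTable(tableName, htd);
+      admin.enableTable(tableName);
+    }
+  }
+}
+----
+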
+.Unload a Coprocessor From a Table Using HBase Shell
+====
+----
+
+hbase(main):007:0> alter 't1', METHOD => 'table_att_unset',
+hbase(main):008:0* NAME => 'coprocessor$1'
+Updating all regions with the new schema...
+1/1 regions updated.
+Done.
+0 row(s) in 1.1130 seconds
+
+hbase(main):009:0> describe 't1'
+DESCRIPTION ENABLED
+ {NAME => 't1', FAMILIES => [{NAME => 'c1', DATA_BLOCK_ENCODING => false
+ 'NONE', BLOOMFILTER => 'NONE', REPLICATION_SCOPE => '0', VERSION
+ S => '3', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => '214
+ 7483647', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN
+ _MEMORY => 'false', ENCODE_ON_DISK => 'true', BLOCKCACHE => 'true
+ '}, {NAME => 'f1', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER =>
+ 'NONE', REPLICATION_SCOPE => '0', VERSIONS => '3', COMPRESSION =>
+ 'NONE', MIN_VERSIONS => '0', TTL => '2147483647', KEEP_DELETED_C
+ ELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', ENCO
+ DE_ON_DISK => 'true', BLOCKCACHE => 'true'}]}
+1 row(s) in 0.0180 seconds
+----
+====
+
+WARNING: There is no guarantee that the framework will load a given coprocessor successfully.
+For example, the shell command neither guarantees a jar file exists at a particular location nor verifies whether the given class is actually contained in the jar file.
+
+== Check the Status of a Coprocessor
+
+To check the status of a coprocessor after it has been configured, use the +status+ HBase Shell command.
+
+----
+
+hbase(main):020:0> status 'detailed'
+version 0.92-tm-6
+0 regionsInTransition
+master coprocessors: []
+1 live servers
+ localhost:52761 1328082515520
+ requestsPerSecond=3, numberOfOnlineRegions=3, usedHeapMB=32, maxHeapMB=995
+ -ROOT-,,0
+ numberOfStores=1, numberOfStorefiles=1, storefileUncompressedSizeMB=0, storefileSizeMB=0, memstoreSizeMB=0,
+storefileIndexSizeMB=0, readRequestsCount=54, writeRequestsCount=1, rootIndexSizeKB=0, totalStaticIndexSizeKB=0,
+totalStaticBloomSizeKB=0, totalCompactingKVs=0, currentCompactedKVs=0, compactionProgressPct=NaN, coprocessors=[]
+ .META.,,1
+ numberOfStores=1, numberOfStorefiles=0, storefileUncompressedSizeMB=0, storefileSizeMB=0, memstoreSizeMB=0,
+storefileIndexSizeMB=0, readRequestsCount=97, writeRequestsCount=4, rootIndexSizeKB=0, totalStaticIndexSizeKB=0,
+totalStaticBloomSizeKB=0, totalCompactingKVs=0, currentCompactedKVs=0, compactionProgressPct=NaN, coprocessors=[]
+ t1,,1328082575190.c0491168a27620ffe653ec6c04c9b4d1.
+ numberOfStores=2, numberOfStorefiles=1, storefileUncompressedSizeMB=0, storefileSizeMB=0, memstoreSizeMB=0,
+storefileIndexSizeMB=0, readRequestsCount=0, writeRequestsCount=0, rootIndexSizeKB=0, totalStaticIndexSizeKB=0,
+totalStaticBloomSizeKB=0, totalCompactingKVs=0, currentCompactedKVs=0, compactionProgressPct=NaN,
+coprocessors=[AggregateImplementation]
+0 dead servers
+----
+
+== Monitor Time Spent in Coprocessors
+
+HBase 0.98.5 introduced the ability to monitor some statistics relating to the amount of time spent executing a given coprocessor.
+You can see these statistics via the HBase Metrics framework (see <>) or via the Web UI for a given Region Server, under the [label]#Coprocessor Metrics# tab.
+These statistics are valuable for debugging and benchmarking the performance impact of a given coprocessor on your cluster.
+Tracked statistics include min, max, average, and 90th, 95th, and 99th percentile.
+All times are shown in milliseconds.
+The statistics are calculated over coprocessor execution samples recorded during the reporting interval, which is 10 seconds by default.
+The metrics sampling rate is described in <>.
+
+.Coprocessor Metrics UI
+image::coprocessor_stats.png[]
+
+== Status of Coprocessors in HBase
+
+Coprocessors and the coprocessor framework are evolving rapidly and work is ongoing on several different JIRAs.
diff --git a/src/main/asciidoc/_chapters/datamodel.adoc b/src/main/asciidoc/_chapters/datamodel.adoc
new file mode 100644
index 0000000..8bca69b
--- /dev/null
+++ b/src/main/asciidoc/_chapters/datamodel.adoc
@@ -0,0 +1,585 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[[datamodel]]
+= Data Model
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+
+In HBase, data is stored in tables, which have rows and columns.
+This is a terminology overlap with relational databases (RDBMSs), but this is not a helpful analogy.
+Instead, it can be helpful to think of an HBase table as a multi-dimensional map.
+
+.HBase Data Model Terminology
+Table::
+ An HBase table consists of multiple rows.
+
+Row::
+ A row in HBase consists of a row key and one or more columns with values associated with them.
+ Rows are sorted alphabetically by the row key as they are stored.
+ For this reason, the design of the row key is very important.
+ The goal is to store data in such a way that related rows are near each other.
+ A common row key pattern is a website domain.
+ If your row keys are domains, you should probably store them in reverse (org.apache.www, org.apache.mail, org.apache.jira). This way, all of the Apache domains are near each other in the table, rather than being spread out based on the first letter of the subdomain.
+
+Column::
+ A column in HBase consists of a column family and a column qualifier, which are delimited by a [literal]+:+ (colon) character.
+
+Column Family::
+ Column families physically colocate a set of columns and their values, often for performance reasons.
+ Each column family has a set of storage properties, such as whether its values should be cached in memory, how its data is compressed or its row keys are encoded, and others.
+ Each row in a table has the same column families, though a given row might not store anything in a given column family.
+
+Column Qualifier::
+ A column qualifier is added to a column family to provide the index for a given piece of data.
+ Given a column family [literal]+content+, a column qualifier might be [literal]+content:html+, and another might be [literal]+content:pdf+.
+ Though column families are fixed at table creation, column qualifiers are mutable and may differ greatly between rows.
+
+Cell::
+ A cell is a combination of row, column family, and column qualifier, and contains a value and a timestamp, which represents the value's version.
+
+Timestamp::
+ A timestamp is written alongside each value, and is the identifier for a given version of a value.
+ By default, the timestamp represents the time on the RegionServer when the data was written, but you can specify a different timestamp value when you put data into the cell.
+
+[[conceptual.view]]
+== Conceptual View
+
+You can read a very understandable explanation of the HBase data model in the blog post link:http://jimbojw.com/wiki/index.php?title=Understanding_Hbase_and_BigTable[Understanding
+ HBase and BigTable] by Jim R.
+Wilson.
+Another good explanation is available in the PDF link:http://0b4af6cdc2f0c5998459-c0245c5c937c5dedcca3f1764ecc9b2f.r43.cf2.rackcdn.com/9353-login1210_khurana.pdf[Introduction
+ to Basic Schema Design] by Amandeep Khurana.
+It may help to read different perspectives to get a solid understanding of HBase schema design.
+The linked articles cover the same ground as the information in this section.
+
+The following example is a slightly modified form of the one on page 2 of the link:http://research.google.com/archive/bigtable.html[BigTable] paper.
+There is a table called [var]+webtable+ that contains two rows ([literal]+com.cnn.www+ and [literal]+com.example.www+), three column families named [var]+contents+, [var]+anchor+, and [var]+people+.
+In this example, for the first row ([literal]+com.cnn.www+), [var]+anchor+ contains two columns ([var]+anchor:cnnsi.com+, [var]+anchor:my.look.ca+) and [var]+contents+ contains one column ([var]+contents:html+). This example contains 5 versions of the row with the row key [literal]+com.cnn.www+, and one version of the row with the row key [literal]+com.example.www+.
+The [var]+contents:html+ column qualifier contains the entire HTML of a given website.
+Qualifiers of the [var]+anchor+ column family each contain the external site which links to the site represented by the row, along with the text it used in the anchor of its link.
+The [var]+people+ column family represents people associated with the site.
+
+.Column Names
+[NOTE]
+====
+By convention, a column name is made of its column family prefix and a _qualifier_.
+For example, the column _contents:html_ is made up of the column family [var]+contents+ and the [var]+html+ qualifier.
+The colon character ([literal]+:+) delimits the column family from the column family _qualifier_.
+====
+
+.Table [var]+webtable+
+[cols="1,1,1,1,1", frame="all", options="header"]
+|===
+| Row Key | Time Stamp | ColumnFamily contents | ColumnFamily anchor | ColumnFamily people
+
+| "com.cnn.www" | t9 | | anchor:cnnsi.com = "CNN" |
+| "com.cnn.www" | t8 | | anchor:my.look.ca = "CNN.com" |
+| "com.cnn.www" | t6 | contents:html = "..." | |
+| "com.cnn.www" | t5 | contents:html = "..." | |
+| "com.cnn.www" | t3 | contents:html = "..." | |
+| "com.example.www" | t5 | contents:html = "..." | |
+|===
+
+Cells in this table that appear to be empty do not take space, nor do they in fact exist, in HBase.
+This is what makes HBase "sparse." A tabular view is not the only possible way to look at data in HBase, or even the most accurate.
+The following represents the same information as a multi-dimensional map.
+This is only a mock-up for illustrative purposes and may not be strictly accurate.
+
+[source]
+----
+
+{
+ "com.cnn.www": {
+ contents: {
+ t6: contents:html: "..."
+ t5: contents:html: "..."
+ t3: contents:html: "..."
+ }
+ anchor: {
+ t9: anchor:cnnsi.com = "CNN"
+ t8: anchor:my.look.ca = "CNN.com"
+ }
+ people: {}
+ }
+ "com.example.www": {
+ contents: {
+ t5: contents:html: "..."
+ }
+ anchor: {}
+ people: {
+ t5: people:author: "John Doe"
+ }
+ }
+}
+----
+
+[[physical.view]]
+== Physical View
+
+Although at a conceptual level tables may be viewed as a sparse set of rows, they are physically stored by column family.
+A new column qualifier (column_family:column_qualifier) can be added to an existing column family at any time.
+
+.ColumnFamily [var]+anchor+
+[cols="1,1,1", frame="all", options="header"]
+|===
+| Row Key | Time Stamp | Column Family anchor
+
+| "com.cnn.www" | t9 | anchor:cnnsi.com = "CNN"
+| "com.cnn.www" | t8 | anchor:my.look.ca = "CNN.com"
+|===
+
+.ColumnFamily [var]+contents+
+[cols="1,1,1", frame="all", options="header"]
+|===
+| Row Key | Time Stamp | ColumnFamily "contents:"
+
+| "com.cnn.www" | t6 | contents:html = "..."
+| "com.cnn.www" | t5 | contents:html = "..."
+| "com.cnn.www" | t3 | contents:html = "..."
+|===
+
+The empty cells shown in the conceptual view are not stored at all.
+Thus a request for the value of the [var]+contents:html+ column at time stamp [literal]+t8+ would return no value.
+Similarly, a request for an [var]+anchor:my.look.ca+ value at time stamp [literal]+t9+ would return no value.
+However, if no timestamp is supplied, the most recent value for a particular column would be returned.
+Given multiple versions, the most recent is also the first one found, since timestamps are stored in descending order.
+Thus a request for the values of all columns in the row [var]+com.cnn.www+ if no timestamp is specified would be: the value of [var]+contents:html+ from timestamp [literal]+t6+, the value of [var]+anchor:cnnsi.com+ from timestamp [literal]+t9+, the value of [var]+anchor:my.look.ca+ from timestamp [literal]+t8+.
+
+For more information about the internals of how Apache HBase stores data, see <>.
+
+== Namespace
+
+A namespace is a logical grouping of tables analogous to a database in relational database systems.
+This abstraction lays the groundwork for upcoming multi-tenancy related features:
+
+* Quota Management (HBASE-8410) - Restrict the amount of resources (i.e., regions, tables) a namespace can consume.
+* Namespace Security Administration (HBASE-9206) - Provide another level of security administration for tenants.
+* Region server groups (HBASE-6721) - A namespace/table can be pinned onto a subset of RegionServers, thus guaranteeing a coarse level of isolation.
+
+[[namespace_creation]]
+=== Namespace management
+
+A namespace can be created, removed or altered.
+Namespace membership is determined during table creation by specifying a fully-qualified table name of the form:
+
+[source]
+----
+<table namespace>:<table qualifier>
+----
+
+.Examples
+====
+[source,bourne]
+----
+
+#Create a namespace
+create_namespace 'my_ns'
+----
+
+[source,bourne]
+----
+
+#create my_table in my_ns namespace
+create 'my_ns:my_table', 'fam'
+----
+
+[source,bourne]
+----
+
+#drop namespace
+drop_namespace 'my_ns'
+----
+
+[source,bourne]
+----
+
+#alter namespace
+alter_namespace 'my_ns', {METHOD => 'set', 'PROPERTY_NAME' => 'PROPERTY_VALUE'}
+----
+====
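+
+The same administration is available from the Java client. The following is a sketch only; it assumes the [code]+Admin+ namespace methods available in HBase 1.0 and reuses the namespace name and property placeholders from the shell examples above.
+
+[source,java]
+----
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+
+public class NamespaceSketch {
+  public static void main(String[] args) throws Exception {
+    try (Connection connection =
+             ConnectionFactory.createConnection(HBaseConfiguration.create());
+         Admin admin = connection.getAdmin()) {
+      // Equivalent of: create_namespace 'my_ns'
+      admin.createNamespace(NamespaceDescriptor.create("my_ns").build());
+      // Equivalent of alter_namespace: set a property on the namespace
+      NamespaceDescriptor ns = admin.getNamespaceDescriptor("my_ns");
+      ns.setConfiguration("PROPERTY_NAME", "PROPERTY_VALUE");
+      admin.modifyNamespace(ns);
+      // Equivalent of: drop_namespace 'my_ns'
+      admin.deleteNamespace("my_ns");
+    }
+  }
+}
+----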
+
+[[namespace_special]]
+=== Predefined namespaces
+
+There are two predefined special namespaces:
+
+* hbase - system namespace, used to contain hbase internal tables
+* default - tables with no explicitly specified namespace will automatically fall into this namespace.
+
+.Examples
+====
+[source,bourne]
+----
+
+#namespace=foo and table qualifier=bar
+create 'foo:bar', 'fam'
+
+#namespace=default and table qualifier=bar
+create 'bar', 'fam'
+----
+====
+
+== Table
+
+Tables are declared up front at schema definition time.
+
+== Row
+
+Row keys are uninterpreted bytes.
+Rows are lexicographically sorted, with the lowest order appearing first in a table.
+The empty byte array is used to denote both the start and end of a table's namespace.
+
+[[columnfamily]]
+== Column Family
+
+Columns in Apache HBase are grouped into _column families_.
+All column members of a column family have the same prefix.
+For example, the columns _courses:history_ and _courses:math_ are both members of the _courses_ column family.
+The colon character ([literal]+:+) delimits the column family from the
+column family qualifier.
+The column family prefix must be composed of _printable_ characters.
+The qualifying tail, the column family _qualifier_, can be made of any arbitrary bytes.
+Column families must be declared up front at schema definition time, whereas columns do not need to be defined at schema time but can be conjured on the fly while the table is up and running.
+
+Physically, all column family members are stored together on the filesystem.
+Because tunings and storage specifications are done at the column family level, it is advised that all column family members have the same general access pattern and size characteristics.
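+
+A short sketch of declaring a column family at schema definition time with the Java client follows. The table and family names are illustrative only; note that per-family tunings (here, the number of versions) live on the [code]+HColumnDescriptor+.
+
+[source,java]
+----
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+
+public class CreateSchemaSketch {
+  public static void main(String[] args) throws Exception {
+    try (Connection connection =
+             ConnectionFactory.createConnection(HBaseConfiguration.create());
+         Admin admin = connection.getAdmin()) {
+      HTableDescriptor table =
+          new HTableDescriptor(TableName.valueOf("school"));
+      // The column family is declared up front; its tunings live here too.
+      HColumnDescriptor courses = new HColumnDescriptor("courses");
+      courses.setMaxVersions(3);
+      table.addFamily(courses);
+      admin.createTable(table);
+      // Qualifiers such as courses:history or courses:math need no
+      // declaration; they are created implicitly by the first Put.
+    }
+  }
+}
+----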
+
+== Cells
+
+A _{row, column, version}_ tuple exactly specifies a [literal]+cell+ in HBase.
+Cell content is uninterpreted bytes.
+
+== Data Model Operations
+
+The four primary data model operations are Get, Put, Scan, and Delete.
+Operations are applied via link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html[Table] instances.
+
+=== Get
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Get.html[Get] returns attributes for a specified row.
+Gets are executed via link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#get(org.apache.hadoop.hbase.client.Get)[
+ Table.get].
+
+=== Put
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Put.html[Put] either adds new rows to a table (if the key is new) or can update existing rows (if the key already exists). Puts are executed via link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#put(org.apache.hadoop.hbase.client.Put)[
+ Table.put] (writeBuffer) or link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#batch(java.util.List, java.lang.Object[])[
+ Table.batch] (non-writeBuffer).
+
+[[scan]]
+=== Scans
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html[Scan] allows iteration over multiple rows for specified attributes.
+
+The following is an example of a Scan on a Table instance.
+Assume that a table is populated with rows with keys "row1", "row2", "row3", and then another set of rows with the keys "abc1", "abc2", and "abc3". The following example shows how to set a Scan instance to return the rows beginning with "row".
+
+[source,java]
+----
+
+public static final byte[] CF = "cf".getBytes();
+public static final byte[] ATTR = "attr".getBytes();
+...
+
+Table table = ... // instantiate a Table instance
+
+Scan scan = new Scan();
+scan.addColumn(CF, ATTR);
+scan.setRowPrefixFilter(Bytes.toBytes("row"));
+ResultScanner rs = table.getScanner(scan);
+try {
+  for (Result r = rs.next(); r != null; r = rs.next()) {
+    // process result...
+  }
+} finally {
+  rs.close(); // always close the ResultScanner!
+}
+----
+
+Note that generally the easiest way to specify a specific stop point for a scan is by using the link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/InclusiveStopFilter.html[InclusiveStopFilter] class.
+
+=== Delete
+
+link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Delete.html[Delete] removes a row from a table.
+Deletes are executed via link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Table.html#delete(org.apache.hadoop.hbase.client.Delete)[
+ HTable.delete].
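+
+A minimal sketch of a whole-row delete with the Java client, using hypothetical table and row names:
+
+[source,java]
+----
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class DeleteSketch {
+  public static void main(String[] args) throws Exception {
+    try (Connection connection =
+             ConnectionFactory.createConnection(HBaseConfiguration.create());
+         Table table = connection.getTable(TableName.valueOf("myTable"))) {
+      // A whole-row delete writes a tombstone per column family; the data
+      // itself is physically removed later, at major compaction time.
+      table.delete(new Delete(Bytes.toBytes("row1")));
+    }
+  }
+}
+----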
+
+HBase does not modify data in place, and so deletes are handled by creating new markers called _tombstones_.
+These tombstones, along with the dead values, are cleaned up on major compactions.
+
+See <> for more information on deleting versions of columns, and see <> for more information on compactions.
+
+[[versions]]
+== Versions
+
+A _{row, column, version}_ tuple exactly specifies a [literal]+cell+ in HBase.
+It's possible to have an unbounded number of cells where the row and column are the same but the cell address differs only in its version dimension.
+
+While rows and column keys are expressed as bytes, the version is specified using a long integer.
+Typically this long contains time instances such as those returned by [code]+java.util.Date.getTime()+ or [code]+System.currentTimeMillis()+, that is: [quote]_the difference, measured in milliseconds, between the current time and midnight, January 1, 1970 UTC_.
+
+The HBase version dimension is stored in decreasing order, so that when reading from a store file, the most recent values are found first.
+
+There is a lot of confusion over the semantics of [literal]+cell+ versions in HBase.
+In particular:
+
+* If multiple writes to a cell have the same version, only the last written is fetchable.
+* It is OK to write cells in a non-increasing version order.
+
+Below we describe how the version dimension in HBase currently works.
+See link:https://issues.apache.org/jira/browse/HBASE-2406[HBASE-2406] for discussion of HBase versions. link:http://outerthought.org/blog/417-ot.html[Bending time in HBase] makes for a good read on the version, or time, dimension in HBase.
+It has more detail on versioning than is provided here.
+As of this writing, the limitation _Overwriting values at existing timestamps_ mentioned in the article no longer holds in HBase.
+This section is basically a synopsis of this article by Bruno Dumon.
+
+[[specify.number.of.versions]]
+=== Specifying the Number of Versions to Store
+
+The maximum number of versions to store for a given column is part of the column schema and is specified at table creation, or via an +alter+ command, via [code]+HColumnDescriptor.DEFAULT_VERSIONS+.
+Prior to HBase 0.96, the default number of versions kept was [literal]+3+, but in 0.96 and newer it has been changed to [literal]+1+.
+
+.Modify the Maximum Number of Versions for a Column
+====
+This example uses HBase Shell to keep a maximum of 5 versions of column [code]+f1+.
+You could also use link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html[HColumnDescriptor].
+
+----
+hbase> alter 't1', NAME => 'f1', VERSIONS => 5
+----
+====
+
+.Modify the Minimum Number of Versions for a Column
+====
+You can also specify the minimum number of versions to store.
+By default, this is set to 0, which means the feature is disabled.
+The following example sets the minimum number of versions on field [code]+f1+ to [literal]+2+, via HBase Shell.
+You could also use link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html[HColumnDescriptor].
+
+----
+hbase> alter 't1', NAME => 'f1', MIN_VERSIONS => 2
+----
+====
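+
+The same schema change can be made with the Java client. This sketch assumes the table and family from the shell examples above and wraps the change in a disable/enable cycle, which is the conservative approach:
+
+[source,java]
+----
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class AlterVersionsSketch {
+  public static void main(String[] args) throws Exception {
+    TableName tableName = TableName.valueOf("t1");
+    try (Connection connection =
+             ConnectionFactory.createConnection(HBaseConfiguration.create());
+         Admin admin = connection.getAdmin()) {
+      HTableDescriptor htd = admin.getTableDescriptor(tableName);
+      HColumnDescriptor f1 = htd.getFamily(Bytes.toBytes("f1"));
+      f1.setMaxVersions(5);  // same effect as VERSIONS => 5 in the shell
+      f1.setMinVersions(2);  // same effect as MIN_VERSIONS => 2
+      admin.disableTable(tableName);
+      admin.modifyColumn(tableName, f1);
+      admin.enableTable(tableName);
+    }
+  }
+}
+----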
+
+Starting with HBase 0.98.2, you can specify a global default for the maximum number of versions kept for all newly-created columns, by setting +hbase.column.max.version+ in [path]_hbase-site.xml_.
+See <>.
+
+[[versions.ops]]
+=== Versions and HBase Operations
+
+In this section we look at the behavior of the version dimension for each of the core HBase operations.
+
+==== Get/Scan
+
+Gets are implemented on top of Scans.
+The below discussion of link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Get.html[Get] applies equally to link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Scan.html[Scans].
+
+By default, i.e.
+if you specify no explicit version, when doing a [literal]+get+, the cell whose version has the largest value is returned (which may or may not be the latest one written, see later). The default behavior can be modified in the following ways:
+
+* to return more than one version, see link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Get.html#setMaxVersions()[Get.setMaxVersions()]
+* to return versions other than the latest, see link:???[Get.setTimeRange()]
++
+To retrieve the latest version that is less than or equal to a given value, thus giving the 'latest' state of the record at a certain point in time, just use a range from 0 to the desired version and set the max versions to 1.
+
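+
+As a sketch of that technique, the helper below (a hypothetical method name) returns the newest cell at or before a given timestamp by combining +setTimeRange+ with +setMaxVersions(1)+:
+
+[source,java]
+----
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+
+public class PointInTimeGetSketch {
+  public static final byte[] CF = "cf".getBytes();
+  public static final byte[] ATTR = "attr".getBytes();
+
+  public static Result getAsOf(Table table, byte[] row, long asOfTimestamp)
+      throws IOException {
+    Get get = new Get(row);
+    get.addColumn(CF, ATTR);
+    // The upper bound of the range is exclusive, so add 1 to include
+    // asOfTimestamp itself.
+    get.setTimeRange(0, asOfTimestamp + 1);
+    get.setMaxVersions(1);  // newest version inside the range wins
+    return table.get(get);
+  }
+}
+----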
+
+==== Default Get Example
+
+The following Get will only retrieve the current version of the row
+
+[source,java]
+----
+
+public static final byte[] CF = "cf".getBytes();
+public static final byte[] ATTR = "attr".getBytes();
+...
+Get get = new Get(Bytes.toBytes("row1"));
+Result r = table.get(get);
+byte[] b = r.getValue(CF, ATTR); // returns current version of value
+----
+
+==== Versioned Get Example
+
+The following Get will return the last 3 versions of the row.
+
+[source,java]
+----
+
+public static final byte[] CF = "cf".getBytes();
+public static final byte[] ATTR = "attr".getBytes();
+...
+Get get = new Get(Bytes.toBytes("row1"));
+get.setMaxVersions(3); // will return last 3 versions of row
+Result r = table.get(get);
+byte[] b = r.getValue(CF, ATTR); // returns current version of value
+List kv = r.getColumn(CF, ATTR); // returns all versions of this column
+----
+
+==== Put
+
+Doing a put always creates a new version of a [literal]+cell+, at a certain timestamp.
+By default the system uses the server's [literal]+currentTimeMillis+, but you can specify the version (= the long integer) yourself, on a per-column level.
+This means you could assign a time in the past or the future, or use the long value for non-time purposes.
+
+To overwrite an existing value, do a put at exactly the same row, column, and version as that of the cell you would overshadow.
+
+===== Implicit Version Example
+
+The following Put will be implicitly versioned by HBase with the current time.
+
+[source,java]
+----
+
+public static final byte[] CF = "cf".getBytes();
+public static final byte[] ATTR = "attr".getBytes();
+...
+Put put = new Put(Bytes.toBytes(row));
+put.add(CF, ATTR, Bytes.toBytes(data));
+table.put(put);
+----
+
+===== Explicit Version Example
+
+The following Put has the version timestamp explicitly set.
+
+[source,java]
+----
+
+public static final byte[] CF = "cf".getBytes();
+public static final byte[] ATTR = "attr".getBytes();
+...
+Put put = new Put(Bytes.toBytes(row));
+long explicitTimeInMs = 555; // just an example
+put.add(CF, ATTR, explicitTimeInMs, Bytes.toBytes(data));
+table.put(put);
+----
+
+Caution: the version timestamp is used internally by HBase for things like time-to-live calculations.
+It's usually best to avoid setting this timestamp yourself.
+Prefer using a separate timestamp attribute of the row, or have the timestamp as part of the row key, or both.
+
+[[version.delete]]
+==== Delete
+
+There are three different types of internal delete markers.
+See Lars Hofhansl's blog for discussion of his attempt at adding another, link:http://hadoop-hbase.blogspot.com/2012/01/scanning-in-hbase.html[Scanning
+ in HBase: Prefix Delete Marker].
+
+* Delete: for a specific version of a column.
+* Delete column: for all versions of a column.
+* Delete family: for all columns of a particular ColumnFamily.
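+
+To show which API call writes which marker, here is a small sketch against the 0.94-1.x client API (newer releases rename these methods to +addColumn+/+addColumns+/+addFamily+). In practice you would use only one of them per Delete; all three appear here just to name the marker each one creates.
+
+[source,java]
+----
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Table;
+
+public class DeleteMarkerSketch {
+  public static final byte[] CF = "cf".getBytes();
+  public static final byte[] ATTR = "attr".getBytes();
+
+  public static void markers(Table table, byte[] row, long version)
+      throws IOException {
+    Delete delete = new Delete(row);
+    // Delete marker: only the cell at exactly this version of cf:attr.
+    delete.deleteColumn(CF, ATTR, version);
+    // Delete column marker: all versions of cf:attr.
+    delete.deleteColumns(CF, ATTR);
+    // Delete family marker: every column in the cf family.
+    delete.deleteFamily(CF);
+    table.delete(delete);
+  }
+}
+----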
+
+When deleting an entire row, HBase will internally create a tombstone for each ColumnFamily (i.e., not each individual column).
+
+Deletes work by creating _tombstone_ markers.
+For example, let's suppose we want to delete a row.
+For this you can specify a version, or else by default the [literal]+currentTimeMillis+ is used.
+What this means is [quote]_delete all
+ cells where the version is less than or equal to this version_.
+HBase never modifies data in place, so for example a delete will not immediately delete (or mark as deleted) the entries in the storage file that correspond to the delete condition.
+Rather, a so-called _tombstone_ is written, which will mask the deleted values.
+When HBase does a major compaction, the tombstones are processed to actually remove the dead values, together with the tombstones themselves.
+If the version you specified when deleting a row is larger than the version of any value in the row, then you can consider the complete row to be deleted.
+
+For an informative discussion on how deletes and versioning interact, see the thread link:http://comments.gmane.org/gmane.comp.java.hadoop.hbase.user/28421[Put w/
+ timestamp -> Deleteall -> Put w/ timestamp fails] up on the user mailing list.
+
+Also see <> for more information on the internal KeyValue format.
+
+Delete markers are purged during the next major compaction of the store, unless the +KEEP_DELETED_CELLS+ option is set in the column family.
+To keep the deletes for a configurable amount of time, you can set the delete TTL via the +hbase.hstore.time.to.purge.deletes+ property in [path]_hbase-site.xml_.
+If +hbase.hstore.time.to.purge.deletes+ is not set, or set to 0, all delete markers, including those with timestamps in the future, are purged during the next major compaction.
+Otherwise, a delete marker with a timestamp in the future is kept until the major compaction which occurs after the time represented by the marker's timestamp plus the value of +hbase.hstore.time.to.purge.deletes+, in milliseconds.
+
+NOTE: This behavior represents a fix for an unexpected change that was introduced in HBase 0.94, and was fixed in link:https://issues.apache.org/jira/browse/HBASE-10118[HBASE-10118].
+The change has been backported to HBase 0.94 and newer branches.
+
+=== Current Limitations
+
+==== Deletes mask Puts
+
+Deletes mask puts, even puts that happened after the delete was entered.
+See link:https://issues.apache.org/jira/browse/HBASE-2256[HBASE-2256].
+Remember that a delete writes a tombstone, which only disappears after the next major compaction has run.
+Suppose you do a delete of everything <= T.
+After this you do a new put with a timestamp <= T.
+This put, even if it happened after the delete, will be masked by the delete tombstone.
+Performing the put will not fail, but when you do a get you will notice the put had no effect.
+It will start working again after the major compaction has run.
+These issues should not be a problem if you use always-increasing versions for new puts to a row.
+But they can occur even if you do not care about time: just do delete and put immediately after each other, and there is some chance they happen within the same millisecond.
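+
+A sketch of the failure mode described above, using hypothetical table and row names; until a major compaction runs, the final get comes back empty even though the put happened after the delete:
+
+[source,java]
+----
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class DeleteMasksPutSketch {
+  public static final byte[] CF = "cf".getBytes();
+  public static final byte[] ATTR = "attr".getBytes();
+
+  public static void demonstrate(Table table) throws IOException {
+    byte[] row = Bytes.toBytes("row1");
+    long t = System.currentTimeMillis();
+
+    // Tombstone covering everything at or before version t.
+    table.delete(new Delete(row, t));
+
+    // This put happens after the delete, but its version is <= t ...
+    Put put = new Put(row);
+    put.add(CF, ATTR, t, Bytes.toBytes("value"));
+    table.put(put);
+
+    // ... so it is masked until the next major compaction.
+    Result result = table.get(new Get(row));
+    System.out.println("masked? " + result.isEmpty());  // expected: true
+  }
+}
+----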
+
+[[major.compactions.change.query.results]]
+==== Major compactions change query results
+
+[quote]_...create three cell versions at t1, t2 and t3, with a maximum-versions
+ setting of 2. So when getting all versions, only the values at t2 and t3 will be
+ returned. But if you delete the version at t2 or t3, the one at t1 will appear again.
+ Obviously, once a major compaction has run, such behavior will not be the case
+ anymore..._ (See _Garbage Collection_ in link:http://outerthought.org/blog/417-ot.html[Bending time in
+ HBase].)
+
+[[dm.sort]]
+== Sort Order
+
+All data model operations in HBase return data in sorted order.
+First by row, then by ColumnFamily, followed by column qualifier, and finally timestamp (sorted in reverse, so newest records are returned first).
+
+[[dm.column.metadata]]
+== Column Metadata
+
+There is no store of column metadata outside of the internal KeyValue instances for a ColumnFamily.
+Thus, while HBase can support not only a large number of columns per row, but a heterogeneous set of columns between rows as well, it is your responsibility to keep track of the column names.
+
+The only way to get a complete set of columns that exist for a ColumnFamily is to process all the rows.
+For more information about how HBase stores data internally, see <>.
+
+== Joins
+
+Whether HBase supports joins is a common question on the dist-list, and there is a simple answer: it doesn't, at least not in the way that RDBMS' support them (e.g., with equi-joins or outer-joins in SQL). As has been illustrated in this chapter, the read data model operations in HBase are Get and Scan.
+
+However, that doesn't mean that equivalent join functionality can't be achieved in your application; you just have to do it yourself.
+The two primary strategies are either to denormalize the data upon writing to HBase, or to have lookup tables and do the join between HBase tables in your application or MapReduce code (and as RDBMS' demonstrate, there are several strategies for this depending on the size of the tables, e.g., nested loops vs.
+hash-joins). So which is the best approach? It depends on what you are trying to do, and as such there isn't a single answer that works for every use case.
+
+== ACID
+
+See link:http://hbase.apache.org/acid-semantics.html[ACID Semantics].
+Lars Hofhansl has also written a note on link:http://hadoop-hbase.blogspot.com/2012/03/acid-in-hbase.html[ACID in HBase].
+
+ifdef::backend-docbook[]
+[index]
+== Index
+// Generated automatically by the DocBook toolchain.
+endif::backend-docbook[]
diff --git a/src/main/asciidoc/_chapters/developer.adoc b/src/main/asciidoc/_chapters/developer.adoc
new file mode 100644
index 0000000..4c2bba6
--- /dev/null
+++ b/src/main/asciidoc/_chapters/developer.adoc
@@ -0,0 +1,1951 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[[developer]]
+= Building and Developing Apache HBase
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+
+This chapter contains information and guidelines for building and releasing HBase code and documentation.
+Being familiar with these guidelines will help the HBase committers to use your contributions more easily.
+
+[[getting.involved]]
+== Getting Involved
+
+Apache HBase gets better only when people contribute! If you are looking to contribute to Apache HBase, look for link:https://issues.apache.org/jira/issues/?jql=project%20%3D%20HBASE%20AND%20labels%20in%20(beginner)[issues in JIRA tagged with the label 'beginner'].
+These are issues HBase contributors have deemed worthy but not of immediate priority, and a good way to ramp up on HBase internals.
+See link:http://search-hadoop.com/m/DHED43re96[What label
+ is used for issues that are good on ramps for new contributors?] from the dev mailing list for background.
+
+Before you get started submitting code to HBase, please refer to <>.
+
+As Apache HBase is an Apache Software Foundation project, see <> for more information about how the ASF functions.
+
+[[mailing.list]]
+=== Mailing Lists
+
+Sign up for the dev-list and the user-list.
+See the link:http://hbase.apache.org/mail-lists.html[mailing lists] page.
+Posing questions - and helping to answer other people's questions - is encouraged! There are varying levels of experience on both lists so patience and politeness are encouraged (and please stay on topic.)
+
+[[irc]]
+=== Internet Relay Chat (IRC)
+
+For real-time questions and discussions, use the [literal]+#hbase+ IRC channel on the link:https://freenode.net/[FreeNode] IRC network.
+FreeNode offers a web-based client, but most people prefer a native client, and several clients are available for each operating system.
+
+=== Jira
+
+Check for existing issues in link:https://issues.apache.org/jira/browse/HBASE[Jira].
+Whether it's a new feature request, an enhancement, or a bug, file a ticket.
+
+To check for existing issues which you can tackle as a beginner, search for link:https://issues.apache.org/jira/issues/?jql=project%20%3D%20HBASE%20AND%20labels%20in%20(beginner)[issues in JIRA tagged with the label 'beginner'].
+
+.JIRA Priorities
+* Blocker: Should only be used if the issue WILL cause data loss or cluster instability reliably.
+* Critical: The issue described can cause data loss or cluster instability in some cases.
+* Major: Important but not tragic issues, like updates to the client API that will add a lot of much-needed functionality or significant bugs that need to be fixed but that don't cause data loss.
+* Minor: Useful enhancements and annoying but not damaging bugs.
+* Trivial: Useful enhancements but generally cosmetic.
+
+.Code Blocks in Jira Comments
+====
+A commonly used macro in Jira is {code}. Everything inside the tags is preformatted, as in this example.
+
+[source]
+----
+
+{code}
+code snippet
+{code}
+----
+====
+
+[[repos]]
+== Apache HBase Repositories
+
+There are two different repositories for Apache HBase: Subversion (SVN) and Git.
+Git is our repository of record for all but the Apache HBase website.
+We used to be on SVN.
+We migrated.
+See link:https://issues.apache.org/jira/browse/INFRA-7768[Migrate Apache HBase SVN Repos to Git].
+Updating hbase.apache.org still requires use of SVN (See <>). See link:http://hbase.apache.org/source-repository.html[Source Code
+ Management] page for contributor and committer links or search for HBase on the link:http://git.apache.org/[Apache Git] page.
+
+== IDEs
+
+=== Eclipse
+
+[[eclipse.code.formatting]]
+==== Code Formatting
+
+Under the [path]_dev-support/_ folder, you will find [path]_hbase_eclipse_formatter.xml_.
+We encourage you to have this formatter in place in eclipse when editing HBase code.
+
+.Procedure: Load the HBase Formatter Into Eclipse
+. Open the menu item.
+. In Preferences, click the menu item.
+. Click btn:[Import] and browse to the location of the [path]_hbase_eclipse_formatter.xml_ file, which is in the [path]_dev-support/_ directory.
+ Click btn:[Apply].
+. Still in Preferences, click .
+ Be sure the following options are selected:
++
+* Perform the selected actions on save
+* Format source code
+* Format edited lines
++
+Click btn:[Apply].
+Close all dialog boxes and return to the main window.
+
+
+In addition to the automatic formatting, make sure you follow the style guidelines explained in <>
+
+Also, no [code]+@author+ tags - that's a rule.
+Quality Javadoc comments are appreciated.
+And include the Apache license.
+
+[[eclipse.git.plugin]]
+==== Eclipse Git Plugin
+
+If you cloned the project via git, download and install the Git plugin (EGit). Attach to your local git repo (via the [label]#Git Repositories# window) and you'll be able to see file revision history, generate patches, etc.
+
+[[eclipse.maven.setup]]
+==== HBase Project Setup in Eclipse using [code]+m2eclipse+
+
+The easiest way is to use the +m2eclipse+ plugin for Eclipse.
+Eclipse Indigo or newer includes +m2eclipse+, or you can download it from link:http://www.eclipse.org/m2e/[]. It provides Maven integration for Eclipse, and even lets you use the direct Maven commands from within Eclipse to compile and test your project.
+
+To import the project, click and select the HBase root directory. [code]+m2eclipse+ locates all the hbase modules for you.
+
+If you install +m2eclipse+ and import HBase in your workspace, do the following to fix your eclipse Build Path.
+
+. Remove [path]_target_ folder
+. Add [path]_target/generated-jamon_ and [path]_target/generated-sources/java_ folders.
+. Remove from your Build Path the exclusions on the [path]_src/main/resources_ and [path]_src/test/resources_ to avoid error messages in the console, such as the following:
++
+----
+Failed to execute goal
+org.apache.maven.plugins:maven-antrun-plugin:1.6:run (default) on project hbase:
+'An Ant BuildException has occured: Replace: source file .../target/classes/hbase-default.xml
+doesn't exist
+----
++
+This will also reduce the eclipse build cycles and make your life easier when developing.
+
+
+[[eclipse.commandline]]
+==== HBase Project Setup in Eclipse Using the Command Line
+
+Instead of using [code]+m2eclipse+, you can generate the Eclipse files from the command line.
+
+. First, run the following command, which builds HBase.
+ You only need to do this once.
++
+[source,bourne]
+----
+mvn clean install -DskipTests
+----
+
+. Close Eclipse, and execute the following command from the terminal, in your local HBase project directory, to generate new [path]_.project_ and [path]_.classpath_ files.
++
+[source,bourne]
+----
+mvn eclipse:eclipse
+----
+
+. Reopen Eclipse and import the [path]_.project_ file in the HBase directory to a workspace.
+
+[[eclipse.maven.class]]
+==== Maven Classpath Variable
+
+The [var]+$M2_REPO+ classpath variable needs to be set up for the project.
+This needs to be set to your local Maven repository, which is usually [path]_~/.m2/repository_
+
+If this classpath variable is not configured, you will see compile errors in Eclipse like this:
+
+----
+
+Description Resource Path Location Type
+The project cannot be built until build path errors are resolved hbase Unknown Java Problem
+Unbound classpath variable: 'M2_REPO/asm/asm/3.1/asm-3.1.jar' in project 'hbase' hbase Build path Build Path Problem
+Unbound classpath variable: 'M2_REPO/com/google/guava/guava/r09/guava-r09.jar' in project 'hbase' hbase Build path Build Path Problem
+Unbound classpath variable: 'M2_REPO/com/google/protobuf/protobuf-java/2.3.0/protobuf-java-2.3.0.jar' in project 'hbase' hbase Build path Build Path Problem Unbound classpath variable:
+----
+
+[[eclipse.issues]]
+==== Eclipse Known Issues
+
+Eclipse will currently complain about [path]_Bytes.java_.
+It is not possible to turn these errors off.
+
+----
+
+Description Resource Path Location Type
+Access restriction: The method arrayBaseOffset(Class) from the type Unsafe is not accessible due to restriction on required library /System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Classes/classes.jar Bytes.java /hbase/src/main/java/org/apache/hadoop/hbase/util line 1061 Java Problem
+Access restriction: The method arrayIndexScale(Class) from the type Unsafe is not accessible due to restriction on required library /System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Classes/classes.jar Bytes.java /hbase/src/main/java/org/apache/hadoop/hbase/util line 1064 Java Problem
+Access restriction: The method getLong(Object, long) from the type Unsafe is not accessible due to restriction on required library /System/Library/Java/JavaVirtualMachines/1.6.0.jdk/Contents/Classes/classes.jar Bytes.java /hbase/src/main/java/org/apache/hadoop/hbase/util line 1111 Java Problem
+----
+
+[[eclipse.more]]
+==== Eclipse - More Information
+
+For additional information on setting up Eclipse for HBase development on Windows, see link:http://michaelmorello.blogspot.com/2011/09/hbase-subversion-eclipse-windows.html[Michael Morello's blog] on the topic.
+
+=== IntelliJ IDEA
+
+You can set up IntelliJ IDEA for similar functionality as Eclipse.
+Follow these steps.
+
+. Select
+. You do not need to select a profile.
+ Be sure [label]#Maven project
+ required# is selected, and click btn:[Next].
+. Select the location for the JDK.
+
+.Using the HBase Formatter in IntelliJ IDEA
+Using the Eclipse Code Formatter plugin for IntelliJ IDEA, you can import the HBase code formatter described in <>.
+
+=== Other IDEs
+
+It would be useful to mirror the <> set-up instructions for other IDEs.
+If you would like to assist, please have a look at link:https://issues.apache.org/jira/browse/HBASE-11704[HBASE-11704].
+
+[[build]]
+== Building Apache HBase
+
+[[build.basic]]
+=== Basic Compile
+
+HBase is compiled using Maven.
+You must use Maven 3.x.
+To check your Maven version, run the command +mvn -version+.
+
+.JDK Version Requirements
+[NOTE]
+====
+Starting with HBase 1.0 you must use Java 7 or later to build from source code.
+See <> for more complete information about supported JDK versions.
+====
+
+[[maven.build.commands]]
+==== Maven Build Commands
+
+All commands are executed from the local HBase project directory.
+
+===== Package
+
+The simplest command to compile HBase from its java source code is to use the [code]+package+ target, which builds JARs with the compiled files.
+
+[source,bourne]
+----
+mvn package -DskipTests
+----
+
+Or, to clean up before compiling:
+
+[source,bourne]
+----
+mvn clean package -DskipTests
+----
+
+With Eclipse set up as explained above in <>, you can also use the menu:Build[] command in Eclipse.
+To create the full installable HBase package takes a little bit more work, so read on.
+
+[[maven.build.commands.compile]]
+===== Compile
+
+The [code]+compile+ target does not create the JARs with the compiled files.
+
+[source,bourne]
+----
+mvn compile
+----
+
+[source,bourne]
+----
+mvn clean compile
+----
+
+===== Install
+
+To install the JARs in your [path]_~/.m2/_ directory, use the [code]+install+ target.
+
+[source,bourne]
+----
+mvn install
+----
+
+[source,bourne]
+----
+mvn clean install
+----
+
+[source,bourne]
+----
+mvn clean install -DskipTests
+----
+
+[[maven.build.commands.unitall]]
+==== Running all or individual Unit Tests
+
+See the <> section in <>
+
+[[maven.build.hadoop]]
+==== Building against various Hadoop versions
+
+As of 0.96, Apache HBase supports building against Apache Hadoop versions: 1.0.3, 2.0.0-alpha and 3.0.0-SNAPSHOT.
+By default, in 0.96 and earlier, we will build with Hadoop-1.0.x.
+As of 0.98, Hadoop 1.x is deprecated and Hadoop 2.x is the default.
+To change the version to build against, add a hadoop.profile property when you invoke +mvn+:
+
+[source,bourne]
+----
+mvn -Dhadoop.profile=1.0 ...
+----
+
+The above will build against whatever explicit hadoop 1.x version we have in our [path]_pom.xml_ as our '1.0' version.
+Tests may not all pass so you may need to pass [code]+-DskipTests+ unless you are inclined to fix the failing tests.
+
+.'dependencyManagement.dependencies.dependency.artifactId' for org.apache.hbase:${compat.module}:test-jar with value '${compat.module}' does not match a valid id pattern
+[NOTE]
+====
+You will see ERRORs like the above title if you pass the _default_ profile; e.g.
+if you pass +hadoop.profile=1.1+ when building 0.96 or +hadoop.profile=2.0+ when building HBase 0.98; just drop the hadoop.profile stipulation in this case to get your build to run again.
+This seems to be a Maven peculiarity that is probably fixable, but we've not spent the time trying to figure it out.
+====
+
+Similarly, for 3.0, you would just replace the profile value.
+Note that Hadoop-3.0.0-SNAPSHOT does not currently have a deployed Maven artifact - you will need to build and install your own in your local Maven repository if you want to run against this profile.
+
+In earlier versions of Apache HBase, you can build against older versions of Apache Hadoop, notably, Hadoop 0.22.x and 0.23.x.
+If you are running, for example HBase-0.94 and wanted to build against Hadoop 0.23.x, you would run with:
+
+[source,bourne]
+----
+mvn -Dhadoop.profile=22 ...
+----
+
+[[build.protobuf]]
+==== Build Protobuf
+
+You may need to change the protobuf definitions that reside in the [path]_hbase-protocol_ module or other modules.
+
+The protobuf files are located in [path]_hbase-protocol/src/main/protobuf_.
+For the change to be effective, you will need to regenerate the classes.
+You can use maven profile [code]+compile-protobuf+ to do this.
+
+[source,bourne]
+----
+mvn compile -Pcompile-protobuf
+----
+
+You may also want to define [var]+protoc.path+ for the protoc binary, using the following command:
+
+[source,bourne]
+----
+
+mvn compile -Pcompile-protobuf -Dprotoc.path=/opt/local/bin/protoc
+----
+
+Read the [path]_hbase-protocol/README.txt_ for more details.
+
+[[build.thrift]]
+==== Build Thrift
+
+You may need to change the thrift definitions that reside in the [path]_hbase-thrift_ module or other modules.
+
+The thrift files are located in [path]_hbase-thrift/src/main/resources_.
+For the change to be effective, you will need to regenerate the classes.
+You can use maven profile [code]+compile-thrift+ to do this.
+
+[source,bourne]
+----
+mvn compile -Pcompile-thrift
+----
+
+You may also want to define [var]+thrift.path+ for the thrift binary, using the following command:
+
+[source,bourne]
+----
+
+ mvn compile -Pcompile-thrift -Dthrift.path=/opt/local/bin/thrift
+----
+
+==== Build a Tarball
+
+You can build a tarball without going through the release process described in <>, by running the following command:
+
+----
+mvn -DskipTests clean install && mvn -DskipTests package assembly:single
+----
+
+The distribution tarball is built in [path]_hbase-assembly/target/hbase-<version>-bin.tar.gz_.
+
+[[build.gotchas]]
+==== Build Gotchas
+
+If you see [code]+Unable to find resource 'VM_global_library.vm'+, ignore it.
+It's not an error.
+It is link:http://jira.codehaus.org/browse/MSITE-286[officially
+ ugly] though.
+
+[[build.snappy]]
+==== Building in snappy compression support
+
+Pass [code]+-Psnappy+ to trigger the [code]+hadoop-snappy+ maven profile for building Google Snappy native libraries into HBase.
+See also <>
+
+[[releasing]]
+== Releasing Apache HBase
+
+.Building against HBase 1.x
+[NOTE]
+====
+HBase 1.x requires Java 7 to build.
+See <> for Java requirements per HBase release.
+====
+
+=== Building against HBase 0.96-0.98
+
+HBase 0.96.x will run on Hadoop 1.x or Hadoop 2.x.
+HBase 0.98 still runs on both, but HBase 0.98 deprecates use of Hadoop 1.
+HBase 1.x will _not_ run on Hadoop 1.
+In the following procedures, we make a distinction between HBase 1.x builds and the awkward process involved in building HBase 0.96/0.98 for either Hadoop 1 or Hadoop 2 targets.
+
+You must choose which Hadoop to build against.
+It is not possible to build a single HBase binary that runs against both Hadoop 1 and Hadoop 2.
+Hadoop is included in the build, because it is needed to run HBase in standalone mode.
+Therefore, the set of modules included in the tarball changes, depending on the build target.
+To determine which HBase you have, look at the HBase version.
+The Hadoop version is embedded within it.
+
+Maven, our build system, natively does not allow a single product to be built against different dependencies.
+Also, Maven cannot change the set of included modules and write out the correct [path]_pom.xml_ files with appropriate dependencies, even using two build targets, one for Hadoop 1 and another for Hadoop 2.
+A prerequisite step is required, which takes as input the current [path]_pom.xml_s and generates Hadoop 1 or Hadoop 2 versions using a script in the [path]_dev-tools/_ directory, called [path]_generate-hadoopX-poms.sh_ where [replaceable]_X_ is either [literal]+1+ or [literal]+2+.
+You then reference these generated poms when you build.
+For now, just be aware of the difference between HBase 1.x builds and those of HBase 0.96-0.98.
+This difference is important to the build instructions.
+
+.Example [path]_~/.m2/settings.xml_ File
+====
+Publishing to maven requires you sign the artifacts you want to upload.
+For the build to sign them for you, you must have a properly configured [path]_settings.xml_ in your local repository under [path]_.m2_, such as the following.
+
+[source,xml]
+----
+<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0
+                      http://maven.apache.org/xsd/settings-1.0.0.xsd">
+  <servers>
+    <server>
+      <id>apache.snapshots.https</id>
+      <username>YOUR_APACHE_ID</username>
+      <password>YOUR_APACHE_PASSWORD</password>
+    </server>
+    <server>
+      <id>apache.releases.https</id>
+      <username>YOUR_APACHE_ID</username>
+      <password>YOUR_APACHE_PASSWORD</password>
+    </server>
+  </servers>
+  <profiles>
+    <profile>
+      <id>apache-release</id>
+      <properties>
+        <gpg.keyname>YOUR_KEYNAME</gpg.keyname>
+        <gpg.passphrase>YOUR_KEY_PASSWORD</gpg.passphrase>
+      </properties>
+    </profile>
+  </profiles>
+</settings>
+----
+====
+
+[[maven.release]]
+=== Making a Release Candidate
+
+NOTE: These instructions are for building HBase 1.0.x.
+For building earlier versions, the process is different.
+See this section under the respective release documentation folders.
+
+.Point Releases
+If you are making a point release (for example to quickly address a critical incompatibility or security problem) off of a release branch instead of a development branch, the tagging instructions are slightly different.
+I'll prefix those special steps with _Point Release Only_.
+
+.Before You Begin
+Before you make a release candidate, do a practice run by deploying a snapshot.
+Before you start, check to be sure recent builds have been passing for the branch from where you are going to take your release.
+You should also have tried recent branch tips out on a cluster under load, perhaps by running the [code]+hbase-it+ integration test suite for a few hours to 'burn in' the near-candidate bits.
+
+.Point Release Only
+[NOTE]
+====
+At this point you should tag the previous release branch (ex: 0.96.1) with the new point release tag (e.g.
+the 0.96.1.1 tag). Any commits with changes for the point release should be applied to the new tag.
+====
+
+The Hadoop link:http://wiki.apache.org/hadoop/HowToRelease[How To
+ Release] wiki page is used as a model for most of the instructions below, and may have more detail on particular sections, so it is worth reviewing.
+
+.Specifying the Heap Space for Maven on OSX
+[NOTE]
+====
+On OSX, you may need to specify the heap space for Maven commands, by setting the [var]+MAVEN_OPTS+ variable to [literal]+-Xmx3g+.
+You can prefix the variable to the Maven command, as in the following example:
+
+----
+MAVEN_OPTS="-Xmx3g" mvn package
+----
+
+You could also set this in an environment variable or alias in your shell.
+====
+
+
+NOTE: The script [path]_dev-support/make_rc.sh_ automates many of these steps.
+It does not modify the [path]_CHANGES.txt_ for the release, close the staging repository in Apache Maven (human intervention is needed here), or check the produced artifacts to ensure they are 'good' -- e.g.
+extracting the produced tarballs, verifying that they look right, then starting HBase and checking that everything is running correctly -- nor does it sign and push the tarballs to link:http://people.apache.org[people.apache.org].
+The script handles everything else, and comes in handy.
+
+.Procedure: Release Procedure
+. Update the [path]_CHANGES.txt_ file and the POM files.
++
+Update [path]_CHANGES.txt_ with the changes since the last release.
+Make sure the URL to the JIRA points to the proper location which lists fixes for this release.
+Adjust the version in all the POM files appropriately.
+If you are making a release candidate, you must remove the [literal]+-SNAPSHOT+ label from all versions.
+If you are running this recipe to publish a snapshot, you must keep the [literal]+-SNAPSHOT+ suffix on the hbase version.
+The link:http://mojo.codehaus.org/versions-maven-plugin/[Versions
+ Maven Plugin] can be of use here.
+To set a version in all the many poms of the hbase multi-module project, use a command like the following:
++
+[source,bourne]
+----
+
+$ mvn clean org.codehaus.mojo:versions-maven-plugin:1.3.1:set -DnewVersion=0.96.0
+----
++
+Check in the [path]_CHANGES.txt_ and any version changes.
+
+. Update the documentation.
++
+Update the documentation under [path]_src/main/docbkx_.
+This usually involves copying the latest from trunk and making version-particular adjustments to suit this release candidate version.
+
+. Build the source tarball.
++
+Now, build the source tarball.
+This tarball is Hadoop-version-independent.
+It is just the pure source code and documentation, without ties to a particular Hadoop version.
+Add the [var]+-Prelease+ profile when building.
+It checks files for licenses and will fail the build if unlicensed files are present.
++
+[source,bourne]
+----
+
+$ mvn clean install -DskipTests assembly:single -Dassembly.file=hbase-assembly/src/main/assembly/src.xml -Prelease
+----
++
+Extract the tarball and make sure it looks good.
+A good test for the src tarball being 'complete' is to see if you can build new tarballs from this source bundle.
+If the source tarball is good, save it off to a _version directory_, a directory somewhere where you are collecting all of the tarballs you will publish as part of the release candidate.
+For example if you were building a hbase-0.96.0 release candidate, you might call the directory [path]_hbase-0.96.0RC0_.
+Later you will publish this directory as our release candidate up on link:http://people.apache.org/~YOU/[people.apache.org/~YOU/].
+
+. Build the binary tarball.
++
+Next, build the binary tarball.
+Add the [var]+-Prelease+ profile when building.
+It checks files for licenses and will fail the build if unlicensed files are present.
+Do it in two steps.
++
+* First install into the local repository
++
+[source,bourne]
+----
+
+$ mvn clean install -DskipTests -Prelease
+----
+
+* Next, generate documentation and assemble the tarball.
++
+[source,bourne]
+----
+
+$ mvn install -DskipTests site assembly:single -Prelease
+----
+
++
+Otherwise, if you try to do it all in one invocation, the build complains that hbase modules are not in the maven repository, especially on a fresh repository.
+It seems that you need the install goal in both steps.
++
+Extract the generated tarball and check it out.
+Look at the documentation, see if it runs, etc.
+If good, copy the tarball to the above mentioned _version directory_.
+
+. Create a new tag.
++
+.Point Release Only
+[NOTE]
+====
+The following step that creates a new tag can be skipped, since you've already created the point release tag.
+====
++
+Tag the release at this point since it looks good.
+If you find an issue later, you can delete the tag and start over.
+The release needs to be tagged for the next step.
+
+. Deploy to the Maven Repository.
++
+Next, deploy HBase to the Apache Maven repository, using the [var]+apache-release+ profile instead of the [var]+release+ profile when running the +mvn
+ deploy+ command.
+This profile invokes the Apache pom referenced by our pom files, and also signs your artifacts published to Maven, as long as the [path]_settings.xml_ is configured correctly, as described in <>.
++
+[source,bourne]
+----
+
+$ mvn deploy -DskipTests -Papache-release
+----
++
+This command copies all artifacts up to a temporary staging Apache mvn repository in an 'open' state.
+More work needs to be done on these maven artifacts to make them generally available.
+
+. Make the Release Candidate available.
++
+The artifacts are in the maven repository in the staging area in the 'open' state.
+While in this 'open' state you can check out what you've published to make sure all is good.
+To do this, login at link:http://repository.apache.org[repository.apache.org] using your Apache ID.
+Find your artifacts in the staging repository.
+Browse the content.
+Make sure all artifacts made it up and that the poms look generally good.
+If it checks out, 'close' the repo.
+This will make the artifacts publicly available.
+You will receive an email with the URL to give out for the temporary staging repository for others to use trying out this new release candidate.
+Include it in the email that announces the release candidate.
+Folks will need to add this repo URL to their local poms or to their local [path]_settings.xml_ file to pull the published release candidate artifacts.
+If the published artifacts are incomplete or have problems, just delete the 'open' staged artifacts.
++
+.hbase-downstreamer
+[NOTE]
+====
+See the link:https://github.com/saintstack/hbase-downstreamer[hbase-downstreamer] test for a simple example of a project that is downstream of HBase and depends on it.
+Check it out and run its simple test to make sure maven artifacts are properly deployed to the maven repository.
+Be sure to edit the pom to point to the proper staging repository.
+Make sure you are pulling from the repository when tests run and that you are not getting artifacts from your local repository, by either passing the [code]+-U+ flag or deleting your local repo content, and check that maven is pulling from the remote staging repository.
+====
++
+See link:http://www.apache.org/dev/publishing-maven-artifacts.html[Publishing Maven Artifacts] for some pointers on this maven staging process.
++
+NOTE: We no longer publish using the maven release plugin.
+Instead we do +mvn deploy+.
+It seems to give us a backdoor to maven release publishing.
+If there is no _-SNAPSHOT_ on the version string, then we are 'deployed' to the apache maven repository staging directory from which we can publish URLs for candidates and later, if they pass, publish as release (if a _-SNAPSHOT_ on the version string, deploy will put the artifacts up into apache snapshot repos).
++
+If the HBase version ends in [var]+-SNAPSHOT+, the artifacts go elsewhere.
+They are put into the Apache snapshots repository directly and are immediately available.
+If you are making a SNAPSHOT release, this is what you want to happen.
+
+. If you used the [path]_make_rc.sh_ script instead of doing
+ the above manually, do your sanity checks now.
++
+At this stage, you have two tarballs in your 'version directory' and a set of artifacts in a staging area of the maven repository, in the 'closed' state.
+These are publicly accessible in a temporary staging repository whose URL you should have gotten in an email.
+The above mentioned script, [path]_make_rc.sh_ does all of the above for you minus the check of the artifacts built, the closing of the staging repository up in maven, and the tagging of the release.
+If you run the script, do your checks at this stage verifying the src and bin tarballs and checking what is up in staging using hbase-downstreamer project.
+Tag before you start the build.
+You can always delete it if the build goes haywire.
+
+. Sign, upload, and 'stage' your version directory to link:http://people.apache.org[people.apache.org] (TODO:
+ there is a new location to stage releases using svnpubsub; see
+ link:https://issues.apache.org/jira/browse/HBASE-10554[HBASE-10554 Please delete old releases from mirroring system]).
++
+If all checks out, next put the _version directory_ up on link:http://people.apache.org[people.apache.org].
+You will need to sign and fingerprint them before you push them up.
+In the _version directory_ run the following commands:
++
+[source,bourne]
+----
+
+$ for i in *.tar.gz; do echo $i; gpg --print-mds $i > $i.mds ; done
+$ for i in *.tar.gz; do echo $i; gpg --armor --output $i.asc --detach-sig $i ; done
+$ cd ..
+# Presuming our 'version directory' is named 0.96.0RC0, now copy it up to people.apache.org.
+$ rsync -av 0.96.0RC0 people.apache.org:public_html
+----
++
+Make sure the link:http://people.apache.org[people.apache.org] directory is showing and that the mvn repo URLs are good.
+Announce the release candidate on the mailing list and call a vote.
+
+
+[[maven.snapshot]]
+=== Publishing a SNAPSHOT to maven
+
+Make sure your [path]_settings.xml_ is set up properly, as in <>.
+Make sure the hbase version includes [var]+-SNAPSHOT+ as a suffix.
+Following is an example of publishing SNAPSHOTS of a release that had an hbase version of 0.96.0 in its poms.
+
+[source,bourne]
+----
+
+ $ mvn clean install -DskipTests javadoc:aggregate site assembly:single -Prelease
+ $ mvn -DskipTests deploy -Papache-release
+----
+
+The [path]_make_rc.sh_ script mentioned above (see <>) can help you publish [var]+SNAPSHOTS+.
+Make sure your [var]+hbase.version+ has a [var]+-SNAPSHOT+ suffix before running the script.
+It will put a snapshot up into the apache snapshot repository for you.
+
+[[hbase.rc.voting]]
+== Voting on Release Candidates
+
+Everyone is encouraged to try and vote on HBase release candidates.
+Only the votes of PMC members are binding.
+PMC members, please read this WIP doc on policy voting for a release candidate, link:https://github.com/rectang/asfrelease/blob/master/release.md[Release
+ Policy]. [quote]_Before casting +1 binding votes, individuals are required to
+ download the signed source code package onto their own hardware, compile it as
+ provided, and test the resulting executable on their own platform, along with also
+ validating cryptographic signatures and verifying that the package meets the
+ requirements of the ASF policy on releases._ Regarding the latter, run +mvn apache-rat:check+ to verify all files are suitably licensed.
+See link:http://search-hadoop.com/m/DHED4dhFaU[HBase, mail # dev - On recent discussion clarifying ASF release policy] for how we arrived at this process.
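+
+A minimal sketch of the kind of local checks a voter might run on a downloaded candidate (the file names are placeholders for the actual release artifacts):
+
+[source,bourne]
+----
+# Verify the detached signature of the source tarball (file names are placeholders).
+gpg --verify hbase-<version>-src.tar.gz.asc hbase-<version>-src.tar.gz
+
+# Unpack, compile as provided, and run the license audit.
+tar xzf hbase-<version>-src.tar.gz && cd hbase-<version>
+mvn clean install -DskipTests
+mvn apache-rat:check
+----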
+
+[[documentation]]
+== Generating the HBase Reference Guide
+
+The manual is marked up using link:http://www.docbook.org/[docbook].
+We then use the link:http://code.google.com/p/docbkx-tools/[docbkx maven plugin] to transform the markup to html.
+This plugin is run when you specify the +site+ goal, as when you run +mvn site+, or you can call the plugin explicitly to generate just the manual by doing +mvn docbkx:generate-html+.
+When you run +mvn site+, the documentation is generated twice, once to generate the multipage manual and then again for the single page manual, which is easier to search.
+See <> for more information on building the documentation.
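+
+The two invocations mentioned above, laid out as commands:
+
+[source,bourne]
+----
+# Build the full site, which generates both flavours of the manual.
+mvn site
+
+# Or generate just the manual.
+mvn docbkx:generate-html
+----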
+
+[[hbase.org]]
+== Updating link:http://hbase.apache.org[hbase.apache.org]
+
+[[hbase.org.site.contributing]]
+=== Contributing to hbase.apache.org
+
+See <> for more information on contributing to the documentation or website.
+
+[[hbase.org.site.publishing]]
+=== Publishing link:http://hbase.apache.org[hbase.apache.org]
+
+As of link:https://issues.apache.org/jira/browse/INFRA-5680[INFRA-5680 Migrate apache hbase website], to publish the website, build it using Maven, and then deploy it over a checkout of [path]_https://svn.apache.org/repos/asf/hbase/hbase.apache.org/trunk_ and check in your changes.
+The script [path]_dev-scripts/publish_hbase_website.sh_ is provided to automate this process and to be sure that stale files are removed from SVN.
+Review the script even if you decide to publish the website manually.
+Use the script as follows:
+
+----
+$ publish_hbase_website.sh -h
+Usage: publish_hbase_website.sh [-i | -a] [-g <git_dir>] [-s <svn_dir>]
+ -h Show this message
+ -i Prompts the user for input
+ -a Does not prompt the user. Potentially dangerous.
+ -g The local location of the HBase git repository
+ -s The local location of the HBase svn checkout
+ Either --interactive or --silent is required.
+ Edit the script to set default Git and SVN directories.
+----
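+
+For example, an interactive run might look like the following (the directory paths are placeholders for your own checkouts):
+
+----
+$ ./dev-scripts/publish_hbase_website.sh -i -g ~/repos/hbase -s ~/repos/hbase.apache.org
+----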
+
+NOTE: The SVN commit takes a long time.
+
+[[hbase.tests]]
+== Tests
+
+Developers, at a minimum, should familiarize themselves with the unit test detail; unit tests in HBase have a character not usually seen in other projects.
+
+This information is about unit tests for HBase itself.
+For developing unit tests for your HBase applications, see <>.
+
+[[hbase.moduletests]]
+=== Apache HBase Modules
+
+As of 0.96, Apache HBase is split into multiple modules.
+This creates "interesting" rules for how and where tests are written.
+If you are writing code for [class]+hbase-server+, see <> for how to write your tests.
+These tests can spin up a minicluster and will need to be categorized.
+For any other module, for example [class]+hbase-common+, the tests must be strict unit tests and just test the class under test - no use of the HBaseTestingUtility or minicluster is allowed (or even possible given the dependency tree).
+
+[[hbase.moduletest.shell]]
+==== Testing the HBase Shell
+
+The HBase shell and its tests are predominantly written in jruby.
+In order to make these tests run as a part of the standard build, there is a single JUnit test, [class]+TestShell+, that takes care of loading the jruby implemented tests and running them.
+You can run all of these tests from the top level with:
+
+[source,bourne]
+----
+
+ mvn clean test -Dtest=TestShell
+----
+
+Alternatively, you may limit the shell tests that run using the system variable [class]+shell.test+.
+This value should specify the ruby literal equivalent of a particular test case by name.
+For example, the tests that cover the shell commands for altering tables are contained in the test case [class]+AdminAlterTableTest+ and you can run them with:
+
+[source,bourne]
+----
+
+ mvn clean test -Dtest=TestShell -Dshell.test=/AdminAlterTableTest/
+----
+
+You may also use a link:http://docs.ruby-doc.com/docs/ProgrammingRuby/html/language.html#UJ[Ruby Regular Expression
+ literal] (in the [class]+/pattern/+ style) to select a set of test cases.
+You can run all of the HBase admin related tests, including both the normal administration and the security administration, with the command:
+
+[source,bourne]
+----
+
+ mvn clean test -Dtest=TestShell -Dshell.test=/.*Admin.*Test/
+----
+
+In the event of a test failure, you can see details by examining the XML version of the surefire report results:
+
+[source,bourne]
+----
+
+ vim hbase-shell/target/surefire-reports/TEST-org.apache.hadoop.hbase.client.TestShell.xml
+----
+
+[[hbase.moduletest.run]]
+==== Running Tests in other Modules
+
+If the module you are developing in has no other dependencies on other HBase modules, then you can cd into that module and just run:
+
+[source,bourne]
+----
+mvn test
+----
+
+which will just run the tests IN THAT MODULE.
+If there are other dependencies on other modules, then you will have to run the command from the ROOT HBASE DIRECTORY.
+This will run the tests in the other modules, unless you specify to skip the tests in that module.
+For instance, to skip the tests in the hbase-server module, you would run:
+
+[source,bourne]
+----
+mvn clean test -PskipServerTests
+----
+
+from the top level directory to run all the tests in modules other than hbase-server.
+Note that you can specify to skip tests in multiple modules as well as just for a single module.
+For example, to skip the tests in [class]+hbase-server+ and [class]+hbase-common+, you would run:
+
+[source,bourne]
+----
+mvn clean test -PskipServerTests -PskipCommonTests
+----
+
+Also, keep in mind that if you are running tests in the [class]+hbase-server+ module you will need to apply the maven profiles discussed in <> to get the tests to run properly.
+
+[[hbase.unittests]]
+=== Unit Tests
+
+Apache HBase unit tests are subdivided into four categories: small, medium, large, and integration with corresponding JUnit link:http://www.junit.org/node/581[categories]: [class]+SmallTests+, [class]+MediumTests+, [class]+LargeTests+, [class]+IntegrationTests+.
+JUnit categories are denoted using java annotations and look like this in your unit test code.
+
+[source,java]
+----
+...
+@Category(SmallTests.class)
+public class TestHRegionInfo {
+ @Test
+ public void testCreateHRegionInfoName() throws Exception {
+ // ...
+ }
+}
+----
+
+The above example shows how to mark a unit test as belonging to the [literal]+small+ category.
+All unit tests in HBase have a categorization.
+
+The first three categories, [literal]+small+, [literal]+medium+, and [literal]+large+, are for tests run when you type [code]+$ mvn
+ test+.
+In other words, these three categorizations are for HBase unit tests.
+The [literal]+integration+ category is not for unit tests, but for integration tests.
+These are run when you invoke [code]+$ mvn verify+.
+Integration tests are described in <>.
+
+HBase uses a patched maven surefire plugin and maven profiles to implement its unit test characterizations.
+
+Keep reading to figure out which annotation of the set small, medium, and large to put on your new HBase unit test.
+
+.Categorizing Tests
+Small Tests (((SmallTests)))::
+ _Small_ tests are executed in a shared JVM.
+ We put in this category all the tests that can be executed quickly in a shared JVM.
+ The maximum execution time for a small test is 15 seconds, and small tests should not use a (mini)cluster.
+
+Medium Tests (((MediumTests)))::
+ _Medium_ tests represent tests that must be executed before proposing a patch.
+ They are designed to run in less than 30 minutes altogether, and are quite stable in their results.
+ They are designed to last less than 50 seconds individually.
+ They can use a cluster, and each of them is executed in a separate JVM.
+
+Large Tests (((LargeTests)))::
+ _Large_ tests are everything else.
+ They are typically large-scale tests, regression tests for specific bugs, timeout tests, performance tests.
+ They are executed before a commit on the pre-integration machines.
+ They can be run on the developer machine as well.
+
+Integration Tests (((IntegrationTests)))::
+ _Integration_ tests are system level tests.
+ See <> for more info.
+
+[[hbase.unittests.cmds]]
+=== Running tests
+
+[[hbase.unittests.cmds.test]]
+==== Default: small and medium category tests
+
+Running [code]+mvn test+ will execute all small tests in a single JVM (no fork) and then medium tests in a separate JVM for each test instance.
+Medium tests are NOT executed if there is an error in a small test.
+Large tests are NOT executed.
+There is one report for small tests, and one report for medium tests if they are executed.
+
+[[hbase.unittests.cmds.test.runalltests]]
+==== Running all tests
+
+Running [code]+mvn test -P runAllTests+ will execute small tests in a single JVM then medium and large tests in a separate JVM for each test.
+Medium and large tests are NOT executed if there is an error in a small test.
+Large tests are NOT executed if there is an error in a small or medium test.
+There is one report for small tests, and one report for medium and large tests if they are executed.
+
+[[hbase.unittests.cmds.test.localtests.mytest]]
+==== Running a single test or all tests in a package
+
+To run an individual test, e.g. [class]+MyTest+, run [code]+mvn test -Dtest=MyTest+.
+You can also pass multiple, individual tests as a comma-delimited list: [code]+mvn test -Dtest=MyTest1,MyTest2,MyTest3+.
+You can also pass a package, which will run all tests under the package: [code]+mvn test '-Dtest=org.apache.hadoop.hbase.client.*'+.
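+
+The same invocations, laid out as commands:
+
+[source,bourne]
+----
+# Run one test class.
+mvn test -Dtest=MyTest
+
+# Run several test classes, comma-delimited.
+mvn test -Dtest=MyTest1,MyTest2,MyTest3
+
+# Run every test under a package.
+mvn test '-Dtest=org.apache.hadoop.hbase.client.*'
+----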
+
+When [code]+-Dtest+ is specified, the [code]+localTests+ profile will be used.
+It will use the official release of maven surefire, rather than our custom surefire plugin, and the old connector (The HBase build uses a patched version of the maven surefire plugin). Each junit test is executed in a separate JVM (A fork per test class). There is no parallelization when tests are running in this mode.
+You will see a new message at the end of the report: [literal]+"[INFO] Tests are skipped"+.
+It's harmless.
+However, you need to make sure the sum of [code]+Tests run:+ in the [code]+Results:+ section of the test reports matches the number of tests you specified, because no error will be reported when a non-existent test case is specified.
+
+[[hbase.unittests.cmds.test.profiles]]
+==== Other test invocation permutations
+
+Running `mvn test -P runSmallTests` will execute "small" tests only, using a single JVM.
+
+Running `mvn test -P runMediumTests` will execute "medium" tests only, launching a new JVM for each test-class.
+
+Running `mvn test -P runLargeTests` will execute "large" tests only, launching a new JVM for each test-class.
+
+For convenience, you can run `mvn test -P runDevTests` to execute both small and medium tests, using a single JVM.
+
+[[hbase.unittests.test.faster]]
+==== Running tests faster
+
+By default, [code]+$ mvn test -P runAllTests+ runs 5 tests in parallel.
+This can be increased on a developer's machine.
+Allowing that you can have 2 tests in parallel per core, and that you need about 2GB of memory per test (at the extreme), if you have an 8 core, 24GB box you could run 16 tests in parallel, but the available memory limits it to 12 (24/2).
+To run all tests with 12 tests in parallel, do this: +mvn test -P runAllTests -Dsurefire.secondPartForkCount=12+.
+If using a version earlier than 2.0, do: +mvn test -P runAllTests -Dsurefire.secondPartThreadCount=12+.
+To increase the speed, you can also use a ramdisk.
+You will need 2GB of memory to run all tests.
+You will also need to delete the files between two test runs.
+The typical way to configure a ramdisk on Linux is:
+
+----
+$ sudo mkdir /ram2G
+sudo mount -t tmpfs -o size=2048M tmpfs /ram2G
+----
+
+You can then use it to run all HBase tests on 2.0 with the command:
+
+----
+mvn test -P runAllTests -Dsurefire.secondPartForkCount=12 \
+  -Dtest.build.data.basedirectory=/ram2G
+----
+
+On earlier versions, use:
+
+----
+mvn test -P runAllTests -Dsurefire.secondPartThreadCount=12 \
+  -Dtest.build.data.basedirectory=/ram2G
+----
+
+[[hbase.unittests.cmds.test.hbasetests]]
+==== +hbasetests.sh+
+
+It's also possible to use the script +hbasetests.sh+.
+This script runs the medium and large tests in parallel with two maven instances, and provides a single report.
+This script does not use the hbase version of surefire so no parallelization is being done other than the two maven instances the script sets up.
+It must be executed from the directory which contains the [path]_pom.xml_.
+
+For example running +./dev-support/hbasetests.sh+ will execute small and medium tests.
+Running +./dev-support/hbasetests.sh
+ runAllTests+ will execute all tests.
+Running +./dev-support/hbasetests.sh replayFailed+ will rerun the failed tests a second time, in a separate jvm and without parallelisation.
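+
+The same invocations, as commands:
+
+[source,bourne]
+----
+# Run small and medium tests (the default).
+./dev-support/hbasetests.sh
+
+# Run all tests.
+./dev-support/hbasetests.sh runAllTests
+
+# Re-run the tests that failed, in a separate JVM and without parallelisation.
+./dev-support/hbasetests.sh replayFailed
+----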
+
+[[hbase.unittests.resource.checker]]
+==== Test Resource Checker(((Test ResourceChecker)))
+
+A custom Maven SureFire plugin listener checks a number of resources before and after each HBase unit test runs, and logs its findings at the end of the test output files, which can be found in [path]_target/surefire-reports_ per Maven module (tests write reports named for the test class into this directory;
+check the [path]_*-out.txt_ files). The resources counted are the number of threads, the number of file descriptors, etc.
+If the number has increased, it adds a _LEAK?_ comment in the logs.
+As you can have an HBase instance running in the background, some threads can be deleted/created without any specific action in the test.
+However, if the test does not work as expected, or if the test should not impact these resources, it's worth checking these log lines [computeroutput]+...hbase.ResourceChecker(157): before...+ and [computeroutput]+...hbase.ResourceChecker(157): after...+.
+For example:
+
+----
+2012-09-26 09:22:15,315 INFO [pool-1-thread-1]
+hbase.ResourceChecker(157): after:
+regionserver.TestColumnSeeking#testReseeking Thread=65 (was 65),
+OpenFileDescriptor=107 (was 107), MaxFileDescriptor=10240 (was 10240),
+ConnectionCount=1 (was 1)
+----
+
+[[hbase.tests.writing]]
+=== Writing Tests
+
+[[hbase.tests.rules]]
+==== General rules
+
+* As much as possible, tests should be written as category small tests.
+* All tests must be written to support parallel execution on the same machine, hence they should not use shared resources such as fixed ports or fixed file names.
+* Tests should not over-log.
+  More than 100 lines/second makes the logs complex to read and uses I/O that is then not available for the other tests.
+* Tests can be written with [class]+HBaseTestingUtility+.
+ This class offers helper functions to create a temp directory and do the cleanup, or to start a cluster.
+
+[[hbase.tests.categories]]
+==== Categories and execution time
+
+* All tests must be categorized, if not they could be skipped.
+* All tests should be written to be as fast as possible.
+* Small category tests should last less than 15 seconds, and must not have any side effect.
+* Medium category tests should last less than 50 seconds.
+* Large category tests should last less than 3 minutes.
+ This should ensure a good parallelization for people using it, and ease the analysis when the test fails.
+
+[[hbase.tests.sleeps]]
+==== Sleeps in tests
+
+Whenever possible, tests should not use [method]+Thread.sleep+, but rather wait for the real event they need.
+This is faster and clearer for the reader.
+Tests should not do a [method]+Thread.sleep+ without testing an ending condition.
+This allows understanding what the test is waiting for.
+Moreover, the test will work whatever the machine performance is.
+Sleep should be minimal to be as fast as possible.
+Waiting for a variable should be done in a 40ms sleep loop.
+Waiting for a socket operation should be done in a 200 ms sleep loop.
+
+[[hbase.tests.cluster]]
+==== Tests using a cluster
+
+Tests using an HRegion do not have to start a cluster: a region can use the local file system.
+Starting/stopping a cluster costs around 10 seconds.
+Clusters should not be started per test method but per test class.
+A started cluster must be shut down using [method]+HBaseTestingUtility#shutdownMiniCluster+, which cleans the directories.
+As much as possible, tests should use the default settings for the cluster.
+When they don't, they should document it.
+This will allow the cluster to be shared later.
+
+[[integration.tests]]
+=== Integration Tests
+
+HBase integration/system tests are tests that are beyond HBase unit tests.
+They are generally long-lasting, sizeable (the test can be asked to load 1M or 1B rows), targetable (they can take configuration that will point them at the ready-made cluster they are to run against; integration tests do not include cluster start/stop code), and verifying (integration tests rely on public APIs only; they do not attempt to examine server internals to assert success or failure).
+Integration tests are what you would run when you need more elaborate proofing of a release candidate beyond what unit tests can do.
+They are not generally run on the Apache Continuous Integration build server; however, some sites opt to run integration tests as a part of their continuous testing on an actual cluster.
+
+Integration tests currently live under the [path]_src/test_ directory in the hbase-it submodule and will match the regex: [path]_**/IntegrationTest*.java_.
+All integration tests are also annotated with [code]+@Category(IntegrationTests.class)+.
+
+Integration tests can be run in two modes: using a mini cluster, or against an actual distributed cluster.
+Maven failsafe is used to run the tests using the mini cluster.
+IntegrationTestsDriver class is used for executing the tests against a distributed cluster.
+Integration tests SHOULD NOT assume that they are running against a mini cluster, and SHOULD NOT use private APIs to access cluster state.
+To interact with the distributed or mini cluster uniformly, the [code]+IntegrationTestingUtility+ and [code]+HBaseCluster+ classes, and public client APIs, can be used.
+
+On a distributed cluster, integration tests that use ChaosMonkey or otherwise manipulate services through the cluster manager (e.g. restart regionservers) use SSH to do it.
+To run these, the test process should be able to run commands on the remote end, so ssh should be configured accordingly (for example, if HBase runs under the hbase user in your cluster, you can set up passwordless ssh for that user and run the test also under it). To facilitate that, the [code]+hbase.it.clustermanager.ssh.user+, [code]+hbase.it.clustermanager.ssh.opts+ and [code]+hbase.it.clustermanager.ssh.cmd+ configuration settings can be used.
+"User" is the remote user that the cluster manager should use to perform ssh commands.
+"Opts" contains additional options that are passed to SSH (for example, "-i /tmp/my-key"). Finally, if you have some custom environment setup, "cmd" is the override format for the entire tunnel (ssh) command.
+The default string is {[code]+/usr/bin/ssh %1$s %2$s%3$s%4$s "%5$s"+} and is a good starting point.
+This is a standard Java format string with 5 arguments that is used to execute the remote command.
+Argument 1 (%1$s) is the SSH options set via the opts setting or via an environment variable, 2 is the SSH user name, 3 is "@" if a username is set or "" otherwise, 4 is the target host name, and 5 is the logical command to execute (which may include single quotes, so don't use them). For example, if you run the tests under a non-hbase user and want to ssh as that user and change to hbase on the remote machine, you can use {[code]+/usr/bin/ssh %1$s %2$s%3$s%4$s "su hbase - -c \"%5$s\""+}. That way, to kill an RS (for example), integration tests may run {[code]+/usr/bin/ssh some-hostname "su hbase - -c \"ps aux | ... | kill ...\""+}. The command is logged in the test logs, so you can verify it is correct for your environment.
+
+To disable the running of Integration Tests, pass the following profile on the command line: [code]+-PskipIntegrationTests+.
+For example:
+[source]
+----
+$ mvn clean install test -Dtest=TestZooKeeper -PskipIntegrationTests
+----
+
+[[maven.build.commands.integration.tests.mini]]
+==== Running integration tests against mini cluster
+
+HBase 0.92 added a [var]+verify+ maven target.
+Invoking it, for example by doing [code]+mvn verify+, will run all the phases up to and including the verify phase via the maven link:http://maven.apache.org/plugins/maven-failsafe-plugin/[failsafe
+ plugin], running all the above mentioned HBase unit tests as well as tests that are in the HBase integration test group.
+After you have completed +mvn install -DskipTests+, you can run just the integration tests by invoking:
+
+[source,bourne]
+----
+
+cd hbase-it
+mvn verify
+----
+
+If you just want to run the integration tests at the top level, you need to run two commands.
+First: +mvn failsafe:integration-test+. This actually runs ALL the integration tests.
+
+NOTE: This command will always output [code]+BUILD SUCCESS+ even if there are test failures.
+
+At this point, you could grep the output by hand looking for failed tests.
+However, maven will do this for us; just use: +mvn failsafe:verify+.
+The above command basically looks at all the test results (so don't remove the 'target' directory) for test failures and reports the results.
+
+[[maven.build.commands.integration.tests2]]
+===== Running a subset of Integration tests
+
+This is very similar to how you specify running a subset of unit tests (see above), but use the property [code]+it.test+ instead of [code]+test+.
+To just run [class]+IntegrationTestClassXYZ.java+, use: +mvn failsafe:integration-test -Dit.test=IntegrationTestClassXYZ+.
+The next thing you might want to do is run groups of integration tests, say all integration tests that are named IntegrationTestClassX*.java: +mvn failsafe:integration-test -Dit.test=*ClassX*+.
+This runs everything that is an integration test that matches *ClassX*. This means anything matching: "**/IntegrationTest*ClassX*".
+You can also run multiple groups of integration tests using comma-delimited lists (similar to unit tests). Using a list of matches still supports full regex matching for each of the groups.
+This would look something like: +mvn failsafe:integration-test -Dit.test=*ClassX*, *ClassY+
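+
+The same invocations, laid out as commands (quoting the patterns protects them from shell globbing):
+
+[source,bourne]
+----
+# Run a single integration test class.
+mvn failsafe:integration-test -Dit.test=IntegrationTestClassXYZ
+
+# Run every integration test whose class name matches *ClassX*.
+mvn failsafe:integration-test "-Dit.test=*ClassX*"
+
+# Run several groups, comma-delimited.
+mvn failsafe:integration-test "-Dit.test=*ClassX*, *ClassY"
+----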
+
+[[maven.build.commands.integration.tests.distributed]]
+==== Running integration tests against distributed cluster
+
+If you have an already-setup HBase cluster, you can launch the integration tests by invoking the class [code]+IntegrationTestsDriver+.
+You may have to run test-compile first.
+The configuration will be picked up by the bin/hbase script.
+[source,bourne]
+----
+mvn test-compile
+----
+Then launch the tests with:
+
+[source,bourne]
+----
+bin/hbase [--config config_dir] org.apache.hadoop.hbase.IntegrationTestsDriver
+----
+
+Pass [code]+-h+ to get usage on this sweet tool.
+Running the IntegrationTestsDriver without any argument will launch tests found under [code]+hbase-it/src/test+, having [code]+@Category(IntegrationTests.class)+ annotation, and a name starting with [code]+IntegrationTests+.
+See the usage, by passing -h, to see how to filter test classes.
+You can pass a regex which is checked against the full class name; so, part of class name can be used.
+IntegrationTestsDriver uses JUnit to run the tests.
+Currently there is no support for running integration tests against a distributed cluster using maven (see link:https://issues.apache.org/jira/browse/HBASE-6201[HBASE-6201]).
+
+The tests interact with the distributed cluster by using the methods in the [code]+DistributedHBaseCluster+ (implementing [code]+HBaseCluster+) class, which in turn uses a pluggable [code]+ClusterManager+.
+Concrete implementations provide actual functionality for carrying out deployment-specific and environment-dependent tasks (SSH, etc). The default [code]+ClusterManager+ is [code]+HBaseClusterManager+, which uses SSH to remotely execute start/stop/kill/signal commands, and assumes some posix commands (ps, etc). It also assumes that the user running the test has enough "power" to start/stop servers on the remote machines.
+By default, it picks up [code]+HBASE_SSH_OPTS, HBASE_HOME, HBASE_CONF_DIR+ from the env, and uses [code]+bin/hbase-daemon.sh+ to carry out the actions.
+Currently tarball deployments, deployments which use hbase-daemons.sh, and link:http://incubator.apache.org/ambari/[Apache Ambari] deployments are supported.
+/etc/init.d/ scripts are not supported for now, but they can easily be added.
+For other deployment options, a ClusterManager can be implemented and plugged in.
+
+[[maven.build.commands.integration.tests.destructive]]
+==== Destructive integration / system tests
+
+In 0.96, a tool named [code]+ChaosMonkey+ was introduced.
+It is modeled after the link:http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html[same-named tool by Netflix].
+Some of the tests use ChaosMonkey to simulate faults in the running cluster in the way of killing random servers, disconnecting servers, etc.
+ChaosMonkey can also be used as a stand-alone tool to run a (misbehaving) policy while you are running other tests.
+
+ChaosMonkey defines Actions and Policies.
+Actions are sequences of events.
+We have at least the following actions:
+
+* Restart active master (sleep 5 sec)
+* Restart random regionserver (sleep 5 sec)
+* Restart random regionserver (sleep 60 sec)
+* Restart META regionserver (sleep 5 sec)
+* Restart ROOT regionserver (sleep 5 sec)
+* Batch restart of 50% of regionservers (sleep 5 sec)
+* Rolling restart of 100% of regionservers (sleep 5 sec)
+
+Policies, on the other hand, are responsible for executing the actions based on a strategy.
+The default policy is to execute a random action every minute based on predefined action weights.
+ChaosMonkey executes predefined named policies until it is stopped.
+More than one policy can be active at any time.
+
+To run ChaosMonkey as a standalone tool deploy your HBase cluster as usual.
+ChaosMonkey uses the configuration from the bin/hbase script, thus no extra configuration needs to be done.
+You can invoke the ChaosMonkey by running:
+
+[source,bourne]
+----
+bin/hbase org.apache.hadoop.hbase.util.ChaosMonkey
+----
+
+This will output something like:
+
+----
+
+12/11/19 23:21:57 INFO util.ChaosMonkey: Using ChaosMonkey Policy: class org.apache.hadoop.hbase.util.ChaosMonkey$PeriodicRandomActionPolicy, period:60000
+12/11/19 23:21:57 INFO util.ChaosMonkey: Sleeping for 26953 to add jitter
+12/11/19 23:22:24 INFO util.ChaosMonkey: Performing action: Restart active master
+12/11/19 23:22:24 INFO util.ChaosMonkey: Killing master:master.example.com,60000,1353367210440
+12/11/19 23:22:24 INFO hbase.HBaseCluster: Aborting Master: master.example.com,60000,1353367210440
+12/11/19 23:22:24 INFO hbase.ClusterManager: Executing remote command: ps aux | grep master | grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s SIGKILL , hostname:master.example.com
+12/11/19 23:22:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
+12/11/19 23:22:25 INFO hbase.HBaseCluster: Waiting service:master to stop: master.example.com,60000,1353367210440
+12/11/19 23:22:25 INFO hbase.ClusterManager: Executing remote command: ps aux | grep master | grep -v grep | tr -s ' ' | cut -d ' ' -f2 , hostname:master.example.com
+12/11/19 23:22:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
+12/11/19 23:22:25 INFO util.ChaosMonkey: Killed master server:master.example.com,60000,1353367210440
+12/11/19 23:22:25 INFO util.ChaosMonkey: Sleeping for:5000
+12/11/19 23:22:30 INFO util.ChaosMonkey: Starting master:master.example.com
+12/11/19 23:22:30 INFO hbase.HBaseCluster: Starting Master on: master.example.com
+12/11/19 23:22:30 INFO hbase.ClusterManager: Executing remote command: /homes/enis/code/hbase-0.94/bin/../bin/hbase-daemon.sh --config /homes/enis/code/hbase-0.94/bin/../conf start master , hostname:master.example.com
+12/11/19 23:22:31 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:starting master, logging to /homes/enis/code/hbase-0.94/bin/../logs/hbase-enis-master-master.example.com.out
+....
+12/11/19 23:22:33 INFO util.ChaosMonkey: Started master: master.example.com,60000,1353367210440
+12/11/19 23:22:33 INFO util.ChaosMonkey: Sleeping for:51321
+12/11/19 23:23:24 INFO util.ChaosMonkey: Performing action: Restart random region server
+12/11/19 23:23:24 INFO util.ChaosMonkey: Killing region server:rs3.example.com,60020,1353367027826
+12/11/19 23:23:24 INFO hbase.HBaseCluster: Aborting RS: rs3.example.com,60020,1353367027826
+12/11/19 23:23:24 INFO hbase.ClusterManager: Executing remote command: ps aux | grep regionserver | grep -v grep | tr -s ' ' | cut -d ' ' -f2 | xargs kill -s SIGKILL , hostname:rs3.example.com
+12/11/19 23:23:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
+12/11/19 23:23:25 INFO hbase.HBaseCluster: Waiting service:regionserver to stop: rs3.example.com,60020,1353367027826
+12/11/19 23:23:25 INFO hbase.ClusterManager: Executing remote command: ps aux | grep regionserver | grep -v grep | tr -s ' ' | cut -d ' ' -f2 , hostname:rs3.example.com
+12/11/19 23:23:25 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:
+12/11/19 23:23:25 INFO util.ChaosMonkey: Killed region server:rs3.example.com,60020,1353367027826. Reported num of rs:6
+12/11/19 23:23:25 INFO util.ChaosMonkey: Sleeping for:60000
+12/11/19 23:24:25 INFO util.ChaosMonkey: Starting region server:rs3.example.com
+12/11/19 23:24:25 INFO hbase.HBaseCluster: Starting RS on: rs3.example.com
+12/11/19 23:24:25 INFO hbase.ClusterManager: Executing remote command: /homes/enis/code/hbase-0.94/bin/../bin/hbase-daemon.sh --config /homes/enis/code/hbase-0.94/bin/../conf start regionserver , hostname:rs3.example.com
+12/11/19 23:24:26 INFO hbase.ClusterManager: Executed remote command, exit code:0 , output:starting regionserver, logging to /homes/enis/code/hbase-0.94/bin/../logs/hbase-enis-regionserver-rs3.example.com.out
+
+12/11/19 23:24:27 INFO util.ChaosMonkey: Started region server:rs3.example.com,60020,1353367027826. Reported num of rs:6
+----
+
+As you can see from the log, ChaosMonkey started the default PeriodicRandomActionPolicy, which is configured with all the available actions, and ran RestartActiveMaster and RestartRandomRs actions.
+The ChaosMonkey tool, if run from the command line, will keep running until the process is killed.
+
+[[chaos.monkey.properties]]
+==== Passing individual Chaos Monkey per-test Settings/Properties
+
+Since HBase version 1.0.0 (link:https://issues.apache.org/jira/browse/HBASE-11348[HBASE-11348]), the chaos monkeys used to run integration tests can be configured per test run.
+Users can create a java properties file with timing configurations and pass it to the chaos monkey.
+The properties file needs to be in the HBase classpath.
+The various properties that can be configured and their default values can be found listed in the [class]+org.apache.hadoop.hbase.chaos.factories.MonkeyConstants+ class.
+If any chaos monkey configuration is missing from the properties file, then the default values are assumed.
+For example:
+
+[source,bourne]
+----
+
+$bin/hbase org.apache.hadoop.hbase.IntegrationTestIngest -m slowDeterministic -monkeyProps monkey.properties
+----
+
+The above command will start the integration tests and chaos monkey, passing the properties file [path]_monkey.properties_.
+Here is an example chaos monkey file:
+
+[source]
+----
+
+sdm.action1.period=120000
+sdm.action2.period=40000
+move.regions.sleep.time=80000
+move.regions.max.time=1000000
+batch.restart.rs.ratio=0.4f
+----
+
+[[developing]]
+== Developer Guidelines
+
+=== Codelines
+
+Most development is done on the master branch, which is named [literal]+master+ in the Git repository.
+Previously, HBase used Subversion, in which the master branch was called [literal]+TRUNK+.
+Branches exist for minor releases, and important features and bug fixes are often back-ported.
+
+=== Release Managers
+
+Each maintained release branch has a release manager, who volunteers to coordinate the backporting of new features and bug fixes to that release.
+The release managers are link:https://hbase.apache.org/team-list.html[committers].
+If you would like your feature or bug fix to be included in a given release, communicate with that release manager.
+If this list goes out of date or you can't reach the listed person, reach out to someone else on the list.
+
+NOTE: End-of-life releases are not included in this list.
+
+.Release Managers
+[cols="1,1", options="header"]
+|===
+| Release
+| Release Manager
+| 0.98
+| Andrew Purtell
+
+| 1.0
+| Enis Soztutar
+|===
+
+[[code.standards]]
+=== Code Standards
+
+See <> and <>.
+
+==== Interface Classifications
+
+Interfaces are classified both by audience and by stability level.
+These labels appear at the head of a class.
+The conventions followed by HBase are inherited from its parent project, Hadoop.
+
+The following interface classifications are commonly used:
+
+.InterfaceAudience
+[code]+@InterfaceAudience.Public+::
+ APIs for users and HBase applications.
+ These APIs will be deprecated through major versions of HBase.
+
+[code]+@InterfaceAudience.Private+::
+ APIs for HBase internals developers.
+ No guarantees on compatibility or availability in future versions.
+ Private interfaces do not need an [code]+@InterfaceStability+ classification.
+
+[code]+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)+::
+ APIs for HBase coprocessor writers.
+ As of HBase 0.92/0.94/0.96/0.98 this API is still unstable.
+ No guarantees on compatibility with future versions.
+
+No [code]+@InterfaceAudience+ Classification::
+ Packages without an [code]+@InterfaceAudience+ label are considered private.
+ Mark your new packages if publicly accessible.
+
+.Excluding Non-Public Interfaces from API Documentation
+[NOTE]
+====
+Only interfaces classified [code]+@InterfaceAudience.Public+ should be included in API documentation (Javadoc). Committers must add new package excludes to the [code]+ExcludePackageNames+ section of the [path]_pom.xml_ for new packages which do not contain public classes.
+====
+
+.@InterfaceStability
+[code]+@InterfaceStability+ is important for packages marked [code]+@InterfaceAudience.Public+.
+
+[code]+@InterfaceStability.Stable+::
+ Public packages marked as stable cannot be changed without a deprecation path or a very good reason.
+
+[code]+@InterfaceStability.Unstable+::
+ Public packages marked as unstable can be changed without a deprecation path.
+
+[code]+@InterfaceStability.Evolving+::
+ Public packages marked as evolving may be changed, but it is discouraged.
+
+No [code]+@InterfaceStability+ Label::
+ Public classes with no [code]+@InterfaceStability+ label are discouraged, and should be considered implicitly unstable.
+
+If you are unclear about how to mark packages, ask on the development list.
+
+[[common.patch.feedback]]
+==== Code Formatting Conventions
+
+Please adhere to the following guidelines so that your patches can be reviewed more quickly.
+These guidelines have been developed based upon common feedback on patches from new contributors.
+
+See the link:http://www.oracle.com/technetwork/java/index-135089.html[Code
+ Conventions for the Java Programming Language] for more information on coding conventions in Java.
+
+[[common.patch.feedback.space.invaders]]
+===== Space Invaders
+
+Do not use extra spaces around brackets.
+Use the second style, rather than the first.
+
+[source,java]
+----
+
+if ( foo.equals( bar ) ) { // don't do this
+----
+
+[source,java]
+----
+
+if (foo.equals(bar)) {
+----
+
+[source,java]
+----
+
+foo = barArray[ i ]; // don't do this
+----
+
+[source,java]
+----
+
+foo = barArray[i];
+----
+
+[[common.patch.feedback.autogen]]
+===== Auto Generated Code
+
+Auto-generated code in Eclipse often uses bad variable names such as [literal]+arg0+.
+Use more informative variable names.
+Use code like the second example here.
+
+[source,java]
+----
+
+ public void readFields(DataInput arg0) throws IOException { // don't do this
+ foo = arg0.readUTF(); // don't do this
+----
+
+[source,java]
+----
+
+ public void readFields(DataInput di) throws IOException {
+ foo = di.readUTF();
+----
+
+[[common.patch.feedback.longlines]]
+===== Long Lines
+
+Keep lines less than 100 characters.
+You can configure your IDE to do this automatically.
+
+[source,java]
+----
+
+Bar bar = foo.veryLongMethodWithManyArguments(argument1, argument2, argument3, argument4, argument5, argument6, argument7, argument8, argument9); // don't do this
+----
+
+[source,java]
+----
+
+Bar bar = foo.veryLongMethodWithManyArguments(
+ argument1, argument2, argument3,argument4, argument5, argument6, argument7, argument8, argument9);
+----
+
+[[common.patch.feedback.trailingspaces]]
+===== Trailing Spaces
+
+Trailing spaces are a common problem.
+Be sure there is a line break after the end of your code, and avoid lines with nothing but whitespace.
+This makes diffs more meaningful.
+You can configure your IDE to help with this.
+
+[source,java]
+----
+
+Bar bar = foo.getBar(); <--- imagine there is an extra space(s) after the semicolon.
+----
+
+[[common.patch.feedback.javadoc]]
+===== API Documentation (Javadoc)
+
+This is also a very common feedback item.
+Don't forget Javadoc!
+
+Javadoc warnings are checked during precommit.
+If the precommit tool gives you a '-1', please fix the javadoc issue.
+Your patch won't be committed if it adds such warnings.
+
+[[common.patch.feedback.findbugs]]
+===== Findbugs
+
+[code]+Findbugs+ is used to detect common bug patterns.
+It is checked during the precommit build by Apache's Jenkins.
+If errors are found, please fix them.
+You can run findbugs locally with +mvn
+ findbugs:findbugs+, which will generate the [code]+findbugs+ files locally.
+Sometimes, you may have to write code smarter than [code]+findbugs+.
+You can annotate your code to tell [code]+findbugs+ you know what you're doing, by annotating your class with the following annotation:
+
+[source,java]
+----
+@edu.umd.cs.findbugs.annotations.SuppressWarnings(
+value="HE_EQUALS_USE_HASHCODE",
+justification="I know what I'm doing")
+----
+
+It is important to use the Apache-licensed version of the annotations.
+
+[[common.patch.feedback.javadoc.defaults]]
+===== Javadoc - Useless Defaults
+
+Don't just leave the @param arguments the way your IDE generated them:
+
+[source,java]
+----
+
+ /**
+ *
+ * @param bar <---- don't do this!!!!
+ * @return <---- or this!!!!
+ */
+ public Foo getFoo(Bar bar);
+----
+
+Either add something descriptive to the @[code]+param+ and @[code]+return+ lines, or just remove them.
+The preference is to add something descriptive and useful.
+
+[[common.patch.feedback.onething]]
+===== One Thing At A Time, Folks
+
+If you submit a patch for one thing, don't do auto-reformatting or unrelated reformatting of code on a completely different area of code.
+
+Likewise, don't add unrelated cleanup or refactorings outside the scope of your Jira.
+
+[[common.patch.feedback.tests]]
+===== Ambiguous Unit Tests
+
+Make sure that you're clear about what you are testing in your unit tests and why.
+
+[[common.patch.feedback.writable]]
+===== Implementing Writable
+
+.Applies pre-0.96 only
+[NOTE]
+====
+In 0.96, HBase moved to protocol buffers (protobufs). The below section on Writables applies to 0.94.x and previous, not to 0.96 and beyond.
+====
+
+Every class returned by RegionServers must implement the [code]+Writable+ interface.
+If you are creating a new class that needs to implement this interface, do not forget the default constructor.
+
+[[design.invariants]]
+=== Invariants
+
+We don't have many but what we have we list below.
+All are subject to challenge of course but until then, please hold to the rules of the road.
+
+[[design.invariants.zk.data]]
+==== No permanent state in ZooKeeper
+
+ZooKeeper state should be transient (treat it like memory). If ZooKeeper state is deleted, HBase should be able to recover and essentially be in the same state.
+
+.Exceptions
+There are currently a few exceptions that we need to fix around whether a table is enabled or disabled.
+
+* Replication data is currently stored only in ZooKeeper.
+ Deleting ZooKeeper data related to replication may cause replication to be disabled.
+ Do not delete the replication tree, [path]_/hbase/replication/_.
++
+WARNING: Replication may be disrupted and data loss may occur if you delete the replication tree ([path]_/hbase/replication/_) from ZooKeeper.
+Follow progress on this issue at link:https://issues.apache.org/jira/browse/HBASE-10295[HBASE-10295].
+
+
+[[run.insitu]]
+=== Running In-Situ
+
+If you are developing Apache HBase, frequently it is useful to test your changes against a more-real cluster than what you find in unit tests.
+In this case, HBase can be run directly from the source in local-mode.
+All you need to do is run:
+
+[source,bourne]
+----
+${HBASE_HOME}/bin/start-hbase.sh
+----
+
+This will spin up a full local-cluster, just as if you had packaged up HBase and installed it on your machine.
+
+Keep in mind that you will need to have installed HBase into your local maven repository for the in-situ cluster to work properly.
+That is, you will need to run:
+
+[source,bourne]
+----
+mvn clean install -DskipTests
+----
+
+to ensure that maven can find the correct classpath and dependencies.
+Generally, the above command is just a good thing to try running first, if maven is acting oddly.
+
+[[add.metrics]]
+=== Adding Metrics
+
+After adding a new feature a developer might want to add metrics.
+HBase exposes metrics using the Hadoop Metrics 2 system, so adding a new metric involves exposing that metric to the hadoop system.
+Unfortunately, the API of metrics2 changed from hadoop 1 to hadoop 2.
+In order to get around this, a set of interfaces and implementations have to be loaded at runtime.
+To get an in-depth look at the reasoning and structure of these classes you can read the blog post located link:https://blogs.apache.org/hbase/entry/migration_to_the_new_metrics[here].
+To add a metric to an existing MBean follow the short guide below:
+
+==== Add Metric name and Function to Hadoop Compat Interface.
+
+Inside of the source interface that corresponds to where the metrics are generated (e.g. MetricsMasterSource for things coming from HMaster), create new static strings for the metric name and description.
+Then add a new method that will be called to add a new reading.
+
+==== Add the Implementation to Both Hadoop 1 and Hadoop 2 Compat modules.
+
+Inside of the implementation of the source (e.g.
+MetricsMasterSourceImpl in the above example), create a new histogram, counter, gauge, or stat in the init method.
+Then, in the method that was added to the interface, wire up the parameter passed in to the histogram.
+
+Now add tests that make sure the data is correctly exported to the metrics 2 system.
+For this the MetricsAssertHelper is provided.
+
+[[git.best.practices]]
+=== Git Best Practices
+
+* Use the correct method to create patches.
+ See <>.
+* Avoid git merges.
+ Use [code]+git pull --rebase+ or [code]+git fetch+ followed by [code]+git rebase+, as in the sketch after this list.
+* Do not use [code]+git push --force+.
+ If the push does not work, fix the problem or ask for help.
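+
+A minimal sketch of the rebase-based update flow (the remote and branch names are assumptions; adjust to your setup):
+
+[source,bourne]
+----
+# Update the local branch without creating merge commits.
+git fetch origin
+git rebase origin/master
+
+# Equivalent single command.
+git pull --rebase origin master
+----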
+
+Please contribute to this document if you think of other Git best practices.
+
+==== [code]+rebase_all_git_branches.sh+
+
+The [path]_dev-support/rebase_all_git_branches.sh_ script is provided to help keep your Git repository clean.
+Use the [code]+-h+ parameter to get usage instructions.
+The script automatically refreshes your tracking branches, attempts an automatic rebase of each local branch against its remote branch, and gives you the option to delete any branch which represents a closed [literal]+HBASE-+ JIRA.
+The script has one optional configuration option, the location of your Git directory.
+You can set a default by editing the script.
+Otherwise, you can pass the git directory manually by using the [code]+-d+ parameter, followed by an absolute or relative directory name, or even '.' for the current working directory.
+The script checks that the directory contains a sub-directory called [path]_.git/_ before proceeding.
+
+[[submitting.patches]]
+=== Submitting Patches
+
+HBase moved from SVN to Git.
+Until we develop our own documentation for how to contribute patches in our new Git context, the link:http://accumulo.apache.org/git.html[Accumulo doc on how to contribute and develop] is worth a read, with the caveat that we have a different branching model and do not currently follow the merge practice described there.
+See also <>.
+
+If you are new to submitting patches to open source or new to submitting patches to Apache, start by reading the link:http://commons.apache.org/patches.html[On Contributing
+ Patches] page from link:http://commons.apache.org/[Apache
+ Commons Project].
+It provides a nice overview that applies equally to the Apache HBase Project.
+
+[[submitting.patches.create]]
+==== Create Patch
+
+The script [path]_dev-support/make_patch.sh_ has been provided to help you adhere to patch-creation guidelines.
+The script has the following syntax:
+
+----
+$ make_patch.sh [-a] [-p <patch_dir>]
+----
+
+. If you do not pass a [code]+patch_dir+, the script defaults to [path]_~/patches/_.
+ If the [code]+patch_dir+ does not exist, it is created.
+. By default, if an existing patch exists with the JIRA ID, the version of the new patch is incremented ([path]_HBASE-XXXX-v3.patch_). If the [code]+-a+ option is passed, the version is not incremented, but the suffix [literal]+-addendum+ is added ([path]_HBASE-XXXX-v2-addendum.patch_). A second addendum to a given version is not supported.
+. Detects whether you have more than one local commit on your branch.
+ If you do, the script offers you the chance to run +git rebase
+ -i+ to squash the changes into a single commit so that it can use +git format-patch+.
+ If you decline, the script uses +git diff+ instead.
+ The patch is saved in a configurable directory and is ready to be attached to your JIRA.
+
+.Patching Workflow
+* Always patch against the master branch first, even if you want to patch in another branch.
+ HBase committers always apply patches first to the master branch, and backport if necessary.
+* Submit one single patch for a fix.
+ If necessary, first squash local commits into a single commit.
+ See this link:http://stackoverflow.com/questions/5308816/how-to-use-git-merge-squash[Stack Overflow question] for more information about squashing commits.
+* The patch should have the JIRA ID in the name.
+ If you are generating from a branch, include the target branch in the filename.
+ A common naming scheme for patches is:
++
+----
+HBASE-XXXX.patch
+----
++
+----
+HBASE-XXXX-0.90.patch # to denote that the patch is against branch 0.90
+----
++
+----
+HBASE-XXXX-v3.patch # to denote that this is the third version of the patch
+----
+
+* To submit a patch, first create it using one of the methods in <>.
+ Next, attach the patch to the JIRA (one patch for the whole fix), using the dialog.
+ Next, click the btn:[Patch Available] button, which triggers the Hudson job that checks the patch for validity.
++
+Please understand that not every patch may get committed, and that feedback will likely be provided on the patch.
+
+* If your patch is longer than a single screen, also attach a ReviewBoard request to the issue.
+ See <>.
+* If you need to revise your patch, leave the previous patch file(s) attached to the JIRA, and upload the new one, following the naming conventions in <>.
+ Cancel the Patch Available flag and then re-trigger it, by toggling the btn:[Patch Available] button in JIRA.
+ JIRA sorts attached files by the time they were attached, and has no problem with multiple attachments with the same name.
+ However, at times it is easier to refer to different versions of a patch if you add [literal]+-vX+, where the [replaceable]_X_ is the version (starting with 2).
+* If you need to submit your patch against multiple branches, rather than just master, name each version of the patch with the branch it is for, following the naming conventions in <>.
+
+.Methods to Create Patches
+Eclipse::
+ Select the menu item.
+
+Git::
+ [code]+git format-patch+ is preferred because it preserves commit messages.
+ Use [code]+git rebase -i+ first, to combine (squash) smaller commits into a single larger one.
+
+Subversion::
+
+Make sure you review <> and <> for code style.
+If your patch was generated incorrectly or your code does not adhere to the code formatting guidelines, you may be asked to redo some work.
+
+[[submitting.patches.tests]]
+==== Unit Tests
+
+Yes, please.
+Please try to include unit tests with every code patch (and especially new classes and large changes). Make sure unit tests pass locally before submitting the patch.
+
+Also, see <>.
+
+If you are creating a new unit test class, notice how other unit test classes have classification/sizing annotations at the top and a static method on the end.
+Be sure to include these in any new unit test files you generate.
+See <> for more on how the annotations work.
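+
+A rough sketch of such a test class follows.
+The class name and test body are invented, and the package that the size annotations (such as [code]+SmallTests+) live in differs between HBase versions, so copy the annotation import and any trailing boilerplate from an existing unit test in the same module rather than from this sketch.
+
+[source,java]
+----
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.SmallTests; // package differs by version; match neighboring tests
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+// Sketch only: the classification/sizing annotation at the top is the part
+// this section is describing.
+@Category(SmallTests.class)
+public class TestExampleFeature {
+
+  @Test
+  public void testSomething() {
+    assertEquals(4, 2 + 2);
+  }
+
+  // Also copy whatever trailing boilerplate (rules or static methods) the
+  // other unit tests in this module end with.
+}
+----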
+
+==== Integration Tests
+
+Significant new features should provide an integration test in addition to unit tests, suitable for exercising the new feature at different points in its configuration space.
+
+==== ReviewBoard
+
+Patches larger than one screen, or patches that will be tricky to review, should go through link:http://reviews.apache.org[ReviewBoard].
+
+.Procedure: Use ReviewBoard
+. Register for an account if you don't already have one.
+ It does not use the credentials from link:http://issues.apache.org[issues.apache.org].
+ Log in.
+. Click [label]#New Review Request#.
+. Choose the [literal]+hbase-git+ repository.
+ Click Choose File to select the diff and optionally a parent diff.
+ Click btn:[Create Review Request].
+. Fill in the fields as required.
+ At the minimum, fill in the [label]#Summary# and choose [literal]+hbase+ as the [label]#Review Group#.
+ If you fill in the [label]#Bugs# field, the review board links back to the relevant JIRA.
+ The more fields you fill in, the better.
+ Click btn:[Publish] to make your review request public.
+ An email will be sent to everyone in the [literal]+hbase+ group, to review the patch.
+. Back in your JIRA, click , and paste in the URL of your ReviewBoard request.
+ This attaches the ReviewBoard to the JIRA, for easy access.
+. To cancel the request, click .
+
+For more information on how to use ReviewBoard, see link:http://www.reviewboard.org/docs/manual/1.5/[the ReviewBoard
+ documentation].
+
+==== Guide for HBase Committers
+
+===== New committers
+
+New committers are encouraged to first read Apache's generic committer documentation:
+
+* link:http://www.apache.org/dev/new-committers-guide.html[Apache New Committer Guide]
+* link:http://www.apache.org/dev/committers.html[Apache Committer FAQ]
+
+===== Review
+
+HBase committers should, as often as possible, attempt to review patches submitted by others.
+Ideally every submitted patch will get reviewed by a committer _within a few days_.
+If a committer reviews a patch they have not authored, and believe it to be of sufficient quality, then they can commit the patch; otherwise, the patch should be cancelled with a clear explanation for why it was rejected.
+
+The list of submitted patches is in the link:https://issues.apache.org/jira/secure/IssueNavigator.jspa?mode=hide&requestId=12312392[HBase Review Queue], which is ordered by time of last modification.
+Committers should scan the list from top to bottom, looking for patches that they feel qualified to review and possibly commit.
+
+For non-trivial changes, it is required to get another committer to review your own patches before commit.
+Use the btn:[Submit Patch] button in JIRA, just like other contributors, and then wait for a [literal]`+1` response from another committer before committing.
+
+===== Reject
+
+Patches which do not adhere to the guidelines in link:https://wiki.apache.org/hadoop/Hbase/HowToContribute[HowToContribute] and to the link:https://wiki.apache.org/hadoop/CodeReviewChecklist[code review checklist] should be rejected.
+Committers should always be polite to contributors and try to instruct and encourage them to contribute better patches.
+If a committer wishes to improve an unacceptable patch, then it should first be rejected, and a new patch should be attached by the committer for review.
+
+[[committing.patches]]
+===== Commit
+
+Committers commit patches to the Apache HBase GIT repository.
+
+.Before you commit!!!!
+[NOTE]
+====
+Make sure your local configuration is correct, especially your identity and email.
+Examine the output of the +$ git config --list+ command and be sure it is correct.
+See this GitHub article, link:https://help.github.com/articles/set-up-git[Set Up Git] if you need pointers.
+====
+
+When you commit a patch, please:
+
+. Include the Jira issue id in the commit message, along with a short description of the change and the name of the contributor if it is not you.
+ Be sure to get the issue ID right, as this causes Jira to link to the change in Git (use the issue's "All" tab to see these).
+. Commit the patch to a new branch based off master or other intended branch.
+ It's a good idea to call this branch by the JIRA ID.
+ Then check out the relevant target branch where you want to commit, make sure your local branch has all remote changes (by doing a +git pull --rebase+ or a similar command), cherry-pick the change into each relevant branch (such as master), and do a +git push+.
++
+WARNING: If you do not have all remote changes, the push will fail.
+If the push fails for any reason, fix the problem or ask for help.
+Do not do a +git push --force+.
++
+Before you can commit a patch, you need to determine how the patch was created.
+The instructions and preferences around the way to create patches have changed, and there will be a transition period.
++
+.Determine How a Patch Was Created
+* If the first few lines of the patch look like the headers of an email, with a From, Date, and Subject, it was created using +git format-patch+.
+ This is the preferred method, because you can reuse the submitter's commit message.
+ If the commit message is not appropriate, you can still use the commit, then run the command +git rebase -i origin/master+, and squash and reword as appropriate.
+* If the first line of the patch looks similar to the following, it was created using +git diff+ without [code]+--no-prefix+.
+ This is acceptable too.
+ Notice the [literal]+a+ and [literal]+b+ in front of the file names.
+ This is the indication that the patch was not created with [code]+--no-prefix+.
++
+----
+diff --git a/src/main/docbkx/developer.xml b/src/main/docbkx/developer.xml
+----
+
+* If the first line of the patch looks similar to the following (without the [literal]+a+ and [literal]+b+), the patch was created with +git diff --no-prefix+ and you need to add [code]+-p0+ to the +git apply+ command below.
++
+----
+diff --git src/main/docbkx/developer.xml src/main/docbkx/developer.xml
+----
+
++
+.Example of Committing a Patch
+====
+One thing you will notice with these examples is that there are a lot of +git pull+ commands.
+The only command that actually writes anything to the remote repository is +git push+, and you need to make absolutely sure you have the correct versions of everything and don't have any conflicts before pushing.
+The extra +git pull+ commands are usually redundant, but better safe than sorry.
+
+The first example shows how to apply a patch that was generated with +git format-patch+ and apply it to the [code]+master+ and [code]+branch-1+ branches.
+
+The directive to use +git format-patch+ rather than +git diff+, and not to use [code]+--no-prefix+, is a new one.
+See the second example for how to apply a patch created with +git diff+, and educate the person who created the patch.
+
+----
+$ git checkout -b HBASE-XXXX
+$ git am ~/Downloads/HBASE-XXXX-v2.patch
+$ git checkout master
+$ git pull --rebase
+$ git cherry-pick
+# Resolve conflicts if necessary or ask the submitter to do it
+$ git pull --rebase # Better safe than sorry
+$ git push origin master
+$ git checkout branch-1
+$ git pull --rebase
+$ git cherry-pick
+# Resolve conflicts if necessary
+$ git pull --rebase # Better safe than sorry
+$ git push origin branch-1
+$ git branch -D HBASE-XXXX
+----
+
+This example shows how to commit a patch that was created using +git diff+ without [code]+--no-prefix+.
+If the patch was created with [code]+--no-prefix+, add [code]+-p0+ to the +git apply+ command.
+
+----
+$ git apply ~/Downloads/HBASE-XXXX-v2.patch
+$ git commit -m "HBASE-XXXX Really Good Code Fix (Joe Schmo)" -a # This extra step is needed for patches created with 'git diff'
+$ git checkout master
+$ git pull --rebase
+$ git cherry-pick
+# Resolve conflicts if necessary or ask the submitter to do it
+$ git pull --rebase # Better safe than sorry
+$ git push origin master
+$ git checkout branch-1
+$ git pull --rebase
+$ git cherry-pick
+# Resolve conflicts if necessary or ask the submitter to do it
+$ git pull --rebase # Better safe than sorry
+$ git push origin branch-1
+$ git branch -D HBASE-XXXX
+----
+====
+
+. Resolve the issue as fixed, thanking the contributor.
+ Always set the "Fix Version" at this point, but please only set a single fix version for each branch where the change was committed: the earliest release in that branch in which the change will appear.
+
+====== Commit Message Format
+
+The commit message should contain the JIRA ID and a description of what the patch does.
+The preferred commit message format is:
+
+----
+<jira-id> <description of what the patch does> (<contributor>)
+----
+
+----
+HBASE-12345 Fix All The Things (jane@example.com)
+----
+
+If the contributor used +git format-patch+ to generate the patch, their commit message is in their patch and you can use that, but be sure the JIRA ID is at the front of the commit message, even if the contributor left it out.
+
+[[committer.amending.author]]
+====== Add an Amending-Author when a cherry-pick backport has a conflict
+
+We've established the practice of committing to trunk and then cherry picking back to branches whenever possible.
+When there is a minor conflict we can fix it up and just proceed with the commit.
+The resulting commit retains the original author.
+When the amending author is different from the original committer, add notice of this at the end of the commit message as: [var]+Amending-Author: Author+.
+See the discussion at link:http://search-hadoop.com/m/DHED4wHGYS[HBase, mail # dev - [DISCUSSION] Best practice when amending commits cherry picked from master to branch].
+
+[[committer.tests]]
+====== Committers are responsible for making sure commits do not break the build or tests
+
+If a committer commits a patch, it is their responsibility to make sure it passes the test suite.
+It is helpful if contributors keep an eye out that their patch does not break the hbase build and/or tests, but ultimately, a contributor cannot be expected to be aware of all the particular vagaries and interconnections that occur in a project like HBase.
+A committer should.
+
+[[git.patch.flow]]
+====== Patching Etiquette
+
+In the thread link:http://search-hadoop.com/m/DHED4EiwOz[HBase, mail # dev - ANNOUNCEMENT: Git Migration In Progress (WAS => Re: Git Migration)], the following patch flow was agreed upon:
+
+. Develop and commit the patch against trunk/master first.
+. Try to cherry-pick the patch when backporting if possible.
+. If this does not work, manually commit the patch to the branch.
+
+====== Merge Commits
+
+Avoid merge commits, as they create problems in the git history.
+
+====== Committing Documentation
+
+See <>.
+
+==== Dialog
+
+Committers should hang out in the #hbase room on irc.freenode.net for real-time discussions.
+However any substantive discussion (as with any off-list project-related discussion) should be re-iterated in Jira or on the developer list.
+
+==== Do not edit JIRA comments
+
+Misspellings and/or bad grammar are preferable to the disruption a JIRA comment edit causes. See the discussion at link:http://search-hadoop.com/?q=%5BReopened%5D+%28HBASE-451%29+Remove+HTableDescriptor+from+HRegionInfo&fc_project=HBase[Re:(HBASE-451) Remove HTableDescriptor from HRegionInfo].
+
+ifdef::backend-docbook[]
+[index]
+== Index
+// Generated automatically by the DocBook toolchain.
+endif::backend-docbook[]
diff --git a/src/main/asciidoc/_chapters/external_apis.adoc b/src/main/asciidoc/_chapters/external_apis.adoc
new file mode 100644
index 0000000..dfc64e3
--- /dev/null
+++ b/src/main/asciidoc/_chapters/external_apis.adoc
@@ -0,0 +1,66 @@
+////
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+////
+
+[[external_apis]]
+= Apache HBase External APIs
+:doctype: book
+:numbered:
+:toc: left
+:icons: font
+:experimental:
+
+This chapter will cover access to Apache HBase either through non-Java languages, or through custom protocols.
+For information on using the native HBase APIs, refer to link:http://hbase.apache.org/apidocs/index.html[User API Reference] and the new <