diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java index d4d8ee4..a64cfac 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java @@ -112,6 +112,11 @@ public class HColumnDescriptor implements Comparable { public static final String REPLICATION_SCOPE = "REPLICATION_SCOPE"; public static final byte[] REPLICATION_SCOPE_BYTES = Bytes.toBytes(REPLICATION_SCOPE); public static final String MIN_VERSIONS = "MIN_VERSIONS"; + /** + * Retain all cells across flushes and compactions even if they fall behind + * a delete tombstone. To see all retained cells, do a 'raw' scan; see + * Scan#setRaw or pass the RAW => true attribute in the shell. + */ public static final String KEEP_DELETED_CELLS = "KEEP_DELETED_CELLS"; public static final String COMPRESS_TAGS = "COMPRESS_TAGS"; diff --git a/hbase-shell/src/main/ruby/shell/commands/create.rb b/hbase-shell/src/main/ruby/shell/commands/create.rb index ab3a3d1..fca42cb 100644 --- a/hbase-shell/src/main/ruby/shell/commands/create.rb +++ b/hbase-shell/src/main/ruby/shell/commands/create.rb @@ -25,8 +25,8 @@ module Shell Creates a table. Pass a table name, and a set of column family specifications (at least one), and, optionally, table configuration. Column specification can be a simple string (name), or a dictionary -(dictionaries are described below in main help output), necessarily -including NAME attribute. +(dictionaries are described below in main help output), necessarily +including NAME attribute. 
Examples: Create a table with namespace=ns1 and table qualifier=t1 @@ -38,7 +38,7 @@ Create a table with namespace=default and table qualifier=t1 hbase> create 't1', 'f1', 'f2', 'f3' hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, BLOCKCACHE => true} hbase> create 't1', {NAME => 'f1', CONFIGURATION => {'hbase.hstore.blockingStoreFiles' => '10'}} - + Table configuration options can be put at the end. Examples: diff --git a/src/main/asciidoc/_chapters/datamodel.adoc b/src/main/asciidoc/_chapters/datamodel.adoc index 74238ca..b76adc8 100644 --- a/src/main/asciidoc/_chapters/datamodel.adoc +++ b/src/main/asciidoc/_chapters/datamodel.adoc @@ -495,7 +495,7 @@ For an informative discussion on how deletes and versioning interact, see the th Also see <<keyvalue,keyvalue>> for more information on the internal KeyValue format. -Delete markers are purged during the next major compaction of the store, unless the `KEEP_DELETED_CELLS` option is set in the column family. +Delete markers are purged during the next major compaction of the store, unless the `KEEP_DELETED_CELLS` option is set in the column family (see <<cf.keep.deleted>>). To keep the deletes for a configurable amount of time, you can set the delete TTL via the +hbase.hstore.time.to.purge.deletes+ property in _hbase-site.xml_. If `hbase.hstore.time.to.purge.deletes` is not set, or set to 0, all delete markers, including those with timestamps in the future, are purged during the next major compaction. Otherwise, a delete marker with a timestamp in the future is kept until the major compaction which occurs after the time represented by the marker's timestamp plus the value of `hbase.hstore.time.to.purge.deletes`, in milliseconds. 
diff --git a/src/main/asciidoc/_chapters/schema_design.adoc b/src/main/asciidoc/_chapters/schema_design.adoc index 28f28a5..52fc83a 100644 --- a/src/main/asciidoc/_chapters/schema_design.adoc +++ b/src/main/asciidoc/_chapters/schema_design.adoc @@ -461,7 +461,98 @@ HColumnDescriptor.setKeepDeletedCells(true); ---- ==== -See the API documentation for link:http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html#KEEP_DELETED_CELLS[KEEP_DELETED_CELLS] for more information. +Let us illustrate the basic effect of setting the `KEEP_DELETED_CELLS` attribute on a table. + +First, without: +[source] +---- +create 'test', {NAME=>'e', VERSIONS=>2147483647} +put 'test', 'r1', 'e:c1', 'value', 10 +put 'test', 'r1', 'e:c1', 'value', 12 +put 'test', 'r1', 'e:c1', 'value', 14 +delete 'test', 'r1', 'e:c1', 11 + +hbase(main):017:0> scan 'test', {RAW=>true, VERSIONS=>1000} +ROW COLUMN+CELL + r1 column=e:c1, timestamp=14, value=value + r1 column=e:c1, timestamp=12, value=value + r1 column=e:c1, timestamp=11, type=DeleteColumn + r1 column=e:c1, timestamp=10, value=value +1 row(s) in 0.0120 seconds + +hbase(main):018:0> flush 'test' +0 row(s) in 0.0350 seconds + +hbase(main):019:0> scan 'test', {RAW=>true, VERSIONS=>1000} +ROW COLUMN+CELL + r1 column=e:c1, timestamp=14, value=value + r1 column=e:c1, timestamp=12, value=value + r1 column=e:c1, timestamp=11, type=DeleteColumn +1 row(s) in 0.0120 seconds + +hbase(main):020:0> major_compact 'test' +0 row(s) in 0.0260 seconds + +hbase(main):021:0> scan 'test', {RAW=>true, VERSIONS=>1000} +ROW COLUMN+CELL + r1 column=e:c1, timestamp=14, value=value + r1 column=e:c1, timestamp=12, value=value +1 row(s) in 0.0120 seconds +---- + +Notice how the deleted cell (timestamp 10) is dropped by the flush, and the delete marker itself is purged by the major compaction. 
+ +Now let's run the same test, only with `KEEP_DELETED_CELLS` set on the table (you can set it per table or per column family): + +[source] +---- +hbase(main):005:0> create 'test', {NAME=>'e', VERSIONS=>2147483647, KEEP_DELETED_CELLS => true} +0 row(s) in 0.2160 seconds + +=> Hbase::Table - test +hbase(main):006:0> put 'test', 'r1', 'e:c1', 'value', 10 +0 row(s) in 0.1070 seconds + +hbase(main):007:0> put 'test', 'r1', 'e:c1', 'value', 12 +0 row(s) in 0.0140 seconds + +hbase(main):008:0> put 'test', 'r1', 'e:c1', 'value', 14 +0 row(s) in 0.0160 seconds + +hbase(main):009:0> delete 'test', 'r1', 'e:c1', 11 +0 row(s) in 0.0290 seconds + +hbase(main):010:0> scan 'test', {RAW=>true, VERSIONS=>1000} +ROW COLUMN+CELL + r1 column=e:c1, timestamp=14, value=value + r1 column=e:c1, timestamp=12, value=value + r1 column=e:c1, timestamp=11, type=DeleteColumn + r1 column=e:c1, timestamp=10, value=value +1 row(s) in 0.0550 seconds + +hbase(main):011:0> flush 'test' +0 row(s) in 0.2780 seconds + +hbase(main):012:0> scan 'test', {RAW=>true, VERSIONS=>1000} +ROW COLUMN+CELL + r1 column=e:c1, timestamp=14, value=value + r1 column=e:c1, timestamp=12, value=value + r1 column=e:c1, timestamp=11, type=DeleteColumn + r1 column=e:c1, timestamp=10, value=value +1 row(s) in 0.0620 seconds + +hbase(main):013:0> major_compact 'test' +0 row(s) in 0.0530 seconds + +hbase(main):014:0> scan 'test', {RAW=>true, VERSIONS=>1000} +ROW COLUMN+CELL + r1 column=e:c1, timestamp=14, value=value + r1 column=e:c1, timestamp=12, value=value + r1 column=e:c1, timestamp=11, type=DeleteColumn + r1 column=e:c1, timestamp=10, value=value +1 row(s) in 0.0650 seconds +---- + [[secondary.indexes]] == Secondary Indexes and Alternate Query Paths