Index: src/main/java/org/apache/hadoop/hbase/coprocessor/example/BulkDeleteEndpoint.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/coprocessor/example/BulkDeleteEndpoint.java	(revision 0)
+++ src/main/java/org/apache/hadoop/hbase/coprocessor/example/BulkDeleteEndpoint.java	(revision 0)
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.coprocessor.example;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.coprocessor.BaseEndpointCoprocessor;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.OperationStatus;
+import org.apache.hadoop.hbase.regionserver.RegionScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+
+public class BulkDeleteEndpoint extends BaseEndpointCoprocessor implements BulkDeleteProtocol {
+  private static final Log LOG = LogFactory.getLog(BulkDeleteEndpoint.class);
+
+  @Override
+  public BulkDeleteResponse deleteRows(Scan scan, int rowBatchSize) {
+    long totalRowsDeleted = 0L;
+    BulkDeleteResponse response = new BulkDeleteResponse();
+    HRegion region = ((RegionCoprocessorEnvironment) getEnvironment()).getRegion();
+    List<KeyValue> results = new ArrayList<KeyValue>();
+    boolean hasMore = false;
+    RegionScanner scanner = null;
+    if (scan.getFilter() == null) {
+      // All we need are the row keys, so the 1st KV from any row is enough.
+      scan.setFilter(new FirstKeyOnlyFilter());
+    }
+    // When the delete is condition based, filters are present in the scan and we assume the
+    // scan already selects only the necessary column(s).
+    try {
+      scanner = region.getScanner(scan);
+      while (true) {
+        Set<byte[]> deleteRowKeys = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
+        for (int i = 0; i < rowBatchSize; i++) {
+          hasMore = scanner.next(results);
+          if (results.size() > 0) {
+            // We just need the row key. Get it from the 1st KV.
+            byte[] row = results.get(0).getRow();
+            deleteRowKeys.add(row);
+            results.clear();
+          }
+          if (!hasMore) {
+            // There are no more rows.
+            break;
+          }
+        }
+        if (deleteRowKeys.size() > 0) {
+          // Build one Delete per accumulated row key. The null in each Pair means no explicit
+          // row lock is passed; the region acquires the row locks itself.
+          Pair<Mutation, Integer>[] deleteWithLockArr = new Pair[deleteRowKeys.size()];
+          int i = 0;
+          for (byte[] deleteRowKey : deleteRowKeys) {
+            deleteWithLockArr[i++] = new Pair<Mutation, Integer>(new Delete(deleteRowKey), null);
+          }
+          OperationStatus[] opStatus = region.batchMutate(deleteWithLockArr);
+          for (OperationStatus status : opStatus) {
+            if (status.getOperationStatusCode() == OperationStatusCode.SUCCESS) {
+              totalRowsDeleted++;
+            }
+          }
+        } else {
+          break;
+        }
+      }
+    } catch (IOException ioe) {
+      LOG.error(ioe);
+      response.setIoException(ioe);
+    } finally {
+      if (scanner != null) {
+        try {
+          scanner.close();
+        } catch (IOException ioe) {
+          LOG.debug(ioe);
+        }
+      }
+    }
+    response.setRowsDeleted(totalRowsDeleted);
+    return response;
+  }
+}
Index: src/main/java/org/apache/hadoop/hbase/coprocessor/example/BulkDeleteProtocol.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/coprocessor/example/BulkDeleteProtocol.java	(revision 0)
+++ src/main/java/org/apache/hadoop/hbase/coprocessor/example/BulkDeleteProtocol.java	(revision 0)
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.coprocessor.example;
+
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
+
+/**
+ * Defines a protocol to delete rows in bulk based on a scan. The scan can be a plain range scan
+ * or can carry conditions (filters).
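+ * <p>
+ * Note: when the passed scan carries no filter, the endpoint sets a FirstKeyOnlyFilter on it at
+ * the server side, since only the row keys are needed to build the Deletes.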
+ * <pre>
+ * Example:
+ * Scan scan = new Scan();
+ * // set scan properties (range, filters, etc.)
+ * HTable ht = ...;
+ * Batch.Call<BulkDeleteProtocol, BulkDeleteResponse> callable = 
+ *     new Batch.Call<BulkDeleteProtocol, BulkDeleteResponse>() {
+ *   public BulkDeleteResponse call(BulkDeleteProtocol instance) throws IOException {
+ *     return instance.deleteRows(scan, rowBatchSize);
+ *   }
+ * };
+ * Map<byte[], BulkDeleteResponse> result = ht.coprocessorExec(BulkDeleteProtocol.class,
+ *     scan.getStartRow(), scan.getStopRow(), callable);
+ * for (BulkDeleteResponse response : result.values()) {
+ *   noOfDeletedRows += response.getRowsDeleted();
+ * }
+ * </pre>
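+ * <p>
+ * The snippet assumes rowBatchSize and noOfDeletedRows are declared by the caller; scan and
+ * rowBatchSize must be final so the anonymous Batch.Call can reference them.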
+ */
+public interface BulkDeleteProtocol extends CoprocessorProtocol {
+
+  /**
+   * @param scan the scan that selects the rows to be deleted
+   * @param rowBatchSize
+   *          The number of rows to accumulate from the scan and delete as one batch.
+   * @return {@link BulkDeleteResponse}
+   */
+  BulkDeleteResponse deleteRows(Scan scan, int rowBatchSize);
+}
Index: src/main/java/org/apache/hadoop/hbase/coprocessor/example/BulkDeleteResponse.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/coprocessor/example/BulkDeleteResponse.java	(revision 0)
+++ src/main/java/org/apache/hadoop/hbase/coprocessor/example/BulkDeleteResponse.java	(revision 0)
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.coprocessor.example;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+/**
+ * Wrapper class which returns the result of the bulk delete operation performed at the server
+ * for a region. This includes the total number of rows deleted and/or any {@link IOException}
+ * which occurred while performing the operation.
+ */
+public class BulkDeleteResponse implements Serializable {
+  private static final long serialVersionUID = -8192337710525997237L;
+  private Long rowsDeleted;
+  private IOException ioException;
+
+  public BulkDeleteResponse() {
+  }
+
+  public void setRowsDeleted(Long rowsDeleted) {
+    this.rowsDeleted = rowsDeleted;
+  }
+
+  public Long getRowsDeleted() {
+    return rowsDeleted;
+  }
+
+  public void setIoException(IOException ioException) {
+    this.ioException = ioException;
+  }
+
+  public IOException getIoException() {
+    return ioException;
+  }
+}
Index: src/test/java/org/apache/hadoop/hbase/coprocessor/example/TestBulkDeleteProtocol.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/coprocessor/example/TestBulkDeleteProtocol.java	(revision 0)
+++ src/test/java/org/apache/hadoop/hbase/coprocessor/example/TestBulkDeleteProtocol.java	(revision 0)
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.coprocessor.example;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.coprocessor.Batch;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
+import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(MediumTests.class)
+public class TestBulkDeleteProtocol {
+  private static final byte[] FAMILY = Bytes.toBytes("cf1");
+  private static final byte[] QUALIFIER = Bytes.toBytes("c1");
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
+        "org.apache.hadoop.hbase.coprocessor.example.BulkDeleteEndpoint");
+    TEST_UTIL.startMiniCluster(2);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testBulkDeleteEndpoint() throws Throwable {
+    byte[] tableName = Bytes.toBytes("testBulkDeleteEndpoint");
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    HColumnDescriptor hcd = new HColumnDescriptor(FAMILY);
+    htd.addFamily(hcd);
+    TEST_UTIL.getHBaseAdmin().createTable(htd, Bytes.toBytes(0), Bytes.toBytes(120), 5);
+
+    HTable ht = new HTable(TEST_UTIL.getConfiguration(), tableName);
+    List<Put> puts = new ArrayList<Put>(100);
+    for (int j = 0; j < 100; j++) {
+      byte[] rowkey = Bytes.toBytes(j);
+      puts.add(createPut(rowkey, "v1"));
+    }
+    ht.put(puts);
+    // Deleting all the rows.
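+    // The batch size of 500 exceeds the 100 rows in the table, so each region deletes its
+    // share of the rows in a single batchMutate() call.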
+    long noOfRowsDeleted = invokeBulkDeleteProtocol(tableName, new Scan(), 500);
+    assertEquals(100, noOfRowsDeleted);
+
+    int rows = 0;
+    for (Result result : ht.getScanner(new Scan())) {
+      rows++;
+    }
+    assertEquals(0, rows);
+  }
+
+  private long invokeBulkDeleteProtocol(byte[] tableName, final Scan scan, final int rowBatchSize)
+      throws Throwable {
+    HTable ht = new HTable(TEST_UTIL.getConfiguration(), tableName);
+    long noOfDeletedRows = 0L;
+    Batch.Call<BulkDeleteProtocol, BulkDeleteResponse> callable =
+        new Batch.Call<BulkDeleteProtocol, BulkDeleteResponse>() {
+      public BulkDeleteResponse call(BulkDeleteProtocol instance) throws IOException {
+        return instance.deleteRows(scan, rowBatchSize);
+      }
+    };
+    Map<byte[], BulkDeleteResponse> result = ht.coprocessorExec(BulkDeleteProtocol.class,
+        scan.getStartRow(), scan.getStopRow(), callable);
+    for (BulkDeleteResponse response : result.values()) {
+      noOfDeletedRows += response.getRowsDeleted();
+    }
+    return noOfDeletedRows;
+  }
+
+  @Test
+  public void testBulkDeleteWithConditionBasedDelete() throws Throwable {
+    byte[] tableName = Bytes.toBytes("testBulkDeleteWithConditionBasedDelete");
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    HColumnDescriptor hcd = new HColumnDescriptor(FAMILY);
+    htd.addFamily(hcd);
+    TEST_UTIL.getHBaseAdmin().createTable(htd, Bytes.toBytes(0), Bytes.toBytes(120), 5);
+    HTable ht = new HTable(TEST_UTIL.getConfiguration(), tableName);
+    List<Put> puts = new ArrayList<Put>(100);
+    for (int j = 0; j < 100; j++) {
+      byte[] rowkey = Bytes.toBytes(j);
+      String value = (j % 10 == 0) ? "v1" : "v2";
+      puts.add(createPut(rowkey, value));
+    }
+    ht.put(puts);
+    Scan scan = new Scan();
+    SingleColumnValueFilter scvf = new SingleColumnValueFilter(FAMILY, QUALIFIER, CompareOp.EQUAL,
+        Bytes.toBytes("v1"));
+    scan.setFilter(scvf);
+    // Deleting all the rows where cf1:c1=v1
+    long noOfRowsDeleted = invokeBulkDeleteProtocol(tableName, scan, 500);
+    assertEquals(10, noOfRowsDeleted);
+
+    int rows = 0;
+    for (Result result : ht.getScanner(new Scan())) {
+      rows++;
+    }
+    assertEquals(90, rows);
+  }
+
+  private Put createPut(byte[] rowkey, String value) throws IOException {
+    Put put = new Put(rowkey);
+    put.add(FAMILY, QUALIFIER, Bytes.toBytes(value));
+    return put;
+  }
+}