Puts:
Put(row, fam, qf, value)

Deletes:
Delete(row)
Delete(row, fam)
Delete(row, fam, qf)
Delete(row, fam, qf, ts)

For both puts and deletes, the type is added afterwards to reflect the type of action, so that they can be lumped together into BatchUpdates. A delete should just be a put with the value length set to 0 and the type set accordingly, the same as how it is stored.

Gets can be put into a [] to make them more batch-like. Since most of them can already take a Family as an argument, they are already batchy in their behavior.

Gets:

Getting the latest version
GetLatest(row)
GetLatest(row, fam)
GetLatest(row, fam, qf)
GetLatest(row, byte [][] fam, byte [][] qf)
GetLatest(row, Family)
GetLatest(row, Family[])

Getting versions, for all and for maxNr
GetVersions(row)
GetVersions(row, fam)
GetVersions(row, fam, qf)
GetVersions(row, Family)
GetVersions(row, Family [] families)
GetVersions(row, maxNrVersions)
GetVersions(row, fam, maxNrVersions)
GetVersions(row, fam, qf, maxNrVersions)
GetVersions(row, Family, maxVersions)
GetVersions(row, Family [] families, maxVersions)

Extra gets with limits of different kinds
GetRange(row, before)
GetRange(row, after, before)
GetRange(row, fam, before)
GetRange(row, Family, before)
GetRange(row, Family[], before)
GetRange(row, fam, after, before)
GetRange(row, Family, after, before)
GetRange(row, Family[], after, before)
GetRange(row, fam, qf, before)
GetRange(row, fam, qf, after, before)

The name GetTop is not the best, but it will return maxNr entries taken from the sorted memCache and storefiles, so basically the first entries that it encounters, for the latest version only.
GetTop(row, maxNr)
GetTop(row, fam, maxNr)
GetTop(row, Family, maxNr)

Different categories of gets

All the get*(row) calls might have to be treated differently, since they span multiple stores. The reason we have divided the gets into the groups above is so that they better reflect how they will be dealt with on the server side.
getLatest(row, fam, qf) and getVersions(row, fam, qf, maxNrVersions) can exit early as soon as they are done.
getLatest(rest) and getVersions(rest), i.e. all the other variants, have to look in all storefiles.
getRange(*) can exit early as soon as you are done with the storefile that first encountered ts < after (the right-hand side is missing in the original text; presumably the lower bound of the range).
getTop(*) can exit early as soon as the number asked for is fulfilled; it is potentially the most efficient query.

So maybe we should regroup them again into something like:
GetColumns(row, fam, qf)
GetColumns(row, fam, qf, maxNrVersions)
GetColumns(row, Family)
GetColumns(row, Family, maxNrVersions)
GetColumns(row, Family[])
GetColumns(row, Family[], maxNrVersions)

GetFamilies(row)
GetFamilies(row, maxNrVersions)
GetFamilies(row, fam)
GetFamilies(row, fam, maxNrVersions)
GetFamilies(row, byte[][] families)
GetFamilies(row, byte[][] families, maxNrVersions)

New Classes:

class KV{
  byte[] ptr
  int offset
  int length
}

class Result {
  byte [] row;
  Map families;
  Result(KV)
  Result(List)
  public Column getColumn()
  public byte [] getValue() {
    return families.getFirst().getValue();
  }
  ...
} public class Family { private byte [] family; private Set columns; public Family(byte [] family, byte [] column) { this.family = family; this.columns = new TreeSet(Bytes.Comparator); this.columns.add(column); } public Family(byte [] family, byte [][] columns) { this.family = family; this.columns = new TreeSet<...> for(byte [] column: columns) this.columns.add(column); } public void add(byte [] column) { checkSet(); this.columns.add(column); } public byte [] getFamily() { return family; } public byte [][] getColumns() { return columns.toArray(new byte[][]); } private void checkSet() { if(columns == null) columns = new TreeSet(Bytes.comparator); } } public class Column { private byte [] column; private long ts; public Column(byte [] column) { this.column = column; this.ts = LATEST; } public Column(byte [] column, long ts) { this.column = column; this.ts = ts; } public byte [] getColumn() { return column; } public long getTimestamp() { return ts; } } public class TimeRange { private long tsMin; private long tsMax; public TimeRange(long tsMin, long tsMax) { this.tsMin = tsMin; this.tsMax = tsMax; } public TimeRange(long tsMin) { this(tsMin,Long.MAX_VALUE); } public long getMin() { return tsMin; } public long getMax() { return tsMax; } } interface Update { Type enum {Put, DeleteRow, DeleteFamily, DeleteColumn, DeleteVersion} getType() } class Put implements Update { int type; Put(fam, qf, value) getType() } class Delete implements Update { int type; Delete() Delete(fam) Delete(fam, qf) Delete(fam, qf, ts) getType() } class BatchUpdate { byte [] row; List keyvals; BatchUpdate(row); BatchUpdate(List) getRow() getKeyValues() add(Update) //These methods can also be added unless we want to make it write once then //read only BatchUpdate(); setRow() } interface Get { getRow() getFam() } For the gets, not sure if it is going to be better to keep the List structure or just keeping variables in the classes. Things to deal with are different comparators on the server side. 
// Sketch (pseudo-code): get of the latest version for a row.
class GetLatest implements Get {
  byte [] row;
  Set families;
  GetLatest(row)
  GetLatest(row, fam)
  GetLatest(row, fam, qf)
  GetLatest(row, byte [][] fam, byte [][] qf)
  GetLatest(row, Family)
  GetLatest(row, List)
  addFamily(fam)
  addFamily(Family)
  addColumn(fam, qf)
  addColumn(fam, Column)
  byte [] getRow()
  Family [] getFamilies()
}

// Sketch (pseudo-code): get of versions for a row, optionally with a
// maximum number of versions.
class GetVersions implements Get {
  byte[] row;
  Set families;
  int maxVersions;
  GetVersions(row)
  GetVersions(row, fam)
  GetVersions(row, fam, qf)
  GetVersions(row, Family)
  GetVersions(row, Family [] families)
  GetVersions(row, maxNrVersions)
  GetVersions(row, fam, maxNrVersions)
  GetVersions(row, fam, qf, maxNrVersions)
  GetVersions(row, Family, maxVersions)
  GetVersions(row, Family [] families, maxVersions)
  byte [] getRow()
  Family [] getFamilies()
  int getMaxVersions()
}

// Sketch (pseudo-code): get for a row limited by a TimeRange.
class GetRange implements Get {
  byte [] row;
  Set families;
  TimeRange tr;
  GetRange(row, before)
  GetRange(row, after, before)
  GetRange(row, fam, before)
  GetRange(row, Family, before)
  GetRange(row, Family[], before)
  GetRange(row, fam, after, before)
  GetRange(row, Family, after, before)
  GetRange(row, Family[], after, before)
  GetRange(row, fam, qf, before)
  GetRange(row, fam, qf, after, before)
  byte [] getRow()
  Family [] getFamilies()
  TimeRange getTimeRange()
}

// Sketch (pseudo-code): get for a row limited to maxNr entries.
class GetTop implements Get {
  byte[] row;
  Family family;
  int maxNr;
  GetTop(row, maxNr)
  GetTop(row, fam, maxNr)
  GetTop(row, Family, maxNr)
  byte [] getRow()
  Family getFamily()
  int getMaxNr()
}