Puts: Put(row, fam, qf, value) Deletes: Delete(row) Delete(row, fam) Delete(row, fam, qf) Delete(row, fam, qf, ts) For both puts and deletes, the type is added afterwards to reflect the kind of action, so that they can be lumped together into BatchUpdates. A delete should just be a put with its value length set to 0 and the type set accordingly, the same as how it is stored. Gets can be put into a [] to make them more batch-like. Since most of them can already take a Family as an argument, they are already batchy in their behavior. Gets: Can early out as soon as maxNrVersions is fulfilled: GetColumns(row, fam, qf) GetColumns(row, fam ,qf, maxNrVersions) GetColumns(row, Family) GetColumns(row, Family, maxNrVersions) GetColumns(row, Family[]) GetColumns(row, Family[], maxNrVersions) Always have to go through all files, no early out: GetFamilies(row) GetFamilies(row, maxNrVersions) GetFamilies(row, fam) GetFamilies(row, fam, maxNrVersions) GetFamilies(row, byte[][] families) GetFamilies(row, byte[][] families, maxNrVersions) The difference between Family and families is that a Family has to include at least one column, whereas families will just ask for everything. Alternatively, we could make another class that just holds that, because if we admit empty columns in the Family we have a totally different query than with them specified. Extra gets with limits of different kinds: GetRange(row, before) GetRange(row, after, before) GetRange(row, fam, before) GetRange(row, Family, before) GetRange(row, Family[], before) GetRange(row, fam, after, before) GetRange(row, Family, after, before) GetRange(row, Family[], after, before) GetRange(row, fam, qf, before) GetRange(row, fam, qf, after, before) The name of the following get (GetTop) is not the best, but it will return the maxNr of entries taken from the sorted memCache and storefiles. So basically the first entries that it encounters, for the latest version only.
GetTop(row, maxNr) GetTop(row, fam, maxNr) GetTop(row, Family, maxNr) Different categories of gets: All the get*(row) variants might have to be treated differently, since they span multiple stores. The reason we have divided the gets into these groups is so that they better reflect how they will be dealt with on the server side. getLatest(row, fam, qf) and getVersions(row, fam, qf, maxNrVersions) can early out when ready. getLatest(rest) and getVersions(rest) have to look in all storefiles. getRange(*) can early out as soon as you are done with the storefile that first encountered ts < [bound missing in the original note]. getTop(*) can early out as soon as the number asked for is fulfilled, making it potentially the most efficient query. So maybe we should regroup them again into something like: GetColumns(row, fam, qf) GetColumns(row, fam ,qf, maxNrVersions) GetColumns(row, Family) GetColumns(row, Family, maxNrVersions) GetColumns(row, Family[]) GetColumns(row, Family[], maxNrVersions) GetFamilies(row) GetFamilies(row, maxNrVersions) GetFamilies(row, fam) GetFamilies(row, fam, maxNrVersions) GetFamilies(row, byte[][] families) GetFamilies(row, byte[][] families, maxNrVersions) New Classes: class KV{ byte[] ptr int offset int length } class Result { byte [] row; Map families; Result(KV) Result(List) public Column getColumn() public byte [] getValue() { return families.getFirst().getValue(); } ...
} public class Family { private byte [] family; private Set columns; //Not sure if this should be a per Family setting or per GetColumns, setting //it to per Family for now private int maxNrVersions public Family(byte [] family, byte [] column) { this(family, column, 1); } public Family(byte [] family, byte [] column, int maxNrVersions) { this.family = family; this.columns = new TreeSet(Bytes.Comparator); this.columns.add(column); this.maxNrVersions = maxNrVersions; } public Family(byte [] family, byte [][] columns) { this(family, columns, 1); } public Family(byte [] family, byte [][] columns, int maxNrVersions) { this.family = family; this.columns = new TreeSet<...> this.maxNrVersions = maxNrVersions; for(byte [] column: columns) this.columns.add(column); } public void add(byte [] column) { checkSet(); this.columns.add(column); } public byte [] getFamily() { return family; } public byte [][] getColumns() { return columns.toArray(new byte[][]); } public int getMaxNrVersions(){ return maxNrVersions; } private void checkSet() { if(columns == null) columns = new TreeSet(Bytes.comparator); } } public class Column { private byte [] column; private long ts; public Column(byte [] column) { this.column = column; this.ts = LATEST; } public Column(byte [] column, long ts) { this.column = column; this.ts = ts; } public byte [] getColumn() { return column; } public long getTimestamp() { return ts; } } public class TimeRange { private long tsMin; private long tsMax; public TimeRange(long tsMin, long tsMax) { this.tsMin = tsMin; this.tsMax = tsMax; } public TimeRange(long tsMin) { this(tsMin,Long.MAX_VALUE); } public long getMin() { return tsMin; } public long getMax() { return tsMax; } } interface Update { Type enum {Put, DeleteRow, DeleteFamily, DeleteColumn, DeleteVersion} getType() } class Put implements Update { int type; Put(fam, qf, value) getType() } class Delete implements Update { int type; Delete() Delete(fam) Delete(fam, qf) Delete(fam, qf, ts) getType() } class 
BatchUpdate { byte [] row; List keyvals; BatchUpdate(row); BatchUpdate(List) getRow() getKeyValues() add(Update) //These methods can also be added unless we want to make it write once then //read only BatchUpdate(); setRow() } interface Get { getRow() getFam() } For the gets, it is not yet clear whether it is going to be better to keep the List structure or to just keep variables in the classes. One thing to deal with is the different comparators on the server side. class GetColumns implements Get { byte [] row Set families GetColumns(row, fam, qf) GetColumns(row, fam ,qf, maxNrVersions) GetColumns(row, Family) GetColumns(row, Family, maxNrVersions) GetColumns(row, Family[]) GetColumns(row, Family[], maxNrVersions) addFamily(fam, qf) addFamily(fam ,qf, maxNrVersions) addFamily(Family) addFamily(Family, maxNrVersions) addFamily(Family[]) addFamily(Family[], maxNrVersions[]) byte [] getRow() Family [] getFamilies() } class GetFamilies implements Get { byte[] row Set<[][]byte> families; int[] maxVersions; GetFamilies(row) GetFamilies(row, maxNrVersions) GetFamilies(row, fam) GetFamilies(row, fam, maxNrVersions) GetFamilies(row, byte[][] families) GetFamilies(row, byte[][] families, []maxNrVersions) byte [] getRow() Family [] getFamilies() int[] getMaxVersions() } class GetRange implements Get { byte [] row; Set families; TimeRange tr; GetRange(row, before) GetRange(row, after, before) GetRange(row, fam, before) GetRange(row, Family, before) GetRange(row, Family[], before) GetRange(row, fam, after, before) GetRange(row, Family, after, before) GetRange(row, Family[], after, before) GetRange(row, fam, qf, before) GetRange(row, fam, qf, after, before) byte [] getRow() Family [] getFamilies() TimeRange getTimeRange() } class GetTop implements Get { byte[] row Family family; int maxNr; GetTop(row, maxNr) GetTop(row, fam, maxNr) GetTop(row, Family, maxNr) byte [] getRow() Family getFamily() int getMaxNr() }