diff --git data/files/cbo_t1.txt data/files/cbo_t1.txt index e8034a4..8a0fbec 100644 --- data/files/cbo_t1.txt +++ data/files/cbo_t1.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t2.txt data/files/cbo_t2.txt index 34633d3..4e86437 100644 --- data/files/cbo_t2.txt +++ data/files/cbo_t2.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t3.txt data/files/cbo_t3.txt index a9c995e..87b1613f 100644 --- data/files/cbo_t3.txt +++ data/files/cbo_t3.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t4.txt data/files/cbo_t4.txt index 56e0794..22825b3 100644 --- data/files/cbo_t4.txt +++ data/files/cbo_t4.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t5.txt data/files/cbo_t5.txt index 245b1b9..9dadc77 100644 --- data/files/cbo_t5.txt +++ data/files/cbo_t5.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t6.txt data/files/cbo_t6.txt index dd72edd..5f25ccb 100644 --- data/files/cbo_t6.txt +++ data/files/cbo_t6.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java index 2727b36..3db034c 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java +++ 
hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java @@ -66,8 +66,8 @@ public void init( Result r, byte [] columnFamilyBytes, List binaryStorage, byte[] qualPrefix) { - - result = r; + this.isNull = false; + this.result = r; this.columnFamilyBytes = columnFamilyBytes; this.binaryStorage = binaryStorage; this.qualPrefix = qualPrefix; @@ -118,6 +118,12 @@ private void parse() { valueRef.setData(e.getValue()); value.init(valueRef, 0, valueRef.getData().length); + if (isNull(oi.getNullSequence(), valueRef, 0, valueRef.getData().length)) { + value.setNull(); + } else { + value.init(valueRef, 0, valueRef.getData().length); + } + // Put the key/value into the map cachedMap.put(key.getObject(), value.getObject()); } diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java index 8a3eac7..8bc4ac6 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java @@ -138,13 +138,21 @@ private Object uncheckedGetField(int fieldID) { boolean [] fieldsInited = getFieldInited(); if (!fieldsInited[fieldID]) { - ByteArrayRef ref = null; + fieldsInited[fieldID] = true; + ColumnMapping colMap = columnsMapping[fieldID]; - if (colMap.hbaseRowKey) { - ref = new ByteArrayRef(); - ref.setData(result.getRow()); - } else if (colMap.hbaseTimestamp) { + if (!colMap.hbaseRowKey && colMap.qualifierName == null) { + // it is a column family + // primitive type for Map can be stored in binary format. 
Pass in the + // qualifier prefix to cherry pick the qualifiers that match the prefix instead of picking + // up everything + ((LazyHBaseCellMap) fields[fieldID]).init( + result, colMap.familyNameBytes, colMap.binaryStorage, colMap.qualifierPrefixBytes); + return fields[fieldID].getObject(); + } + + if (colMap.hbaseTimestamp) { long timestamp = result.rawCells()[0].getTimestamp(); // from hbase-0.96.0 LazyObjectBase lz = fields[fieldID]; if (lz instanceof LazyTimestamp) { @@ -152,35 +160,25 @@ private Object uncheckedGetField(int fieldID) { } else { ((LazyLong) lz).getWritableObject().set(timestamp); } - } else { - if (colMap.qualifierName == null) { - // it is a column family - // primitive type for Map can be stored in binary format. Pass in the - // qualifier prefix to cherry pick the qualifiers that match the prefix instead of picking - // up everything - ((LazyHBaseCellMap) fields[fieldID]).init( - result, colMap.familyNameBytes, colMap.binaryStorage, colMap.qualifierPrefixBytes); - } else { - // it is a column i.e. a column-family with column-qualifier - byte [] res = result.getValue(colMap.familyNameBytes, colMap.qualifierNameBytes); - - if (res == null) { - return null; - } else { - ref = new ByteArrayRef(); - ref.setData(res); - } - } + return lz.getObject(); } - if (ref != null) { - fields[fieldID].init(ref, 0, ref.getData().length); + byte[] bytes; + if (colMap.hbaseRowKey) { + bytes = result.getRow(); + } else { + // it is a column i.e. 
a column-family with column-qualifier + bytes = result.getValue(colMap.familyNameBytes, colMap.qualifierNameBytes); + } + if (bytes == null || isNull(oi.getNullSequence(), bytes, 0, bytes.length)) { + fields[fieldID].setNull(); + } else { + ByteArrayRef ref = new ByteArrayRef(); + ref.setData(bytes); + fields[fieldID].init(ref, 0, bytes.length); } } - // Has to be set last because of HIVE-3179: NULL fields would not work otherwise - fieldsInited[fieldID] = true; - return fields[fieldID].getObject(); } diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseKeyFactory.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseKeyFactory.java index 8962533..38ae9c5 100644 --- hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseKeyFactory.java +++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseKeyFactory.java @@ -71,15 +71,22 @@ public LazyObjectBase createKey(ObjectInspector inspector) throws SerDeException private static class DoubleDollarSeparated implements LazyObjectBase { private Object[] fields; + private transient boolean isNull; @Override public void init(ByteArrayRef bytes, int start, int length) { fields = new String(bytes.getData(), start, length).split(DELIMITER_PATTERN); + isNull = false; + } + + @Override + public void setNull() { + isNull = true; } @Override public Object getObject() { - return this; + return isNull ? 
null : this; } } diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseKeyFactory2.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseKeyFactory2.java index ecd5061..b7eb60f 100644 --- hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseKeyFactory2.java +++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseKeyFactory2.java @@ -200,6 +200,8 @@ private HBaseScanRange setupFilter(String keyColName, List private final int fixedLength; private final List fields = new ArrayList(); + private transient boolean isNull; + public FixedLengthed(int fixedLength) { this.fixedLength = fixedLength; } @@ -213,11 +215,17 @@ public void init(ByteArrayRef bytes, int start, int length) { for (; rowStart < length; rowStart = rowStop + 1, rowStop = rowStart + fixedLength) { fields.add(new String(data, rowStart, rowStop - rowStart).trim()); } + isNull = false; + } + + @Override + public void setNull() { + isNull = true; } @Override public Object getObject() { - return this; + return isNull ? 
null : this; } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java index ce0cfb3..3dc73a2 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java @@ -182,16 +182,14 @@ private Object uncheckedGetElement(int index) { Text nullSequence = oi.getNullSequence(); - int elementLength = startPosition[index + 1] - startPosition[index] - 1; - if (elementLength == nullSequence.getLength() - && 0 == LazyUtils - .compare(bytes.getData(), startPosition[index], elementLength, - nullSequence.getBytes(), 0, nullSequence.getLength())) { + int elementStart = startPosition[index]; + int elementLength = startPosition[index + 1] - elementStart - 1; + if (isNull(oi.getNullSequence(), bytes, elementStart, elementLength)) { return arrayElements[index] = null; } arrayElements[index] = LazyFactory .createLazyObject(oi.getListElementObjectInspector()); - arrayElements[index].init(bytes, startPosition[index], elementLength); + arrayElements[index].init(bytes, elementStart, elementLength); return arrayElements[index].getObject(); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java index e6932d9..299bb36 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java @@ -270,10 +270,7 @@ private LazyObject uncheckedGetValue(int index) { Text nullSequence = oi.getNullSequence(); int valueIBegin = keyEnd[index] + 1; int valueILength = valueLength[index]; - if (valueILength < 0 - || ((valueILength == nullSequence.getLength()) && 0 == LazyUtils - .compare(bytes.getData(), valueIBegin, valueILength, nullSequence - .getBytes(), 0, nullSequence.getLength()))) { + if (isNull(oi.getNullSequence(), bytes, valueIBegin, valueILength)) { return valueObjects[index] = 
null; } valueObjects[index] = LazyFactory diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java index e4cffc9..6a4dcaa 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.serde2.lazy; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.io.Text; /** * LazyPrimitive stores a primitive Object in a LazyObject. @@ -49,9 +50,7 @@ protected LazyNonPrimitive(OI oi) { @Override public void init(ByteArrayRef bytes, int start, int length) { - if (bytes == null) { - throw new RuntimeException("bytes cannot be null!"); - } + super.init(bytes, start, length); this.bytes = bytes; this.start = start; this.length = length; @@ -59,9 +58,18 @@ public void init(ByteArrayRef bytes, int start, int length) { assert start + length <= bytes.getData().length; } - @Override - public Object getObject() { - return this; + protected final boolean isNull( + Text nullSequence, ByteArrayRef ref, int fieldByteBegin, int fieldLength) { + return ref == null || isNull(nullSequence, ref.getData(), fieldByteBegin, fieldLength); + } + + protected final boolean isNull( + Text nullSequence, byte[] bytes, int fieldByteBegin, int fieldLength) { + // Test the length first so in most cases we avoid doing a byte[] + // comparison. 
+ return fieldLength < 0 || (fieldLength == nullSequence.getLength() && + LazyUtils.compare(bytes, fieldByteBegin, fieldLength, + nullSequence.getBytes(), 0, nullSequence.getLength()) == 0); } @Override diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java index 9b5ccbe..3c7e97f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java @@ -50,4 +50,28 @@ protected OI getInspector() { protected void setInspector(OI oi) { this.oi = oi; } + + protected boolean isNull; + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + if (bytes == null) { + throw new RuntimeException("bytes cannot be null!"); + } + this.isNull = false; + } + + @Override + public void setNull() { + this.isNull = true; + } + + /** + * Returns this LazyObject, or null if it is currently flagged as null (see setNull()). + * This makes sure callers get a real "null" for null objects. + */ + @Override + public Object getObject() { + return isNull ? null : this; + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java index 7e42b3f..a9b9128 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java @@ -36,6 +36,11 @@ void init(ByteArrayRef bytes, int start, int length); /** + * Marks this object as null instead of initializing it from bytes; called, for example, for null binary values and null HBase columns. + */ + void setNull(); + + /** * If the LazyObjectBase is a primitive Object, then deserialize it and return the * actual primitive Object. Otherwise (array, map, struct), return this.
*/ diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java index 7d23c46..32224a8 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java @@ -42,16 +42,6 @@ protected LazyPrimitive(LazyPrimitive copy) { } protected T data; - protected boolean isNull = false; - - /** - * Returns the primitive object represented by this LazyObject. This is useful - * because it can make sure we have "null" for null objects. - */ - @Override - public Object getObject() { - return isNull ? null : this; - } public T getWritableObject() { return isNull ? null : data; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java index 75b9556..539d767 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java @@ -45,6 +45,7 @@ public void init(ByteArrayRef bytes, int start, int length) { // if the data is not escaped, simply copy the data. data.set(bytes.getData(), start, length); } + isNull = false; } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java index 588cc8c..62df4eb 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.io.Text; /** * LazyObject for storing a struct. 
The field of a struct can be primitive or @@ -215,20 +214,15 @@ public Object getField(int fieldID) { * @return The value of the field */ private Object uncheckedGetField(int fieldID) { - Text nullSequence = oi.getNullSequence(); - // Test the length first so in most cases we avoid doing a byte[] - // comparison. - int fieldByteBegin = startPosition[fieldID]; - int fieldLength = startPosition[fieldID + 1] - startPosition[fieldID] - 1; - if ((fieldLength < 0) - || (fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes - .getData(), fieldByteBegin, fieldLength, nullSequence.getBytes(), - 0, nullSequence.getLength()) == 0)) { - return null; - } if (!fieldInited[fieldID]) { fieldInited[fieldID] = true; - fields[fieldID].init(bytes, fieldByteBegin, fieldLength); + int fieldByteBegin = startPosition[fieldID]; + int fieldLength = startPosition[fieldID + 1] - startPosition[fieldID] - 1; + if (isNull(oi.getNullSequence(), bytes, fieldByteBegin, fieldLength)) { + fields[fieldID].setNull(); + } else { + fields[fieldID].init(bytes, fieldByteBegin, fieldLength); + } } return fields[fieldID].getObject(); } @@ -255,11 +249,6 @@ private Object uncheckedGetField(int fieldID) { return cachedList; } - @Override - public Object getObject() { - return this; - } - protected boolean getParsed() { return parsed; } @@ -351,4 +340,4 @@ public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) { public byte[] getBytes() { return bytes.getData(); } -} \ No newline at end of file +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java index b3625b3..3eb75a2 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java @@ -42,6 +42,10 @@ protected LazyBinaryObject(OI oi) { this.oi = oi; } + public void setNull() { + throw new IllegalStateException("should 
not be called"); + } + @Override public abstract int hashCode(); }