diff --git accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloMap.java accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloMap.java index effdc4b..7bcec06 100644 --- accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloMap.java +++ accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloMap.java @@ -90,9 +90,14 @@ protected void parse() { LazyObject value = LazyFactory.createLazyObject(lazyMoi.getMapValueObjectInspector(), ColumnEncoding.BINARY == columnMapping.getValueEncoding()); - ByteArrayRef valueRef = new ByteArrayRef(); - valueRef.setData(tuple.getValue()); - value.init(valueRef, 0, valueRef.getData().length); + byte[] bytes = tuple.getValue(); + if (bytes == null || isNull(oi.getNullSequence(), bytes, 0, bytes.length)) { + value.setNull(); + } else { + ByteArrayRef valueRef = new ByteArrayRef(); + valueRef.setData(bytes); + value.init(valueRef, 0, valueRef.getData().length); + } cachedMap.put(key, value); } diff --git accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloRow.java accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloRow.java index 3dc2d3c..4597f5c 100644 --- accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloRow.java +++ accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloRow.java @@ -85,47 +85,48 @@ public Object getField(int id) { * split pairs by delimiter. */ private Object uncheckedGetField(int id) { - if (!getFieldInited()[id]) { - ByteArrayRef ref; - ColumnMapping columnMapping = columnMappings.get(id); + if (getFieldInited()[id]) { + return getFields()[id].getObject(); + } + getFieldInited()[id] = true; + + ColumnMapping columnMapping = columnMappings.get(id); - if (columnMapping instanceof HiveAccumuloMapColumnMapping) { - HiveAccumuloMapColumnMapping mapColumnMapping = (HiveAccumuloMapColumnMapping) columnMapping; + LazyObjectBase field = getFields()[id]; - LazyAccumuloMap map = (LazyAccumuloMap) getFields()[id]; - map.init(row, mapColumnMapping); + if (columnMapping instanceof HiveAccumuloMapColumnMapping) { + HiveAccumuloMapColumnMapping mapColumnMapping = (HiveAccumuloMapColumnMapping) columnMapping; + + LazyAccumuloMap map = (LazyAccumuloMap) field; + map.init(row, mapColumnMapping); + } else { + byte[] value; + if (columnMapping instanceof HiveAccumuloRowIdColumnMapping) { + // Use the rowID directly + value = row.getRowId().getBytes(); + } else if (columnMapping instanceof HiveAccumuloColumnMapping) { + HiveAccumuloColumnMapping accumuloColumnMapping = (HiveAccumuloColumnMapping) columnMapping; + + // Use the colfam and colqual to get the value + value = row.getValue( + new Text(accumuloColumnMapping.getColumnFamilyBytes()), + new Text(accumuloColumnMapping.getColumnQualifierBytes())); } else { - if (columnMapping instanceof HiveAccumuloRowIdColumnMapping) { - // Use the rowID directly - ref = new ByteArrayRef(); - ref.setData(row.getRowId().getBytes()); - } else if (columnMapping instanceof HiveAccumuloColumnMapping) { - HiveAccumuloColumnMapping accumuloColumnMapping = (HiveAccumuloColumnMapping) columnMapping; - - // Use the colfam and colqual to get the value - byte[] val = row.getValue(new Text(accumuloColumnMapping.getColumnFamily()), new Text( - accumuloColumnMapping.getColumnQualifier())); - if (val == null) { - return null; - } else { - ref = new ByteArrayRef(); - ref.setData(val); - } - } else { - log.error("Could not process ColumnMapping of type " + columnMapping.getClass() - + " at offset " + id + " in 
column mapping: " + columnMapping.getMappingSpec()); - throw new IllegalArgumentException("Cannot process ColumnMapping of type " - + columnMapping.getClass()); - } - - getFields()[id].init(ref, 0, ref.getData().length); + log.error("Could not process ColumnMapping of type " + columnMapping.getClass() + + " at offset " + id + " in column mapping: " + columnMapping.getMappingSpec()); + throw new IllegalArgumentException("Cannot process ColumnMapping of type " + + columnMapping.getClass()); + } + if (value == null || isNull(oi.getNullSequence(), value, 0, value.length)) { + field.setNull(); + } else { + ByteArrayRef ref = new ByteArrayRef(); + ref.setData(value); + field.init(ref, 0, value.length); } - - // HIVE-3179 only init the field when it isn't null - getFieldInited()[id] = true; } - return getFields()[id].getObject(); + return field.getObject(); } @Override diff --git accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java index 8c1ec8d..f835a96 100644 --- accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java +++ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java @@ -21,7 +21,6 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.log4j.Logger; @@ -33,9 +32,6 @@ private Properties tbl; private Configuration conf; - private byte[] bytes; - private int start, length; - private String bytesAsString; public FirstCharAccumuloCompositeRowId(LazySimpleStructObjectInspector oi, Properties tbl, Configuration conf) { @@ -45,19 +41,10 @@ public FirstCharAccumuloCompositeRowId(LazySimpleStructObjectInspector oi, Prope } @Override - public void init(ByteArrayRef bytes, int start, int length) { - this.bytes = bytes.getData(); - this.start = start; - this.length = length; - } - - @Override public Object getField(int fieldID) { - if (bytesAsString == null) { - this.bytesAsString = new String(bytes, start, length); - } + String bytesAsString = new String(bytes.getData(), start, length); - log.info("Data: " + bytesAsString + ", " + Arrays.toString(bytes)); + log.info("Data: " + bytesAsString + ", " + Arrays.toString(bytes.getData())); // The separator for the hive row would be using \x02, so the separator for this struct would be // \x02 + 1 = \x03 diff --git data/files/cbo_t1.txt data/files/cbo_t1.txt index e8034a4..8a0fbec 100644 --- data/files/cbo_t1.txt +++ data/files/cbo_t1.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t2.txt data/files/cbo_t2.txt index 34633d3..4e86437 100644 --- data/files/cbo_t2.txt +++ data/files/cbo_t2.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t3.txt data/files/cbo_t3.txt index a9c995e..87b1613f 100644 --- data/files/cbo_t3.txt +++ data/files/cbo_t3.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t4.txt data/files/cbo_t4.txt index 56e0794..22825b3 100644 
--- data/files/cbo_t4.txt +++ data/files/cbo_t4.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t5.txt data/files/cbo_t5.txt index 245b1b9..9dadc77 100644 --- data/files/cbo_t5.txt +++ data/files/cbo_t5.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git data/files/cbo_t6.txt data/files/cbo_t6.txt index dd72edd..5f25ccb 100644 --- data/files/cbo_t6.txt +++ data/files/cbo_t6.txt @@ -16,5 +16,5 @@ 1 ,1 ,1,1,true 1,1,1,1,false 1,1,1,1,false -null,null,null,null,null -null,null,null,null,null +\N,\N,\N,\N,\N +\N,\N,\N,\N,\N diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java index 2727b36..09cbf52 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java @@ -66,8 +66,8 @@ public void init( Result r, byte [] columnFamilyBytes, List binaryStorage, byte[] qualPrefix) { - - result = r; + this.isNull = false; + this.result = r; this.columnFamilyBytes = columnFamilyBytes; this.binaryStorage = binaryStorage; this.qualPrefix = qualPrefix; @@ -114,9 +114,15 @@ private void parse() { LazyFactory.createLazyObject(lazyMoi.getMapValueObjectInspector(), binaryStorage.get(1)); - ByteArrayRef valueRef = new ByteArrayRef(); - valueRef.setData(e.getValue()); - value.init(valueRef, 0, valueRef.getData().length); + byte[] bytes = e.getValue(); + + if (isNull(oi.getNullSequence(), bytes, 0, bytes.length)) { + value.setNull(); + } else { + ByteArrayRef valueRef = new ByteArrayRef(); + valueRef.setData(bytes); + value.init(valueRef, 0, valueRef.getData().length); + } // Put the key/value into the map cachedMap.put(key.getObject(), value.getObject()); diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java index 8a3eac7..868d81f 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java @@ -138,13 +138,21 @@ private Object uncheckedGetField(int fieldID) { boolean [] fieldsInited = getFieldInited(); if (!fieldsInited[fieldID]) { - ByteArrayRef ref = null; + fieldsInited[fieldID] = true; + ColumnMapping colMap = columnsMapping[fieldID]; - if (colMap.hbaseRowKey) { - ref = new ByteArrayRef(); - ref.setData(result.getRow()); - } else if (colMap.hbaseTimestamp) { + if (!colMap.hbaseRowKey && !colMap.hbaseTimestamp && colMap.qualifierName == null) { + // it is a column family + // primitive type for Map can be stored in binary format. 
Pass in the + // qualifier prefix to cherry pick the qualifiers that match the prefix instead of picking + // up everything + ((LazyHBaseCellMap) fields[fieldID]).init( + result, colMap.familyNameBytes, colMap.binaryStorage, colMap.qualifierPrefixBytes); + return fields[fieldID].getObject(); + } + + if (colMap.hbaseTimestamp) { long timestamp = result.rawCells()[0].getTimestamp(); // from hbase-0.96.0 LazyObjectBase lz = fields[fieldID]; if (lz instanceof LazyTimestamp) { @@ -152,35 +160,25 @@ private Object uncheckedGetField(int fieldID) { } else { ((LazyLong) lz).getWritableObject().set(timestamp); } - } else { - if (colMap.qualifierName == null) { - // it is a column family - // primitive type for Map can be stored in binary format. Pass in the - // qualifier prefix to cherry pick the qualifiers that match the prefix instead of picking - // up everything - ((LazyHBaseCellMap) fields[fieldID]).init( - result, colMap.familyNameBytes, colMap.binaryStorage, colMap.qualifierPrefixBytes); - } else { - // it is a column i.e. a column-family with column-qualifier - byte [] res = result.getValue(colMap.familyNameBytes, colMap.qualifierNameBytes); - - if (res == null) { - return null; - } else { - ref = new ByteArrayRef(); - ref.setData(res); - } - } + return lz.getObject(); } - if (ref != null) { - fields[fieldID].init(ref, 0, ref.getData().length); + byte[] bytes; + if (colMap.hbaseRowKey) { + bytes = result.getRow(); + } else { + // it is a column i.e. a column-family with column-qualifier + bytes = result.getValue(colMap.familyNameBytes, colMap.qualifierNameBytes); + } + if (bytes == null || isNull(oi.getNullSequence(), bytes, 0, bytes.length)) { + fields[fieldID].setNull(); + } else { + ByteArrayRef ref = new ByteArrayRef(); + ref.setData(bytes); + fields[fieldID].init(ref, 0, bytes.length); } } - // Has to be set last because of HIVE-3179: NULL fields would not work otherwise - fieldsInited[fieldID] = true; - return fields[fieldID].getObject(); } diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/SampleHBaseKeyFactory.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/SampleHBaseKeyFactory.java index 2d647e2..9541f18 100644 --- hbase-handler/src/test/org/apache/hadoop/hive/hbase/SampleHBaseKeyFactory.java +++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/SampleHBaseKeyFactory.java @@ -71,15 +71,22 @@ public LazyObjectBase createKey(ObjectInspector inspector) throws SerDeException private static class DoubleDollarSeparated implements LazyObjectBase { private Object[] fields; + private transient boolean isNull; @Override public void init(ByteArrayRef bytes, int start, int length) { fields = new String(bytes.getData(), start, length).split(DELIMITER_PATTERN); + isNull = false; + } + + @Override + public void setNull() { + isNull = true; } @Override public Object getObject() { - return this; + return isNull ? 
null : this; } } diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/SampleHBaseKeyFactory2.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/SampleHBaseKeyFactory2.java index 7086d57..24fa203 100644 --- hbase-handler/src/test/org/apache/hadoop/hive/hbase/SampleHBaseKeyFactory2.java +++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/SampleHBaseKeyFactory2.java @@ -200,6 +200,8 @@ private HBaseScanRange setupFilter(String keyColName, List private final int fixedLength; private final List fields = new ArrayList(); + private transient boolean isNull; + public FixedLengthed(int fixedLength) { this.fixedLength = fixedLength; } @@ -213,11 +215,17 @@ public void init(ByteArrayRef bytes, int start, int length) { for (; rowStart < length; rowStart = rowStop + 1, rowStop = rowStart + fixedLength) { fields.add(new String(data, rowStart, rowStop - rowStart).trim()); } + isNull = false; + } + + @Override + public void setNull() { + isNull = true; } @Override public Object getObject() { - return this; + return isNull ? null : this; } } diff --git ql/src/test/results/clientpositive/cbo_gby.q.out ql/src/test/results/clientpositive/cbo_gby.q.out index 7344c11..3dc8914 100644 --- ql/src/test/results/clientpositive/cbo_gby.q.out +++ ql/src/test/results/clientpositive/cbo_gby.q.out @@ -10,7 +10,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### -null NULL NULL +NULL NULL NULL 1 4 2 1 4 2 1 4 12 diff --git ql/src/test/results/clientpositive/cbo_join.q.out ql/src/test/results/clientpositive/cbo_join.q.out index e4e4e25..af34f5f 100644 --- ql/src/test/results/clientpositive/cbo_join.q.out +++ ql/src/test/results/clientpositive/cbo_join.q.out @@ -110,10 +110,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 1 1 1 -NULL NULL -NULL NULL -NULL NULL -NULL NULL PREHOOK: query: select cbo_t1.key from cbo_t1 join cbo_t3 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -126,46 +122,46 @@ POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL 1 1 1 @@ -636,6 +632,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### +NULL NULL +NULL NULL 1 1 1 1 1 1 @@ -732,10 +730,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 1 1 1 -NULL NULL -NULL NULL -NULL NULL -NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 right outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -750,6 +744,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### +NULL NULL +NULL NULL 1 1 1 1 1 1 @@ -851,10 +847,6 @@ NULL 2 NULL 2 NULL 2 NULL 2 -NULL NULL -NULL NULL -NULL NULL -NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: 
default@cbo_t1 @@ -869,6 +861,10 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### +NULL NULL +NULL NULL +NULL NULL +NULL NULL 1 1 1 1 1 1 @@ -970,10 +966,6 @@ NULL 2 NULL 2 NULL 2 NULL 2 -NULL NULL -NULL NULL -NULL NULL -NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -1518,14 +1510,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1 1.0 1 1 1 1 1.0 1 1 1 -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL PREHOOK: query: select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -2070,14 +2054,6 @@ POSTHOOK: Input: default@cbo_t3 1 1 1 1 1 1 1 1 1 1 1 1 -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL PREHOOK: query: select a, cbo_t1.b, key, cbo_t2.c_int, cbo_t3.p from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join cbo_t2 on cbo_t1.a=key join (select key as p, c_int as q, cbo_t3.c_float as r from cbo_t3)cbo_t3 on cbo_t1.a=cbo_t3.p PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -2622,14 +2598,6 @@ POSTHOOK: Input: default@cbo_t3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null PREHOOK: query: select b, cbo_t1.c, cbo_t2.c_int, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join cbo_t2 on cbo_t1.a=cbo_t2.key join cbo_t3 on cbo_t1.a=cbo_t3.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -3174,14 +3142,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1.0 1 1 1 1.0 1 1 -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL PREHOOK: query: select cbo_t3.c_int, b, cbo_t2.c_int, cbo_t1.c from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join cbo_t2 on cbo_t1.a=cbo_t2.key join cbo_t3 on cbo_t1.a=cbo_t3.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -3726,14 +3686,6 @@ POSTHOOK: Input: default@cbo_t3 1 1 1 1.0 1 1 1 1.0 1 1 1 1.0 -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 left outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -4278,14 +4230,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1 1.0 1 1 1 1 1.0 1 1 1 -NULL 
NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL PREHOOK: query: select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p left outer join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -4830,14 +4774,6 @@ POSTHOOK: Input: default@cbo_t3 1 1 1 1 1 1 1 1 1 1 1 1 -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 right outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -5382,14 +5318,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1 1.0 1 1 1 1 1.0 1 1 1 -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL PREHOOK: query: select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p right outer join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -5406,6 +5334,8 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### +NULL NULL NULL NULL +NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -5940,14 +5870,6 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6492,14 +6414,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1 1.0 1 1 1 1 1.0 1 1 1 -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL PREHOOK: query: select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p full outer join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6516,6 +6430,8 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### +NULL NULL NULL NULL +NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -7050,14 +6966,6 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL 
null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL PREHOOK: query: -- 5. Test Select + Join + FIL + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + cbo_t2.c_int == 2) and (cbo_t1.c_int > 0 or cbo_t2.c_float >= 0) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/cbo_limit.q.out ql/src/test/results/clientpositive/cbo_limit.q.out index f4385fa..c059399 100644 --- ql/src/test/results/clientpositive/cbo_limit.q.out +++ ql/src/test/results/clientpositive/cbo_limit.q.out @@ -10,7 +10,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### -null NULL NULL +NULL NULL NULL PREHOOK: query: select x, y, count(*) from (select key, (c_int+c_float+1+2) as x, sum(c_int) as y from cbo_t1 group by c_float, cbo_t1.c_int, key) R group by y, x order by x,y limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -47,8 +47,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### -null NULL -null NULL +NULL NULL +NULL NULL 1 1 1 1 1 1 diff --git ql/src/test/results/clientpositive/cbo_semijoin.q.out ql/src/test/results/clientpositive/cbo_semijoin.q.out index b7f6be4..bdd8125 100644 --- ql/src/test/results/clientpositive/cbo_semijoin.q.out +++ ql/src/test/results/clientpositive/cbo_semijoin.q.out @@ -32,8 +32,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 -NULL -NULL PREHOOK: query: select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 diff --git ql/src/test/results/clientpositive/cbo_simple_select.q.out ql/src/test/results/clientpositive/cbo_simple_select.q.out index 87b85cf..b392637 100644 --- ql/src/test/results/clientpositive/cbo_simple_select.q.out +++ ql/src/test/results/clientpositive/cbo_simple_select.q.out @@ -28,8 +28,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 PREHOOK: query: select * from cbo_t1 as cbo_t1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -58,8 +58,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 PREHOOK: query: select * from cbo_t1 as cbo_t2 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -88,8 +88,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 PREHOOK: query: select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -118,8 +118,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 25.0 1 1 25.0 1 1 25.0 -null NULL NULL -null NULL NULL +NULL NULL NULL +NULL NULL NULL PREHOOK: query: -- 2. 
Test Select + TS + FIL select * from cbo_t1 where cbo_t1.c_int >= 0 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/cbo_udf_udaf.q.out ql/src/test/results/clientpositive/cbo_udf_udaf.q.out index ed3a3e7..6844cbb 100644 --- ql/src/test/results/clientpositive/cbo_udf_udaf.q.out +++ ql/src/test/results/clientpositive/cbo_udf_udaf.q.out @@ -78,11 +78,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### +0 NULL NULL 2 1.0 1 2 1.0 1 12 1.0 1 2 1.0 1 -0 NULL null PREHOOK: query: select count(distinct c_int) as a, avg(c_float) from cbo_t1 group by c_float PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 diff --git ql/src/test/results/clientpositive/cbo_union.q.out ql/src/test/results/clientpositive/cbo_union.q.out index eb02b03..813a0a0 100644 --- ql/src/test/results/clientpositive/cbo_union.q.out +++ ql/src/test/results/clientpositive/cbo_union.q.out @@ -14,6 +14,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### +NULL NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 @@ -32,8 +34,8 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 @@ -52,8 +54,6 @@ null null NULL NULL NULL 2014 2 2 2 2.0 true 2014 2 2 2 2.0 true 2014 2 2 2 2.0 true 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 PREHOOK: query: select key from (select key, c_int from (select * from cbo_t1 union all select * from cbo_t2 where cbo_t2.key >=0)r1 union all select key, c_int from cbo_t3)r2 where key >=0 order by key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 diff --git ql/src/test/results/clientpositive/cbo_views.q.out ql/src/test/results/clientpositive/cbo_views.q.out index 10fa8c8..4a7b935 100644 --- ql/src/test/results/clientpositive/cbo_views.q.out +++ ql/src/test/results/clientpositive/cbo_views.q.out @@ -97,7 +97,7 @@ POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@v1 #### A masked pattern was here #### -160 +156 PREHOOK: query: create view v3 as select v1.value val from v1 join cbo_t1 on v1.c_boolean = cbo_t1.c_boolean PREHOOK: type: CREATEVIEW PREHOOK: Input: default@cbo_t1 diff --git ql/src/test/results/clientpositive/cbo_windowing.q.out ql/src/test/results/clientpositive/cbo_windowing.q.out index a96def8..1a274b0 100644 --- ql/src/test/results/clientpositive/cbo_windowing.q.out +++ ql/src/test/results/clientpositive/cbo_windowing.q.out @@ -171,6 +171,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### +NULL NULL 0 NULL NULL NULL NULL NULL NULL +NULL NULL 0 NULL NULL NULL NULL NULL NULL 1 1 1 1.0 1.0 1.0 1.0 1.0 1.0 1 1 2 1.0 2.0 1.0 2.0 1.0 1.0 1 1 1 1.0 1.0 1.0 1.0 1.0 1.0 @@ -189,8 +191,6 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 2 1.0 12.0 1.0 2.0 1.0 1.0 1 1 1 1.0 1.0 1.0 1.0 1.0 1.0 1 1 2 1.0 2.0 1.0 2.0 1.0 1.0 -NULL NULL 0 NULL 0.0 NULL NULL NULL NULL -NULL NULL 0 NULL 0.0 NULL NULL NULL NULL PREHOOK: query: select i, a, h, b, c, d, e, f, g, a as x, a +1 as y from (select max(c_int) over (partition by key order by value range UNBOUNDED PRECEDING) a, 
min(c_int) over (partition by key order by value range current row) b, count(c_int) over(partition by key order by value range 1 PRECEDING) c, avg(value) over (partition by key order by value range between unbounded preceding and unbounded following) d, sum(value) over (partition by key order by value range between unbounded preceding and current row) e, avg(c_float) over (partition by key order by value range between 1 preceding and unbounded following) f, sum(c_float) over (partition by key order by value range between 1 preceding and current row) g, max(c_float) over (partition by key order by value range between 1 preceding and unbounded following) h, min(c_float) over (partition by key order by value range between 1 preceding and 1 following) i from cbo_t1) cbo_t1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -201,6 +201,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### +NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 @@ -219,8 +221,6 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1.0 1 1.0 1 12 1.0 12.0 1.0 12.0 1 2 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 -NULL NULL NULL NULL 0 NULL 0.0 NULL NULL NULL NULL -NULL NULL NULL NULL 0 NULL 0.0 NULL NULL NULL NULL PREHOOK: query: select *, rank() over(partition by key order by value) as rr from src1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 diff --git ql/src/test/results/clientpositive/tez/cbo_gby.q.out ql/src/test/results/clientpositive/tez/cbo_gby.q.out index 7344c11..3dc8914 100644 --- ql/src/test/results/clientpositive/tez/cbo_gby.q.out +++ ql/src/test/results/clientpositive/tez/cbo_gby.q.out @@ -10,7 +10,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### -null NULL NULL +NULL NULL NULL 1 4 2 1 4 2 1 4 12 diff --git ql/src/test/results/clientpositive/tez/cbo_join.q.out ql/src/test/results/clientpositive/tez/cbo_join.q.out index e4e4e25..af34f5f 100644 --- ql/src/test/results/clientpositive/tez/cbo_join.q.out +++ ql/src/test/results/clientpositive/tez/cbo_join.q.out @@ -110,10 +110,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 1 1 1 -NULL NULL -NULL NULL -NULL NULL -NULL NULL PREHOOK: query: select cbo_t1.key from cbo_t1 join cbo_t3 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -126,46 +122,46 @@ POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null -null +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL 1 1 1 @@ -636,6 +632,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### +NULL NULL +NULL NULL 1 1 1 1 1 1 @@ -732,10 +730,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 1 1 1 -NULL NULL -NULL NULL -NULL NULL -NULL NULL PREHOOK: query: select cbo_t1.c_int, 
cbo_t2.c_int from cbo_t1 right outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -750,6 +744,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### +NULL NULL +NULL NULL 1 1 1 1 1 1 @@ -851,10 +847,6 @@ NULL 2 NULL 2 NULL 2 NULL 2 -NULL NULL -NULL NULL -NULL NULL -NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -869,6 +861,10 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### +NULL NULL +NULL NULL +NULL NULL +NULL NULL 1 1 1 1 1 1 @@ -970,10 +966,6 @@ NULL 2 NULL 2 NULL 2 NULL 2 -NULL NULL -NULL NULL -NULL NULL -NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -1518,14 +1510,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1 1.0 1 1 1 1 1.0 1 1 1 -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL PREHOOK: query: select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -2070,14 +2054,6 @@ POSTHOOK: Input: default@cbo_t3 1 1 1 1 1 1 1 1 1 1 1 1 -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL PREHOOK: query: select a, cbo_t1.b, key, cbo_t2.c_int, cbo_t3.p from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join cbo_t2 on cbo_t1.a=key join (select key as p, c_int as q, cbo_t3.c_float as r from cbo_t3)cbo_t3 on cbo_t1.a=cbo_t3.p PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -2622,14 +2598,6 @@ POSTHOOK: Input: default@cbo_t3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null -null NULL null NULL null PREHOOK: query: select b, cbo_t1.c, cbo_t2.c_int, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join cbo_t2 on cbo_t1.a=cbo_t2.key join cbo_t3 on cbo_t1.a=cbo_t3.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -3174,14 +3142,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1.0 1 1 1 1.0 1 1 -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL PREHOOK: query: select cbo_t3.c_int, b, cbo_t2.c_int, cbo_t1.c from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join cbo_t2 on cbo_t1.a=cbo_t2.key join cbo_t3 on cbo_t1.a=cbo_t3.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -3726,14 +3686,6 @@ POSTHOOK: Input: default@cbo_t3 1 1 1 1.0 1 1 1 1.0 1 1 1 1.0 -NULL NULL NULL NULL 
-NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL -NULL NULL NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 left outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -4278,14 +4230,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1 1.0 1 1 1 1 1.0 1 1 1 -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL PREHOOK: query: select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p left outer join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -4830,14 +4774,6 @@ POSTHOOK: Input: default@cbo_t3 1 1 1 1 1 1 1 1 1 1 1 1 -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 right outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -5382,14 +5318,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1 1.0 1 1 1 1 1.0 1 1 1 -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL PREHOOK: query: select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.key=p right outer join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -5406,6 +5334,8 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### +NULL NULL NULL NULL +NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -5940,14 +5870,6 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6492,14 +6414,6 @@ POSTHOOK: Input: default@cbo_t3 1 1.0 1 1 1 1 1.0 1 1 1 1 1.0 1 1 1 -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL -NULL NULL null NULL NULL PREHOOK: query: select key, cbo_t1.c_int, cbo_t2.p, q from cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on 
cbo_t1.key=p full outer join (select key as a, c_int as b, cbo_t3.c_float as c from cbo_t3)cbo_t3 on cbo_t1.key=a PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6516,6 +6430,8 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### +NULL NULL NULL NULL +NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -7050,14 +6966,6 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL -null NULL null NULL PREHOOK: query: -- 5. Test Select + Join + FIL + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + cbo_t2.c_int == 2) and (cbo_t1.c_int > 0 or cbo_t2.c_float >= 0) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/cbo_limit.q.out ql/src/test/results/clientpositive/tez/cbo_limit.q.out index f4385fa..c059399 100644 --- ql/src/test/results/clientpositive/tez/cbo_limit.q.out +++ ql/src/test/results/clientpositive/tez/cbo_limit.q.out @@ -10,7 +10,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### -null NULL NULL +NULL NULL NULL PREHOOK: query: select x, y, count(*) from (select key, (c_int+c_float+1+2) as x, sum(c_int) as y from cbo_t1 group by c_float, cbo_t1.c_int, key) R group by y, x order by x,y limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -47,8 +47,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### -null NULL -null NULL +NULL NULL +NULL NULL 1 1 1 1 1 1 diff --git ql/src/test/results/clientpositive/tez/cbo_semijoin.q.out ql/src/test/results/clientpositive/tez/cbo_semijoin.q.out index b7f6be4..bdd8125 100644 --- ql/src/test/results/clientpositive/tez/cbo_semijoin.q.out +++ ql/src/test/results/clientpositive/tez/cbo_semijoin.q.out @@ -32,8 +32,6 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 -NULL -NULL PREHOOK: query: select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 diff --git ql/src/test/results/clientpositive/tez/cbo_simple_select.q.out ql/src/test/results/clientpositive/tez/cbo_simple_select.q.out index 87b85cf..b392637 100644 --- ql/src/test/results/clientpositive/tez/cbo_simple_select.q.out +++ ql/src/test/results/clientpositive/tez/cbo_simple_select.q.out @@ -28,8 +28,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 PREHOOK: query: select * from cbo_t1 as cbo_t1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -58,8 +58,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 PREHOOK: query: select * from cbo_t1 as cbo_t2 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -88,8 +88,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 false 2014 1 1 1 1.0 false 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 +NULL 
NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 PREHOOK: query: select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -118,8 +118,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 25.0 1 1 25.0 1 1 25.0 -null NULL NULL -null NULL NULL +NULL NULL NULL +NULL NULL NULL PREHOOK: query: -- 2. Test Select + TS + FIL select * from cbo_t1 where cbo_t1.c_int >= 0 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/cbo_udf_udaf.q.out ql/src/test/results/clientpositive/tez/cbo_udf_udaf.q.out index ed3a3e7..6844cbb 100644 --- ql/src/test/results/clientpositive/tez/cbo_udf_udaf.q.out +++ ql/src/test/results/clientpositive/tez/cbo_udf_udaf.q.out @@ -78,11 +78,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### +0 NULL NULL 2 1.0 1 2 1.0 1 12 1.0 1 2 1.0 1 -0 NULL null PREHOOK: query: select count(distinct c_int) as a, avg(c_float) from cbo_t1 group by c_float PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 diff --git ql/src/test/results/clientpositive/tez/cbo_union.q.out ql/src/test/results/clientpositive/tez/cbo_union.q.out index eb02b03..813a0a0 100644 --- ql/src/test/results/clientpositive/tez/cbo_union.q.out +++ ql/src/test/results/clientpositive/tez/cbo_union.q.out @@ -14,6 +14,8 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### +NULL NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 @@ -32,8 +34,8 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 +NULL NULL NULL NULL NULL 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 @@ -52,8 +54,6 @@ null null NULL NULL NULL 2014 2 2 2 2.0 true 2014 2 2 2 2.0 true 2014 2 2 2 2.0 true 2014 -null null NULL NULL NULL 2014 -null null NULL NULL NULL 2014 PREHOOK: query: select key from (select key, c_int from (select * from cbo_t1 union all select * from cbo_t2 where cbo_t2.key >=0)r1 union all select key, c_int from cbo_t3)r2 where key >=0 order by key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 diff --git ql/src/test/results/clientpositive/tez/cbo_views.q.out ql/src/test/results/clientpositive/tez/cbo_views.q.out index 10fa8c8..4a7b935 100644 --- ql/src/test/results/clientpositive/tez/cbo_views.q.out +++ ql/src/test/results/clientpositive/tez/cbo_views.q.out @@ -97,7 +97,7 @@ POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@v1 #### A masked pattern was here #### -160 +156 PREHOOK: query: create view v3 as select v1.value val from v1 join cbo_t1 on v1.c_boolean = cbo_t1.c_boolean PREHOOK: type: CREATEVIEW PREHOOK: Input: default@cbo_t1 diff --git ql/src/test/results/clientpositive/tez/cbo_windowing.q.out ql/src/test/results/clientpositive/tez/cbo_windowing.q.out index a96def8..1a274b0 100644 --- ql/src/test/results/clientpositive/tez/cbo_windowing.q.out +++ ql/src/test/results/clientpositive/tez/cbo_windowing.q.out @@ -171,6 +171,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### +NULL NULL 0 NULL NULL NULL NULL NULL NULL +NULL NULL 0 NULL NULL NULL NULL NULL NULL 1 1 1 1.0 1.0 1.0 1.0 1.0 1.0 1 1 2 1.0 2.0 1.0 
2.0 1.0 1.0 1 1 1 1.0 1.0 1.0 1.0 1.0 1.0 @@ -189,8 +191,6 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 2 1.0 12.0 1.0 2.0 1.0 1.0 1 1 1 1.0 1.0 1.0 1.0 1.0 1.0 1 1 2 1.0 2.0 1.0 2.0 1.0 1.0 -NULL NULL 0 NULL 0.0 NULL NULL NULL NULL -NULL NULL 0 NULL 0.0 NULL NULL NULL NULL PREHOOK: query: select i, a, h, b, c, d, e, f, g, a as x, a +1 as y from (select max(c_int) over (partition by key order by value range UNBOUNDED PRECEDING) a, min(c_int) over (partition by key order by value range current row) b, count(c_int) over(partition by key order by value range 1 PRECEDING) c, avg(value) over (partition by key order by value range between unbounded preceding and unbounded following) d, sum(value) over (partition by key order by value range between unbounded preceding and current row) e, avg(c_float) over (partition by key order by value range between 1 preceding and unbounded following) f, sum(c_float) over (partition by key order by value range between 1 preceding and current row) g, max(c_float) over (partition by key order by value range between 1 preceding and unbounded following) h, min(c_float) over (partition by key order by value range between 1 preceding and 1 following) i from cbo_t1) cbo_t1 PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -201,6 +201,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 #### A masked pattern was here #### +NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 @@ -219,8 +221,6 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1.0 1 1.0 1 12 1.0 12.0 1.0 12.0 1 2 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 1.0 1 1.0 1 2 1.0 2.0 1.0 2.0 1 2 -NULL NULL NULL NULL 0 NULL 0.0 NULL NULL NULL NULL -NULL NULL NULL NULL 0 NULL 0.0 NULL NULL NULL NULL PREHOOK: query: select *, rank() over(partition by key order by value) as rr from src1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java index a6e8c2c..2fb1c28 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java @@ -110,16 +110,11 @@ public void setSchemaRetriever(AvroSchemaRetriever schemaRetriever) { @SuppressWarnings("unchecked") @Override - public Object getStructFieldData(Object data, StructField fieldRef) { + public Object getStructFieldData(Object data, StructField f) { if (data == null) { return null; } - if (!(fieldRef instanceof MyField)) { - throw new IllegalArgumentException("fieldRef has to be of MyField"); - } - - MyField f = (MyField) fieldRef; int fieldID = f.getFieldID(); if (LOG.isDebugEnabled()) { @@ -189,7 +184,7 @@ public Object getStructFieldData(Object data, StructField fieldRef) { } // convert to a lazy object and return - return toLazyObject(field, fieldRef.getFieldObjectInspector()); + return toLazyObject(field, f.getFieldObjectInspector()); } } @@ -464,13 +459,13 @@ private Object toLazyUnionObject(Object obj, ObjectInspector objectInspector) { } StandardUnion standardUnion = (StandardUnion) obj; + LazyUnionObjectInspector lazyUnionOI = (LazyUnionObjectInspector) objectInspector; // Grab the tag and the field byte tag = standardUnion.getTag(); Object field = standardUnion.getObject(); - ObjectInspector fieldOI = - 
((LazyUnionObjectInspector) objectInspector).getObjectInspectors().get(tag); + ObjectInspector fieldOI = lazyUnionOI.getObjectInspectors().get(tag); // convert to lazy object Object convertedObj = null; @@ -483,12 +478,7 @@ private Object toLazyUnionObject(Object obj, ObjectInspector objectInspector) { return null; } - LazyUnion lazyUnion = (LazyUnion) LazyFactory.createLazyObject(objectInspector); - - lazyUnion.setField(convertedObj); - lazyUnion.setTag(tag); - - return lazyUnion; + return new LazyUnion(lazyUnionOI, tag, convertedObj); } /** diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java index ce0cfb3..15b55d1 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java @@ -22,7 +22,6 @@ import java.util.List; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyListObjectInspector; -import org.apache.hadoop.io.Text; /** * LazyArray stores an array of Lazy Objects. @@ -176,22 +175,20 @@ public Object getListElementObject(int index) { */ private Object uncheckedGetElement(int index) { if (elementInited[index]) { - return arrayElements[index] == null ? null : arrayElements[index].getObject(); + return arrayElements[index].getObject(); } elementInited[index] = true; - Text nullSequence = oi.getNullSequence(); - - int elementLength = startPosition[index + 1] - startPosition[index] - 1; - if (elementLength == nullSequence.getLength() - && 0 == LazyUtils - .compare(bytes.getData(), startPosition[index], elementLength, - nullSequence.getBytes(), 0, nullSequence.getLength())) { - return arrayElements[index] = null; + int elementStart = startPosition[index]; + int elementLength = startPosition[index + 1] - elementStart - 1; + if (arrayElements[index] == null) { + arrayElements[index] = LazyFactory.createLazyObject(oi.getListElementObjectInspector()); + } + if (isNull(oi.getNullSequence(), bytes, elementStart, elementLength)) { + arrayElements[index].setNull(); + } else { + arrayElements[index].init(bytes, elementStart, elementLength); } - arrayElements[index] = LazyFactory - .createLazyObject(oi.getListElementObjectInspector()); - arrayElements[index].init(bytes, startPosition[index], elementLength); return arrayElements[index].getObject(); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java index c25a1b8..982e6d3 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java @@ -18,14 +18,11 @@ package org.apache.hadoop.hive.serde2.lazy; -import java.nio.charset.CharacterCodingException; - import org.apache.commons.codec.binary.Base64; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector; import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; public class LazyBinary extends LazyPrimitive { @@ -46,18 +43,21 @@ public LazyBinary(LazyBinary other){ @Override public void init(ByteArrayRef bytes, int start, int length) { - + super.init(bytes, start, length); byte[] recv = new byte[length]; System.arraycopy(bytes.getData(), start, recv, 0, length); - boolean arrayByteBase64 = Base64.isArrayByteBase64(recv); - if (arrayByteBase64) { - LOG.debug("Data not contains valid characters within the Base64 
alphabet so " + - "decoded the data."); - } - byte[] decoded = arrayByteBase64 ? Base64.decodeBase64(recv) : recv; + byte[] decoded = decodeIfNeeded(recv); // use the original bytes in case decoding should fail decoded = decoded.length > 0 ? decoded : recv; data.set(decoded, 0, decoded.length); } + // todo this should be configured in serde + private byte[] decodeIfNeeded(byte[] recv) { + boolean arrayByteBase64 = Base64.isArrayByteBase64(recv); + if (arrayByteBase64) { + LOG.debug("Data only contains Base64 alphabets only so try to decode the data."); + } + return arrayByteBase64 ? Base64.decodeBase64(recv) : recv; + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java index e6932d9..9dd7c7e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java @@ -248,8 +248,7 @@ public Object getMapValueElement(Object key) { } if (keyI.equals(key)) { // Got a match, return the value - LazyObject v = uncheckedGetValue(i); - return v == null ? v : v.getObject(); + return uncheckedGetValue(i); } } @@ -262,24 +261,23 @@ public Object getMapValueElement(Object key) { * @param index * The index into the array starting from 0 */ - private LazyObject uncheckedGetValue(int index) { + private Object uncheckedGetValue(int index) { if (valueInited[index]) { - return valueObjects[index]; + return valueObjects[index].getObject(); } valueInited[index] = true; Text nullSequence = oi.getNullSequence(); int valueIBegin = keyEnd[index] + 1; int valueILength = valueLength[index]; - if (valueILength < 0 - || ((valueILength == nullSequence.getLength()) && 0 == LazyUtils - .compare(bytes.getData(), valueIBegin, valueILength, nullSequence - .getBytes(), 0, nullSequence.getLength()))) { - return valueObjects[index] = null; + if (valueObjects[index] == null) { + valueObjects[index] = LazyFactory.createLazyObject(oi.getMapValueObjectInspector()); } - valueObjects[index] = LazyFactory - .createLazyObject(oi.getMapValueObjectInspector()); - valueObjects[index].init(bytes, valueIBegin, valueILength); - return valueObjects[index]; + if (isNull(oi.getNullSequence(), bytes, valueIBegin, valueILength)) { + valueObjects[index].setNull(); + } else { + valueObjects[index].init(bytes, valueIBegin, valueILength); + } + return valueObjects[index].getObject(); } /** @@ -292,20 +290,16 @@ private LazyObject uncheckedGetValue(int index) { if (keyInited[index]) { return keyObjects[index]; } - keyInited[index] = true; - - Text nullSequence = oi.getNullSequence(); int keyIBegin = keyStart[index]; int keyILength = keyEnd[index] - keyStart[index]; - if (keyILength < 0 - || ((keyILength == nullSequence.getLength()) && 0 == LazyUtils.compare( - bytes.getData(), keyIBegin, keyILength, nullSequence.getBytes(), 0, - nullSequence.getLength()))) { - return keyObjects[index] = null; + if (isNull(oi.getNullSequence(), bytes, keyIBegin, keyILength)) { + return null; + } + keyInited[index] = true; + if (keyObjects[index] == null) { + keyObjects[index] = LazyFactory.createLazyPrimitiveClass( + (PrimitiveObjectInspector) oi.getMapKeyObjectInspector()); } - // Keys are always primitive - keyObjects[index] = LazyFactory - .createLazyPrimitiveClass((PrimitiveObjectInspector) oi.getMapKeyObjectInspector()); keyObjects[index].init(bytes, keyIBegin, keyILength); return keyObjects[index]; } @@ -341,9 +335,7 @@ private LazyObject uncheckedGetValue(int index) { Object key = lazyKey.getObject(); // do 
not overwrite if there are duplicate keys if (key != null && !cachedMap.containsKey(key)) { - LazyObject lazyValue = uncheckedGetValue(i); - Object value = (lazyValue == null ? null : lazyValue.getObject()); - cachedMap.put(key, value); + cachedMap.put(key, uncheckedGetValue(i)); } } return cachedMap; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java index e4cffc9..6a4dcaa 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.serde2.lazy; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.io.Text; /** * LazyPrimitive stores a primitive Object in a LazyObject. @@ -49,9 +50,7 @@ protected LazyNonPrimitive(OI oi) { @Override public void init(ByteArrayRef bytes, int start, int length) { - if (bytes == null) { - throw new RuntimeException("bytes cannot be null!"); - } + super.init(bytes, start, length); this.bytes = bytes; this.start = start; this.length = length; @@ -59,9 +58,18 @@ public void init(ByteArrayRef bytes, int start, int length) { assert start + length <= bytes.getData().length; } - @Override - public Object getObject() { - return this; + protected final boolean isNull( + Text nullSequence, ByteArrayRef ref, int fieldByteBegin, int fieldLength) { + return ref == null || isNull(nullSequence, ref.getData(), fieldByteBegin, fieldLength); + } + + protected final boolean isNull( + Text nullSequence, byte[] bytes, int fieldByteBegin, int fieldLength) { + // Test the length first so in most cases we avoid doing a byte[] + // comparison. + return fieldLength < 0 || (fieldLength == nullSequence.getLength() && + LazyUtils.compare(bytes, fieldByteBegin, fieldLength, + nullSequence.getBytes(), 0, nullSequence.getLength()) == 0); } @Override diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java index 9b5ccbe..3c7e97f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java @@ -50,4 +50,28 @@ protected OI getInspector() { protected void setInspector(OI oi) { this.oi = oi; } + + protected boolean isNull; + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + if (bytes == null) { + throw new RuntimeException("bytes cannot be null!"); + } + this.isNull = false; + } + + @Override + public void setNull() { + this.isNull = true; + } + + /** + * Returns the primitive object represented by this LazyObject. This is useful + * because it can make sure we have "null" for null objects. + */ + @Override + public Object getObject() { + return isNull ? null : this; + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java index 7e42b3f..a9b9128 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java @@ -36,6 +36,11 @@ void init(ByteArrayRef bytes, int start, int length); /** + * called for null binary, hbase columns, for example + */ + void setNull(); + + /** * If the LazyObjectBase is a primitive Object, then deserialize it and return the * actual primitive Object. Otherwise (array, map, struct), return this. 
*/ diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java index 7d23c46..32224a8 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java @@ -42,16 +42,6 @@ protected LazyPrimitive(LazyPrimitive copy) { } protected T data; - protected boolean isNull = false; - - /** - * Returns the primitive object represented by this LazyObject. This is useful - * because it can make sure we have "null" for null objects. - */ - @Override - public Object getObject() { - return isNull ? null : this; - } public T getWritableObject() { return isNull ? null : data; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java index 75b9556..539d767 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java @@ -45,6 +45,7 @@ public void init(ByteArrayRef bytes, int start, int length) { // if the data is not escaped, simply copy the data. data.set(bytes.getData(), start, length); } + isNull = false; } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java index 588cc8c..9a246af 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java @@ -29,8 +29,6 @@ import org.apache.hadoop.hive.serde2.StructObject; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.io.Text; /** * LazyObject for storing a struct. The field of a struct can be primitive or @@ -215,25 +213,22 @@ public Object getField(int fieldID) { * @return The value of the field */ private Object uncheckedGetField(int fieldID) { - Text nullSequence = oi.getNullSequence(); - // Test the length first so in most cases we avoid doing a byte[] - // comparison. + if (fieldInited[fieldID]) { + return fields[fieldID].getObject(); + } + fieldInited[fieldID] = true; + int fieldByteBegin = startPosition[fieldID]; int fieldLength = startPosition[fieldID + 1] - startPosition[fieldID] - 1; - if ((fieldLength < 0) - || (fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes - .getData(), fieldByteBegin, fieldLength, nullSequence.getBytes(), - 0, nullSequence.getLength()) == 0)) { - return null; - } - if (!fieldInited[fieldID]) { - fieldInited[fieldID] = true; + if (isNull(oi.getNullSequence(), bytes, fieldByteBegin, fieldLength)) { + fields[fieldID].setNull(); + } else { fields[fieldID].init(bytes, fieldByteBegin, fieldLength); } return fields[fieldID].getObject(); } - List cachedList; + private transient List cachedList; /** * Get the values of the fields as an ArrayList. 
@@ -255,11 +250,6 @@ private Object uncheckedGetField(int fieldID) { return cachedList; } - @Override - public Object getObject() { - return this; - } - protected boolean getParsed() { return parsed; } @@ -294,7 +284,7 @@ public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) { return; } if (fields == null) { - List fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs(); + List fieldRefs = oi.getAllStructFieldRefs(); fields = new LazyObject[fieldRefs.size()]; for (int i = 0; i < fields.length; i++) { fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector()); @@ -351,4 +341,4 @@ public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) { public byte[] getBytes() { return bytes.getData(); } -} \ No newline at end of file +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java index 22fc638..45a9eb0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.lazy; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector; -import org.apache.hadoop.io.Text; /** * LazyObject for storing a union. The field of a union can be primitive or @@ -52,11 +51,6 @@ private boolean fieldInited = false; /** - * Whether the tag has been set or not - * */ - private boolean tagSet = false; - - /** * Whether the field has been set or not * */ private boolean fieldSet = false; @@ -68,6 +62,14 @@ public LazyUnion(LazyUnionObjectInspector oi) { super(oi); } + // exceptional use case for avro + public LazyUnion(LazyUnionObjectInspector oi, byte tag, Object field) { + super(oi); + this.field = field; + this.tag = tag; + fieldSet = true; + } + /** * Set the row data for this LazyUnion. 
* @@ -133,19 +135,20 @@ private void parse() { */ @SuppressWarnings("rawtypes") private Object uncheckedGetField() { - Text nullSequence = oi.getNullSequence(); - int fieldLength = start + length - startPosition; - if (fieldLength != 0 && fieldLength == nullSequence.getLength() && - LazyUtils.compare(bytes.getData(), startPosition, fieldLength, - nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) { - return null; + LazyObject field = (LazyObject) this.field; + if (fieldInited) { + return field.getObject(); } + fieldInited = true; - if (!fieldInited) { - fieldInited = true; - ((LazyObject) field).init(bytes, startPosition, fieldLength); + int fieldStart = startPosition; + int fieldLength = start + length - startPosition; + if (isNull(oi.getNullSequence(), bytes, fieldStart, fieldLength)) { + field.setNull(); + } else { + field.init(bytes, fieldStart, fieldLength); } - return ((LazyObject) field).getObject(); + return field.getObject(); } /** @@ -170,7 +173,7 @@ public Object getField() { * @return The tag byte */ public byte getTag() { - if (tagSet) { + if (fieldSet) { return tag; } @@ -179,24 +182,4 @@ public byte getTag() { } return tag; } - - /** - * Set the field of the union - * - * @param field the field to be set - * */ - public void setField(Object field) { - this.field = field; - fieldSet = true; - } - - /** - * Set the tag for the union - * - * @param tag the tag to be set - * */ - public void setTag(byte tag) { - this.tag = tag; - tagSet = true; - } } \ No newline at end of file diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java index b3625b3..3eb75a2 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java @@ -42,6 +42,10 @@ protected LazyBinaryObject(OI oi) { this.oi = oi; } + public void setNull() { + throw new IllegalStateException("should not be called"); + } + @Override public abstract int hashCode(); }
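
Reviewer note (illustrative, not part of the patch): taken together, the hunks above move null detection out of each container's getField/getValue path and onto the lazy objects themselves -- LazyObject.init() clears an isNull flag, LazyObjectBase.setNull() marks a field as null, getObject() then hands callers a real null instead of the lazy wrapper, and LazyNonPrimitive.isNull() does the null-sequence comparison in one place. The sketch below is a minimal, self-contained model of that contract, not Hive code: MiniLazyObject, MiniLazyString and NullContractDemo are made-up stand-ins, and "\N" is only an example null sequence.

// Illustrative model of the init()/setNull()/getObject() contract -- not Hive code.
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

abstract class MiniLazyObject {
  protected boolean isNull;

  // Mirrors LazyObject.init(): reject a null buffer and clear the null flag
  // before the subclass parses its bytes.
  public void init(byte[] bytes, int start, int length) {
    if (bytes == null) {
      throw new RuntimeException("bytes cannot be null!");
    }
    this.isNull = false;
  }

  // Mirrors LazyObjectBase.setNull(): called instead of init() for null fields.
  public void setNull() {
    this.isNull = true;
  }

  // Mirrors LazyObject.getObject(): callers see a real null for null-marked fields.
  public Object getObject() {
    return isNull ? null : this;
  }

  // Mirrors LazyNonPrimitive.isNull(): a field is null when its length is negative
  // or its bytes equal the configured null sequence.
  static boolean isNull(byte[] nullSequence, byte[] bytes, int begin, int length) {
    return length < 0
        || (length == nullSequence.length
            && Arrays.equals(Arrays.copyOfRange(bytes, begin, begin + length), nullSequence));
  }
}

class MiniLazyString extends MiniLazyObject {
  private String data;

  @Override
  public void init(byte[] bytes, int start, int length) {
    super.init(bytes, start, length);
    data = new String(bytes, start, length, StandardCharsets.UTF_8);
  }

  @Override
  public String toString() {
    return data;
  }
}

class NullContractDemo {
  public static void main(String[] args) {
    byte[] nullSeq = "\\N".getBytes(StandardCharsets.UTF_8);
    byte[] row = "hello,\\N".getBytes(StandardCharsets.UTF_8);

    MiniLazyString first = new MiniLazyString();
    MiniLazyString second = new MiniLazyString();

    // Field 1 covers bytes 0..4 ("hello") and is initialized normally.
    first.init(row, 0, 5);

    // Field 2 covers bytes 6..7 ("\N"): it matches the null sequence, so the
    // container marks it null instead of initializing it.
    if (MiniLazyObject.isNull(nullSeq, row, 6, 2)) {
      second.setNull();
    } else {
      second.init(row, 6, 2);
    }

    System.out.println(first.getObject());  // hello
    System.out.println(second.getObject()); // null
  }
}

The practical effect in the patch is that containers such as LazyStruct, LazyMap and LazyUnion can allocate one lazy object per field, reuse it across rows, and still surface SQL NULLs to callers without handing out the raw null-sequence bytes.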