diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
index 08dfa61..17a17ae 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
@@ -62,11 +62,13 @@ public class HBaseSerDe implements SerDe {
 
   public static final String HBASE_COLUMNS_MAPPING = "hbase.columns.mapping";
   public static final String HBASE_TABLE_NAME = "hbase.table.name";
+  public static final String HBASE_TABLE_DEFAULT_STORAGE_TYPE = "hbase.table.default.storage.type";
   public static final String HBASE_KEY_COL = ":key";
   public static final Log LOG = LogFactory.getLog(HBaseSerDe.class);
 
   private ObjectInspector cachedObjectInspector;
   private String hbaseColumnsMapping;
+  private List<ColumnMapping> columnsMapping;
   private List<String> hbaseColumnFamilies;
   private List<byte []> hbaseColumnFamiliesBytes;
   private List<String> hbaseColumnQualifiers;
@@ -108,8 +110,7 @@ public class HBaseSerDe implements SerDe {
   public void initialize(Configuration conf, Properties tbl)
       throws SerDeException {
 
-    initHBaseSerDeParameters(conf, tbl,
-        getClass().getName());
+    initHBaseSerDeParameters(conf, tbl, getClass().getName());
 
     cachedObjectInspector = LazyFactory.createLazyStructInspector(
         serdeParams.getColumnNames(),
@@ -134,6 +135,89 @@ public class HBaseSerDe implements SerDe {
   }
 
   /**
+   * Parses the HBase columns mapping specifier to identify the column families, qualifiers
+   * and also caches the byte arrays corresponding to them. One of the Hive table
+   * columns maps to the HBase row key, by default the first column.
+   *
+   * @param columnsMappingSpec string hbase.columns.mapping specified when creating table
+   * @return List<ColumnMapping> which contains the column mapping information by position
+   * @throws SerDeException
+   */
+  public static List<ColumnMapping> parseColumnsMapping(String columnsMappingSpec)
+      throws SerDeException {
+
+    if (columnsMappingSpec == null) {
+      throw new SerDeException("Error: hbase.columns.mapping missing for this HBase table.");
+    }
+
+    if (columnsMappingSpec.equals("") || columnsMappingSpec.equals(HBASE_KEY_COL)) {
+      throw new SerDeException("Error: hbase.columns.mapping specifies only the HBase table"
+          + " row key. A valid Hive-HBase table must specify at least one additional column.");
+    }
+
+    int rowKeyIndex = -1;
+    List<ColumnMapping> columnsMapping = new ArrayList<ColumnMapping>();
+    String [] columnSpecs = columnsMappingSpec.split(",");
+    ColumnMapping columnMapping = null;
+
+    for (int i = 0; i < columnSpecs.length; i++) {
+      String mappingSpec = columnSpecs[i];
+      String [] mapInfo = mappingSpec.split("#");
+      String colInfo = mapInfo[0];
+
+      int idxFirst = colInfo.indexOf(":");
+      int idxLast = colInfo.lastIndexOf(":");
+
+      if (idxFirst < 0 || !(idxFirst == idxLast)) {
+        throw new SerDeException("Error: the HBase columns mapping contains a badly formed " +
+            "column family, column qualifier specification.");
+      }
+
+      columnMapping = new ColumnMapping();
+
+      if (colInfo.equals(HBASE_KEY_COL)) {
+        rowKeyIndex = i;
+        columnMapping.familyName = colInfo;
+        columnMapping.familyNameBytes = Bytes.toBytes(colInfo);
+        columnMapping.qualifierName = null;
+        columnMapping.qualifierNameBytes = null;
+        columnMapping.hbaseRowKey = true;
+      } else {
+        String [] parts = colInfo.split(":");
+        assert(parts.length > 0 && parts.length <= 2);
+        columnMapping.familyName = parts[0];
+        columnMapping.familyNameBytes = Bytes.toBytes(parts[0]);
+        columnMapping.hbaseRowKey = false;
+
+        if (parts.length == 2) {
+          columnMapping.qualifierName = parts[1];
+          columnMapping.qualifierNameBytes = Bytes.toBytes(parts[1]);
+        } else {
+          columnMapping.qualifierName = null;
+          columnMapping.qualifierNameBytes = null;
+        }
+      }
+
+      columnMapping.mappingSpec = mappingSpec;
+
+      columnsMapping.add(columnMapping);
+    }
+
+    if (rowKeyIndex == -1) {
+      columnMapping = new ColumnMapping();
+      columnMapping.familyName = HBASE_KEY_COL;
+      columnMapping.familyNameBytes = Bytes.toBytes(HBASE_KEY_COL);
+      columnMapping.qualifierName = null;
+      columnMapping.qualifierNameBytes = null;
+      columnMapping.hbaseRowKey = true;
+      columnMapping.mappingSpec = HBASE_KEY_COL;
+      columnsMapping.add(0, columnMapping);
+    }
+
+    return columnsMapping;
+  }
+
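Reviewer aside (not part of the patch): a minimal sketch of what the new parser yields. It assumes a throwaway class in the same package, since ColumnMapping's fields are package-private.

    package org.apache.hadoop.hive.hbase;

    import java.util.List;

    import org.apache.hadoop.hive.hbase.HBaseSerDe.ColumnMapping;
    import org.apache.hadoop.hive.serde2.SerDeException;

    // Scratch class for illustration only.
    public class ColumnsMappingDemo {
      public static void main(String[] args) throws SerDeException {
        // ":key" is omitted here, so an implicit row-key mapping is prepended at index 0.
        List<ColumnMapping> cols = HBaseSerDe.parseColumnsMapping("cf1:q1,cf2:");

        System.out.println(cols.get(0).hbaseRowKey);   // true: the implicit ":key" column
        System.out.println(cols.get(1).familyName);    // "cf1", with qualifierName "q1"
        System.out.println(cols.get(2).qualifierName); // null: whole family, a Hive MAP column
      }
    }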
   /**
    * Parses the HBase columns mapping to identify the column families, qualifiers
    * and also caches the byte arrays corresponding to them. One of the Hive table
    * columns maps to the HBase row key, by default the first column.
@@ -242,10 +326,202 @@ public class HBaseSerDe implements SerDe {
     return rowKeyIndex;
   }
 
-  public static boolean isSpecialColumn(String hbaseColumnName) {
+
+  /*
+   * Utility method for parsing a string of the form '-,b,s,-,s:b,...' as a means of specifying
+   * whether to use a binary or a UTF string format to serialize and de-serialize primitive
+   * data types like boolean, byte, short, int, long, float, and double. This applies to
+   * regular columns and also to map column types which are associated with an HBase column
+   * family. For the map types, we apply the specification to the key or the value provided it
+   * is one of the above primitive types. The specifier is a colon separated value of the form
+   * -:s, or b:b where we have 's', 'b', or '-' on either side of the colon. 's' is for string
+   * format storage, 'b' is for native fixed width byte oriented storage, and '-' uses the
+   * table level default.
+   *
+   * @param hbaseTableDefaultStorageType - the specification associated with the table property
+   *        hbase.table.default.storage.type
+   * @throws SerDeException on parse error.
+   */
+
+  private void parseColumnStorageTypes(String hbaseTableDefaultStorageType)
+      throws SerDeException {
+
+    boolean tableBinaryStorage = false;
+
+    if (hbaseTableDefaultStorageType != null && !"".equals(hbaseTableDefaultStorageType)) {
+      if (hbaseTableDefaultStorageType.equals("binary")) {
+        tableBinaryStorage = true;
+      } else if (!hbaseTableDefaultStorageType.equals("string")) {
+        throw new SerDeException("Error: " + HBASE_TABLE_DEFAULT_STORAGE_TYPE +
+            " parameter must be specified as" +
+            " 'string' or 'binary'; '" + hbaseTableDefaultStorageType +
+            "' is not a valid specification for this table/serde property.");
+      }
+    }
+
+    // parse the string to determine column level storage type for primitive types
+    // 's' is for variable length string format storage
+    // 'b' is for fixed width binary storage of bytes
+    // '-' is for table storage type, which defaults to UTF8 string
+    // string data is always stored in the default escaped storage format; the data types
+    // byte, short, int, long, float, and double have a binary byte oriented storage option
+    List<TypeInfo> columnTypes = serdeParams.getColumnTypes();
+
+    for (int i = 0; i < columnsMapping.size(); i++) {
+
+      ColumnMapping colMap = columnsMapping.get(i);
+      TypeInfo colType = columnTypes.get(i);
+      String mappingSpec = colMap.mappingSpec;
+      String [] mapInfo = mappingSpec.split("#");
+      String [] storageInfo = null;
+
+      if (mapInfo.length == 2) {
+        storageInfo = mapInfo[1].split(":");
+      }
+
+      if (storageInfo == null) {
+
+        // use the table default storage specification
+        if (colType.getCategory() == Category.PRIMITIVE) {
+          if (!colType.getTypeName().equals(Constants.STRING_TYPE_NAME)) {
+            colMap.binaryStorage.add(tableBinaryStorage);
+          } else {
+            colMap.binaryStorage.add(false);
+          }
+        } else if (colType.getCategory() == Category.MAP) {
+          TypeInfo keyTypeInfo = ((MapTypeInfo) colType).getMapKeyTypeInfo();
+          TypeInfo valueTypeInfo = ((MapTypeInfo) colType).getMapValueTypeInfo();
+
+          if (keyTypeInfo.getCategory() == Category.PRIMITIVE &&
+              !keyTypeInfo.getTypeName().equals(Constants.STRING_TYPE_NAME)) {
+            colMap.binaryStorage.add(tableBinaryStorage);
+          } else {
+            colMap.binaryStorage.add(false);
+          }
+
+          if (valueTypeInfo.getCategory() == Category.PRIMITIVE &&
+              !valueTypeInfo.getTypeName().equals(Constants.STRING_TYPE_NAME)) {
+            colMap.binaryStorage.add(tableBinaryStorage);
+          } else {
+            colMap.binaryStorage.add(false);
+          }
+        } else {
+          colMap.binaryStorage.add(false);
+        }
+
+      } else if (storageInfo.length == 1) {
+        // we have a storage specification for a primitive column type
+        String storageOption = storageInfo[0];
+
+        if ((colType.getCategory() == Category.MAP) ||
+            !(storageOption.equals("-") || "string".startsWith(storageOption) ||
+                "binary".startsWith(storageOption))) {
+          throw new SerDeException("Error: A column storage specification is one of the following:" +
+              " '-', a prefix of 'string', or a prefix of 'binary'. " +
+              storageOption + " is not a valid storage option specification for " +
+              serdeParams.getColumnNames().get(i));
+        }
+
+        if (colType.getCategory() == Category.PRIMITIVE &&
+            !colType.getTypeName().equals(Constants.STRING_TYPE_NAME)) {
+
+          if ("-".equals(storageOption)) {
+            colMap.binaryStorage.add(tableBinaryStorage);
+          } else if ("binary".startsWith(storageOption)) {
+            colMap.binaryStorage.add(true);
+          } else {
+            colMap.binaryStorage.add(false);
+          }
+        } else {
+          colMap.binaryStorage.add(false);
+        }
+
+      } else if (storageInfo.length == 2) {
+        // we have a storage specification for a map column type
+
+        String keyStorage = storageInfo[0];
+        String valStorage = storageInfo[1];
+
+        if ((colType.getCategory() != Category.MAP) ||
+            !(keyStorage.equals("-") || "string".startsWith(keyStorage) ||
+                "binary".startsWith(keyStorage)) ||
+            !(valStorage.equals("-") || "string".startsWith(valStorage) ||
+                "binary".startsWith(valStorage))) {
+          throw new SerDeException("Error: To specify a valid column storage type for a Map"
+              + " column, use any two specifiers from '-', a prefix of 'string',"
+              + " and a prefix of 'binary' separated by a ':'."
+              + " Valid examples are '-:-', 's:b', etc. They specify the storage type for the"
+              + " key and value parts of the Map respectively."
+              + " Invalid storage specification for column "
+              + serdeParams.getColumnNames().get(i)
+              + "; " + storageInfo[0] + ":" + storageInfo[1]);
+        }
+
+        TypeInfo keyTypeInfo = ((MapTypeInfo) colType).getMapKeyTypeInfo();
+        TypeInfo valueTypeInfo = ((MapTypeInfo) colType).getMapValueTypeInfo();
+
+        if (keyTypeInfo.getCategory() == Category.PRIMITIVE &&
+            !keyTypeInfo.getTypeName().equals(Constants.STRING_TYPE_NAME)) {
+
+          if (keyStorage.equals("-")) {
+            colMap.binaryStorage.add(tableBinaryStorage);
+          } else if ("binary".startsWith(keyStorage)) {
+            colMap.binaryStorage.add(true);
+          } else {
+            colMap.binaryStorage.add(false);
+          }
+        } else {
+          colMap.binaryStorage.add(false);
+        }
+
+        if (valueTypeInfo.getCategory() == Category.PRIMITIVE &&
+            !valueTypeInfo.getTypeName().equals(Constants.STRING_TYPE_NAME)) {
+          if (valStorage.equals("-")) {
+            colMap.binaryStorage.add(tableBinaryStorage);
+          } else if ("binary".startsWith(valStorage)) {
+            colMap.binaryStorage.add(true);
+          } else {
+            colMap.binaryStorage.add(false);
+          }
+        } else {
+          colMap.binaryStorage.add(false);
+        }
+
+        if (colMap.binaryStorage.size() != 2) {
+          throw new SerDeException("Error: In parsing the storage specification for column "
+              + serdeParams.getColumnNames().get(i));
+        }
+
+      } else {
+        // error in storage specification
+        throw new SerDeException("Error: " + HBASE_COLUMNS_MAPPING + " storage specification "
+            + mappingSpec + " is not valid for column: "
+            + serdeParams.getColumnNames().get(i));
+      }
+    }
+  }
+
+
+  public static boolean isRowKeyColumn(String hbaseColumnName) {
     return hbaseColumnName.equals(HBASE_KEY_COL);
   }
+
+  static class ColumnMapping {
+
+    ColumnMapping() {
+      binaryStorage = new ArrayList<Boolean>(2);
+    }
+
+    String familyName;
+    String qualifierName;
+    byte [] familyNameBytes;
+    byte [] qualifierNameBytes;
+    List<Boolean> binaryStorage;
+    boolean hbaseRowKey;
+    String mappingSpec;
+  }
+
   private void initHBaseSerDeParameters(
       Configuration job, Properties tbl, String serdeName) throws SerDeException {
@@ -254,33 +530,27 @@ public class HBaseSerDe implements SerDe {
     hbaseColumnsMapping = tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING);
     String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
 
-    // Parse the HBase columns mapping and initialize the col family & qualifiers
-    hbaseColumnFamilies = new ArrayList<String>();
-    hbaseColumnFamiliesBytes = new ArrayList<byte []>();
-    hbaseColumnQualifiers = new ArrayList<String>();
-    hbaseColumnQualifiersBytes = new ArrayList<byte []>();
-    iKey = parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies,
-        hbaseColumnFamiliesBytes, hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
+    // Parse and initialize the HBase columns mapping
+    columnsMapping = parseColumnsMapping(hbaseColumnsMapping);
 
     // Build the type property string if not supplied
     if (columnTypeProperty == null) {
       StringBuilder sb = new StringBuilder();
-      for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
+
+      for (int i = 0; i < columnsMapping.size(); i++) {
         if (sb.length() > 0) {
           sb.append(":");
         }
-        String colFamily = hbaseColumnFamilies.get(i);
-        String colQualifier = hbaseColumnQualifiers.get(i);
-        if (isSpecialColumn(colFamily)) {
-          // the row key column becomes a STRING
-          sb.append(Constants.STRING_TYPE_NAME);
-        } else if (colQualifier == null) {
+
+        ColumnMapping colMap = columnsMapping.get(i);
+
+        if (colMap.hbaseRowKey) {
+          // the row key column becomes a STRING
+          sb.append(Constants.STRING_TYPE_NAME);
+        } else if (colMap.qualifierName == null) {
           // a column family becomes a MAP
-          sb.append(
-              Constants.MAP_TYPE_NAME + "<"
-              + Constants.STRING_TYPE_NAME
-              + "," + Constants.STRING_TYPE_NAME + ">");
+          sb.append(Constants.MAP_TYPE_NAME + "<" + Constants.STRING_TYPE_NAME + ","
+              + Constants.STRING_TYPE_NAME + ">");
         } else {
           // an individual column becomes a STRING
           sb.append(Constants.STRING_TYPE_NAME);
@@ -291,11 +561,11 @@ public class HBaseSerDe implements SerDe {
 
     serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);
 
-    if (hbaseColumnFamilies.size() != serdeParams.getColumnNames().size()) {
+    if (columnsMapping.size() != serdeParams.getColumnNames().size()) {
       throw new SerDeException(serdeName + ": columns has " +
         serdeParams.getColumnNames().size() +
         " elements while hbase.columns.mapping has " +
-        hbaseColumnFamilies.size() + " elements" +
+        columnsMapping.size() + " elements" +
         " (counting the key if implicit)");
     }
 
@@ -305,24 +575,29 @@ public class HBaseSerDe implements SerDe {
     needsEscape = serdeParams.getNeedsEscape();
 
     // check that the mapping schema is right;
-    // check that the "column-family:" is mapped to MAP<String,?>
-    for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
-      String colFamily = hbaseColumnFamilies.get(i);
-      String colQualifier = hbaseColumnQualifiers.get(i);
-      if (colQualifier == null && !isSpecialColumn(colFamily)) {
+    // check that the "column-family:" is mapped to a Map
+    // where the key extends LazyPrimitive and thus has type Category.PRIMITIVE
+    for (int i = 0; i < columnsMapping.size(); i++) {
+      ColumnMapping colMap = columnsMapping.get(i);
+      if (colMap.qualifierName == null && !colMap.hbaseRowKey) {
         TypeInfo typeInfo = serdeParams.getColumnTypes().get(i);
         if ((typeInfo.getCategory() != Category.MAP) ||
-          (((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getTypeName()
-            != Constants.STRING_TYPE_NAME)) {
+          (((MapTypeInfo) typeInfo).getMapKeyTypeInfo().getCategory()
+            != Category.PRIMITIVE)) {
 
           throw new SerDeException(
-            serdeName + ": hbase column family '"
-            + colFamily
-            + "' should be mapped to Map<String,?> but is mapped to "
+            serdeName + ": hbase column family '" + colMap.familyName
+            + "' should be mapped to Map<? extends LazyPrimitive<?, ?>, ?>, that is "
+            + "the Key for the map should be of primitive type, but is mapped to "
            + typeInfo.getTypeName());
         }
       }
     }
+
+    // Precondition: make sure this is done after the rest of the SerDe initialization is done.
+    String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
+    parseColumnStorageTypes(hbaseTableStorageType);
+    setKeyColumnOffset();
   }
 
   /**
@@ -338,8 +613,7 @@ public class HBaseSerDe implements SerDe {
       throw new SerDeException(getClass().getName() + ": expects Result!");
     }
 
-    cachedHBaseRow.init((Result) result, hbaseColumnFamilies, hbaseColumnFamiliesBytes,
-      hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
+    cachedHBaseRow.init((Result) result, columnsMapping);
 
     return cachedHBaseRow;
   }
@@ -407,9 +681,8 @@ public class HBaseSerDe implements SerDe {
       List<Object> list, List<? extends StructField> declaredFields) throws IOException {
 
-    // column name
-    String hbaseColumnFamily = hbaseColumnFamilies.get(i);
-    String hbaseColumnQualifier = hbaseColumnQualifiers.get(i);
+    // column mapping info
+    ColumnMapping colMap = columnsMapping.get(i);
 
     // Get the field objectInspector and the field object.
     ObjectInspector foi = fields.get(i).getFieldObjectInspector();
@@ -421,8 +694,8 @@ public class HBaseSerDe implements SerDe {
     }
 
     // If the field corresponds to a column family in HBase
-    if (hbaseColumnQualifier == null && !isSpecialColumn(hbaseColumnFamily)) {
-      MapObjectInspector moi = (MapObjectInspector)foi;
+    if (colMap.qualifierName == null && !colMap.hbaseRowKey) {
+      MapObjectInspector moi = (MapObjectInspector) foi;
       ObjectInspector koi = moi.getMapKeyObjectInspector();
       ObjectInspector voi = moi.getMapValueObjectInspector();
 
@@ -433,7 +706,12 @@ public class HBaseSerDe implements SerDe {
         for (Map.Entry<?, ?> entry: map.entrySet()) {
           // Get the Key
           serializeStream.reset();
-          serialize(entry.getKey(), koi, 3);
+
+          // Map keys are required to be primitive and may be serialized in binary format
+          boolean isNotNull = serialize(entry.getKey(), koi, 3, colMap.binaryStorage.get(0));
+          if (!isNotNull) {
+            continue;
+          }
 
           // Get the column-qualifier
           byte [] columnQualifierBytes = new byte[serializeStream.getCount()];
@@ -442,13 +720,16 @@ public class HBaseSerDe implements SerDe {
 
           // Get the Value
           serializeStream.reset();
-          boolean isNotNull = serialize(entry.getValue(), voi, 3);
+
+          // Map values may be serialized in binary format when they are primitive and binary
+          // serialization is the option selected
+          isNotNull = serialize(entry.getValue(), voi, 3, colMap.binaryStorage.get(1));
           if (!isNotNull) {
             continue;
           }
           byte [] value = new byte[serializeStream.getCount()];
           System.arraycopy(serializeStream.getData(), 0, value, 0, serializeStream.getCount());
-          put.add(hbaseColumnFamiliesBytes.get(i), columnQualifierBytes, value);
+          put.add(colMap.familyNameBytes, columnQualifierBytes, value);
         }
       }
     } else {
@@ -464,12 +745,15 @@ public class HBaseSerDe implements SerDe {
             declaredFields.get(i).getFieldObjectInspector().getCategory()
             .equals(Category.PRIMITIVE) || useJSONSerialize)) {
+
+        // we always serialize the String type using the escaped algorithm for LazyString
         isNotNull = serialize(
             SerDeUtils.getJSONString(f, foi),
             PrimitiveObjectInspectorFactory.javaStringObjectInspector,
-            1);
+            1, false);
       } else {
-        isNotNull = serialize(f, foi, 1);
+        // use the serialization option switch to write primitive values as either a variable
+        // length UTF8 string or as fixed width bytes if serializing in binary format
+        isNotNull = serialize(f, foi, 1, colMap.binaryStorage.get(0));
       }
       if (!isNotNull) {
         return null;
@@ -479,32 +763,49 @@ public class HBaseSerDe implements SerDe {
       if (i == iKey) {
         return key;
       }
-      put.add(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i), key);
+      put.add(colMap.familyNameBytes, colMap.qualifierNameBytes, key);
     }
 
     return null;
   }
 
-  /**
+  /*
    * Serialize the row into a ByteStream.
    *
    * @param obj The object for the current field.
    * @param objInspector The ObjectInspector for the current Object.
    * @param level The current level of separator.
-   * @throws IOException
-   * @return true, if serialize is a not-null object; otherwise false.
+   * @param writeBinary Whether to write a primitive object as a UTF8 variable length string or
+   *        as a fixed width byte array onto the byte stream.
+   * @throws IOException On error in writing to the serialization stream.
+   * @return true On serializing a non-null object, otherwise false.
    */
-  private boolean serialize(Object obj, ObjectInspector objInspector, int level)
-      throws IOException {
+  private boolean serialize(
+      Object obj,
+      ObjectInspector objInspector,
+      int level,
+      boolean writeBinary) throws IOException {
+
+    if (objInspector.getCategory() == Category.PRIMITIVE && writeBinary) {
+      LazyUtils.writePrimitive(serializeStream, obj, (PrimitiveObjectInspector) objInspector);
+      return true;
+    } else {
+      return serialize(obj, objInspector, level);
+    }
+  }
+
+  private boolean serialize(
+      Object obj,
+      ObjectInspector objInspector,
+      int level) throws IOException {
 
     switch (objInspector.getCategory()) {
       case PRIMITIVE: {
-        LazyUtils.writePrimitiveUTF8(
-            serializeStream, obj,
-            (PrimitiveObjectInspector) objInspector,
-            escaped, escapeChar, needsEscape);
+        LazyUtils.writePrimitiveUTF8(serializeStream, obj,
+            (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape);
         return true;
       }
+
       case LIST: {
         char separator = (char) separators[level];
         ListObjectInspector loi = (ListObjectInspector)objInspector;
@@ -522,6 +823,7 @@ public class HBaseSerDe implements SerDe {
       }
       return true;
     }
+
     case MAP: {
       char separator = (char) separators[level];
       char keyValueSeparator = (char) separators[level+1];
@@ -547,6 +849,7 @@ public class HBaseSerDe implements SerDe {
       }
       return true;
     }
+
     case STRUCT: {
       char separator = (char)separators[level];
       StructObjectInspector soi = (StructObjectInspector)objInspector;
@@ -559,7 +862,9 @@ public class HBaseSerDe implements SerDe {
         if (i > 0) {
           serializeStream.write(separator);
         }
-        serialize(list.get(i), fields.get(i).getFieldObjectInspector(), level + 1);
+
+        serialize(list.get(i), fields.get(i).getFieldObjectInspector(),
+            level + 1);
       }
     }
     return true;
@@ -595,4 +900,23 @@ public class HBaseSerDe implements SerDe {
     // no support for statistics
     return null;
   }
+
+  void setKeyColumnOffset() throws SerDeException {
+    iKey = getRowKeyColumnOffset(columnsMapping);
+  }
+
+  public static int getRowKeyColumnOffset(List<ColumnMapping> columnsMapping)
+      throws SerDeException {
+
+    for (int i = 0; i < columnsMapping.size(); i++) {
+      ColumnMapping colMap = columnsMapping.get(i);
+
+      if (colMap.hbaseRowKey && colMap.familyName.equals(HBASE_KEY_COL)) {
+        return i;
+      }
+    }
+
+    throw new SerDeException("HBaseSerDe Error: columns mapping list does not contain"
+        + " row key column.");
+  }
 }
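Reviewer aside (not part of the patch): a hedged sketch of how the '#' storage specifiers combine with the table-level default that parseColumnStorageTypes reads; the column names and specs below are made up for illustration.

    package org.apache.hadoop.hive.hbase;

    import java.util.Properties;

    // Scratch class for illustration only.
    public class StorageSpecDemo {
      public static void main(String[] args) {
        Properties tbl = new Properties();

        // Table-level default: non-string primitives use fixed-width binary storage.
        tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");

        // Per-column overrides: '#s' forces string storage, '#b' binary, and '#-'
        // inherits the table default. A MAP column takes two specifiers, 'key:value';
        // e.g. '#s:b' below stores map keys as strings and map values as binary.
        tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
            ":key#-,cf:q1#s,cf:q2#b,cfmap:#s:b");
      }
    }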
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsAggregator.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsAggregator.java
index de1cb77..abde695 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsAggregator.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsAggregator.java
@@ -49,8 +49,8 @@ public class HBaseStatsAggregator implements StatsAggregator {
   public boolean connect(Configuration hiveconf) {
     try {
-      HBaseConfiguration hbaseConf = new HBaseConfiguration(hiveconf);
-      htable = new HTable(hbaseConf, HBaseStatsSetupConstants.PART_STAT_TABLE_NAME);
+      htable = new HTable(HBaseConfiguration.create(hiveconf),
+          HBaseStatsSetupConstants.PART_STAT_TABLE_NAME);
 
       return true;
     } catch (IOException e) {
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsPublisher.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsPublisher.java
index b8fa31c..79ef8de 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsPublisher.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStatsPublisher.java
@@ -49,8 +49,8 @@ public class HBaseStatsPublisher implements StatsPublisher {
   public boolean connect(Configuration hiveconf) {
     try {
-      HBaseConfiguration hbaseConf = new HBaseConfiguration(hiveconf);
-      htable = new HTable(hbaseConf, HBaseStatsSetupConstants.PART_STAT_TABLE_NAME);
+      htable = new HTable(HBaseConfiguration.create(hiveconf),
+          HBaseStatsSetupConstants.PART_STAT_TABLE_NAME);
 
       // for performance reasons, defer update until the closeConnection
       htable.setAutoFlush(false);
     } catch (IOException e) {
@@ -130,8 +130,7 @@ public class HBaseStatsPublisher implements StatsPublisher {
    */
   public boolean init(Configuration hiveconf) {
     try {
-      HBaseConfiguration hbaseConf = new HBaseConfiguration(hiveconf);
-      HBaseAdmin hbase = new HBaseAdmin(hbaseConf);
+      HBaseAdmin hbase = new HBaseAdmin(HBaseConfiguration.create(hiveconf));
 
       // Creating table if not exists
       if (!hbase.tableExists(HBaseStatsSetupConstants.PART_STAT_TABLE_NAME)) {
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
index 8757cf0..0a2e096 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.hbase.HBaseSerDe.ColumnMapping;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.Constants;
@@ -64,7 +65,7 @@ public class HBaseStorageHandler extends DefaultStorageHandler
 
   final static public String DEFAULT_PREFIX = "default.";
 
-  private HBaseConfiguration hbaseConf;
+  private Configuration hbaseConf;
   private HBaseAdmin admin;
 
   private HBaseAdmin getHBaseAdmin() throws MetaException {
@@ -140,17 +141,9 @@ public class HBaseStorageHandler extends DefaultStorageHandler
       String tableName = getHBaseTableName(tbl);
       Map<String, String> serdeParam = tbl.getSd().getSerdeInfo().getParameters();
       String hbaseColumnsMapping = serdeParam.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
+      List<ColumnMapping> columnsMapping = null;
 
-      if (hbaseColumnsMapping == null) {
-        throw new MetaException("No hbase.columns.mapping defined in Serde.");
-      }
-
-      List<String> hbaseColumnFamilies = new ArrayList<String>();
-      List<String> hbaseColumnQualifiers = new ArrayList<String>();
-      List<byte []> hbaseColumnFamiliesBytes = new ArrayList<byte []>();
-      List<byte []> hbaseColumnQualifiersBytes = new ArrayList<byte []>();
-      int iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies,
-          hbaseColumnFamiliesBytes, hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
+      columnsMapping = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
 
       HTableDescriptor tableDesc;
 
@@ -159,8 +152,13 @@ public class HBaseStorageHandler extends DefaultStorageHandler
       if (!isExternal) {
         // Create the column descriptors
         tableDesc = new HTableDescriptor(tableName);
-        Set<String> uniqueColumnFamilies = new HashSet<String>(hbaseColumnFamilies);
-        uniqueColumnFamilies.remove(hbaseColumnFamilies.get(iKey));
+        Set<String> uniqueColumnFamilies = new HashSet<String>();
+
+        for (ColumnMapping colMap : columnsMapping) {
+          if (!colMap.hbaseRowKey) {
+            uniqueColumnFamilies.add(colMap.familyName);
+          }
+        }
 
         for (String columnFamily : uniqueColumnFamilies) {
           tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes(columnFamily)));
@@ -182,13 +180,15 @@ public class HBaseStorageHandler extends DefaultStorageHandler
         // make sure the schema mapping is right
         tableDesc = getHBaseAdmin().getTableDescriptor(Bytes.toBytes(tableName));
 
-        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
-          if (i == iKey) {
+        for (int i = 0; i < columnsMapping.size(); i++) {
+          ColumnMapping colMap = columnsMapping.get(i);
+
+          if (colMap.hbaseRowKey) {
             continue;
           }
 
-          if (!tableDesc.hasFamily(hbaseColumnFamiliesBytes.get(i))) {
-            throw new MetaException("Column Family " + hbaseColumnFamilies.get(i)
+          if (!tableDesc.hasFamily(colMap.familyNameBytes)) {
+            throw new MetaException("Column Family " + colMap.familyName
                 + " is not defined in hbase table " + tableName);
           }
         }
@@ -234,7 +234,7 @@ public class HBaseStorageHandler extends DefaultStorageHandler
 
   @Override
   public void setConf(Configuration conf) {
-    hbaseConf = new HBaseConfiguration(conf);
+    hbaseConf = HBaseConfiguration.create(conf);
   }
 
   @Override
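Reviewer aside (not part of the patch): every HBaseConfiguration change in this patch swaps the deprecated constructor for the factory method, which layers hbase-default.xml/hbase-site.xml over an existing Configuration and returns a plain Configuration. The quorum key below is just an example setting.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    // Scratch class for illustration only.
    public class ConfDemo {
      public static void main(String[] args) {
        Configuration base = new Configuration();
        base.set("hbase.zookeeper.quorum", "localhost"); // example setting

        // Merges HBase resources on top of 'base' without the deprecated constructor.
        Configuration hbaseConf = HBaseConfiguration.create(base);
        System.out.println(hbaseConf.get("hbase.zookeeper.quorum"));
      }
    }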
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
index 1f585f4..602902e 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.mapreduce.TableInputFormatBase;
 import org.apache.hadoop.hbase.mapreduce.TableSplit;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.hive.hbase.HBaseSerDe.ColumnMapping;
 import org.apache.hadoop.hive.ql.exec.ExprNodeConstantEvaluator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
@@ -79,23 +80,18 @@ public class HiveHBaseTableInputFormat extends TableInputFormatBase
     HBaseSplit hbaseSplit = (HBaseSplit) split;
     TableSplit tableSplit = hbaseSplit.getSplit();
     String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
-    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
+    setHTable(new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName)));
     String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
-    List<String> hbaseColumnFamilies = new ArrayList<String>();
-    List<String> hbaseColumnQualifiers = new ArrayList<String>();
-    List<byte []> hbaseColumnFamiliesBytes = new ArrayList<byte []>();
-    List<byte []> hbaseColumnQualifiersBytes = new ArrayList<byte []>();
+    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
+    List<ColumnMapping> columnsMapping = null;
 
-    int iKey;
     try {
-      iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies,
-          hbaseColumnFamiliesBytes, hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
-    } catch (SerDeException se) {
-      throw new IOException(se);
+      columnsMapping = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
+    } catch (SerDeException e) {
+      throw new IOException(e);
     }
 
-    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
-    if (hbaseColumnFamilies.size() < readColIDs.size()) {
+    if (columnsMapping.size() < readColIDs.size()) {
       throw new IOException("Cannot read more columns than the given table contains.");
     }
 
@@ -105,14 +101,15 @@ public class HiveHBaseTableInputFormat extends TableInputFormatBase
 
     if (!addAll) {
       for (int i : readColIDs) {
-        if (i == iKey) {
+        ColumnMapping colMap = columnsMapping.get(i);
+        if (colMap.hbaseRowKey) {
           continue;
         }
 
-        if (hbaseColumnQualifiers.get(i) == null) {
-          scan.addFamily(hbaseColumnFamiliesBytes.get(i));
+        if (colMap.qualifierName == null) {
+          scan.addFamily(colMap.familyNameBytes);
         } else {
-          scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
+          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
         }
 
         empty = false;
@@ -125,15 +122,16 @@ public class HiveHBaseTableInputFormat extends TableInputFormatBase
     // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
     // tables column projection.
     if (empty) {
-      for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
-        if (i == iKey) {
+      for (int i = 0; i < columnsMapping.size(); i++) {
+        ColumnMapping colMap = columnsMapping.get(i);
+        if (colMap.hbaseRowKey) {
           continue;
         }
 
-        if (hbaseColumnQualifiers.get(i) == null) {
-          scan.addFamily(hbaseColumnFamiliesBytes.get(i));
+        if (colMap.qualifierName == null) {
+          scan.addFamily(colMap.familyNameBytes);
         } else {
-          scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
+          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
         }
 
         if (!addAll) {
@@ -144,10 +142,16 @@ public class HiveHBaseTableInputFormat extends TableInputFormatBase
 
     // If Hive's optimizer gave us a filter to process, convert it to the
     // HBase scan form now.
-    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);
+    int iKey = -1;
 
-    setScan(scan);
+    try {
+      iKey = HBaseSerDe.getRowKeyColumnOffset(columnsMapping);
+    } catch (SerDeException e) {
+      throw new IOException(e);
+    }
 
+    tableSplit = convertFilter(jobConf, scan, tableSplit, iKey);
+    setScan(scan);
     Job job = new Job(jobConf);
     TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(
         job.getConfiguration(), reporter);
@@ -363,50 +367,53 @@ public class HiveHBaseTableInputFormat extends TableInputFormatBase
 
   public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
     String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
-    setHTable(new HTable(new HBaseConfiguration(jobConf), Bytes.toBytes(hbaseTableName)));
+    setHTable(new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName)));
     String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
 
     if (hbaseColumnsMapping == null) {
       throw new IOException("hbase.columns.mapping required for HBase Table.");
     }
 
-    List<String> hbaseColumnFamilies = new ArrayList<String>();
-    List<String> hbaseColumnQualifiers = new ArrayList<String>();
-    List<byte []> hbaseColumnFamiliesBytes = new ArrayList<byte []>();
-    List<byte []> hbaseColumnQualifiersBytes = new ArrayList<byte []>();
+    List<ColumnMapping> columnsMapping = null;
+    try {
+      columnsMapping = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
+    } catch (SerDeException e) {
+      throw new IOException(e);
+    }
 
     int iKey;
+
     try {
-      iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping, hbaseColumnFamilies,
-          hbaseColumnFamiliesBytes, hbaseColumnQualifiers, hbaseColumnQualifiersBytes);
-    } catch (SerDeException se) {
-      throw new IOException(se);
+      iKey = HBaseSerDe.getRowKeyColumnOffset(columnsMapping);
+    } catch (SerDeException e) {
+      throw new IOException(e);
     }
 
     Scan scan = new Scan();
 
-    // Take filter pushdown into account while calculating splits; this
-    // allows us to prune off regions immediately. Note that although
-    // the Javadoc for the superclass getSplits says that it returns one
-    // split per region, the implementation actually takes the scan
-    // definition into account and excludes regions which don't satisfy
-    // the start/stop row conditions (HBASE-1829).
-    convertFilter(jobConf, scan, null, iKey);
-
-    // REVIEW: are we supposed to be applying the getReadColumnIDs
     // same as in getRecordReader?
-    for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
-      if (i == iKey) {
+    for (int i = 0; i < columnsMapping.size(); i++) {
+      ColumnMapping colMap = columnsMapping.get(i);
+
+      if (colMap.hbaseRowKey) {
         continue;
       }
 
-      if (hbaseColumnQualifiers.get(i) == null) {
-        scan.addFamily(hbaseColumnFamiliesBytes.get(i));
+      if (colMap.qualifierName == null) {
+        scan.addFamily(colMap.familyNameBytes);
      } else {
-        scan.addColumn(hbaseColumnFamiliesBytes.get(i), hbaseColumnQualifiersBytes.get(i));
+        scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
      }
    }
 
+    // Take filter pushdown into account while calculating splits; this
+    // allows us to prune off regions immediately. Note that although
+    // the Javadoc for the superclass getSplits says that it returns one
+    // split per region, the implementation actually takes the scan
+    // definition into account and excludes regions which don't satisfy
+    // the start/stop row conditions (HBASE-1829).
+    convertFilter(jobConf, scan, null, iKey);
+
     setScan(scan);
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
@@ -40,6 +41,7 @@ public class LazyHBaseCellMap extends LazyMap {
 
   private Result result;
   private byte [] columnFamilyBytes;
+  private List<Boolean> binaryStorage;
 
   /**
    * Construct a LazyCellMap object with the ObjectInspector.
@@ -50,9 +52,14 @@ public class LazyHBaseCellMap extends LazyMap {
     super(oi);
   }
 
-  public void init(Result r, byte [] columnFamilyBytes) {
+  public void init(
+      Result r,
+      byte [] columnFamilyBytes,
+      List<Boolean> binaryStorage) {
+
     result = r;
     this.columnFamilyBytes = columnFamilyBytes;
+    this.binaryStorage = binaryStorage;
     setParsed(false);
   }
 
@@ -73,10 +80,13 @@ public class LazyHBaseCellMap extends LazyMap {
           continue;
         }
 
+        LazyMapObjectInspector lazyMoi = getInspector();
+
         // Keys are always primitive
         LazyPrimitive<?, ?> key = LazyFactory.createLazyPrimitiveClass(
-            (PrimitiveObjectInspector) getInspector().getMapKeyObjectInspector());
+            (PrimitiveObjectInspector) lazyMoi.getMapKeyObjectInspector(),
+            binaryStorage.get(0));
 
         ByteArrayRef keyRef = new ByteArrayRef();
         keyRef.setData(e.getKey());
@@ -84,8 +94,8 @@ public class LazyHBaseCellMap extends LazyMap {
 
         // Value
         LazyObject value =
-            LazyFactory.createLazyObject(
-                getInspector().getMapValueObjectInspector());
+            LazyFactory.createLazyObject(lazyMoi.getMapValueObjectInspector(),
+                binaryStorage.get(1));
 
         ByteArrayRef valueRef = new ByteArrayRef();
         valueRef.setData(e.getValue());
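Reviewer aside (not part of the patch): the binaryStorage list that init() now receives is expected to carry exactly two flags for a MAP column, index 0 for the map key and index 1 for the map value, matching what parseColumnStorageTypes produces.

    import java.util.Arrays;
    import java.util.List;

    // Scratch class for illustration only.
    public class BinaryStorageListDemo {
      public static void main(String[] args) {
        // e.g. a "cf:#s:b" mapping spec: string-stored keys, binary-stored values
        List<Boolean> binaryStorage = Arrays.asList(false, true);

        System.out.println("map key is binary:   " + binaryStorage.get(0)); // false
        System.out.println("map value is binary: " + binaryStorage.get(1)); // true
      }
    }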
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
index 67b9e13..d35bb52 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
@@ -23,12 +23,14 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hive.hbase.HBaseSerDe.ColumnMapping;
 import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
 import org.apache.hadoop.hive.serde2.lazy.LazyObject;
 import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
@@ -42,10 +44,7 @@ public class LazyHBaseRow extends LazyStruct {
    * The HBase columns mapping of the row.
    */
   private Result result;
-  private List<String> hbaseColumnFamilies;
-  private List<byte []> hbaseColumnFamiliesBytes;
-  private List<String> hbaseColumnQualifiers;
-  private List<byte []> hbaseColumnQualifiersBytes;
+  private List<ColumnMapping> columnsMapping;
   private ArrayList<Object> cachedList;
 
   /**
@@ -59,18 +58,10 @@ public class LazyHBaseRow extends LazyStruct {
    * Set the HBase row data(a Result writable) for this LazyStruct.
    * @see LazyHBaseRow#init(Result)
    */
-  public void init(
-      Result r,
-      List<String> hbaseColumnFamilies,
-      List<byte []> hbaseColumnFamiliesBytes,
-      List<String> hbaseColumnQualifiers,
-      List<byte []> hbaseColumnQualifiersBytes) {
+  public void init(Result r, List<ColumnMapping> columnsMapping) {
 
     result = r;
-    this.hbaseColumnFamilies = hbaseColumnFamilies;
-    this.hbaseColumnFamiliesBytes = hbaseColumnFamiliesBytes;
-    this.hbaseColumnQualifiers = hbaseColumnQualifiers;
-    this.hbaseColumnQualifiersBytes = hbaseColumnQualifiersBytes;
+    this.columnsMapping = columnsMapping;
     setParsed(false);
   }
 
@@ -79,25 +70,31 @@ public class LazyHBaseRow extends LazyStruct {
    * @see LazyStruct#parse()
    */
   private void parse() {
+
     if (getFields() == null) {
       List<? extends StructField> fieldRefs =
         ((StructObjectInspector)getInspector()).getAllStructFieldRefs();
-      setFields(new LazyObject[fieldRefs.size()]);
-      for (int i = 0; i < getFields().length; i++) {
-        String hbaseColumnFamily = hbaseColumnFamilies.get(i);
-        String hbaseColumnQualifier = hbaseColumnQualifiers.get(i);
+      LazyObject [] fields = new LazyObject[fieldRefs.size()];
+
+      for (int i = 0; i < fields.length; i++) {
+        ColumnMapping colMap = columnsMapping.get(i);
 
-        if (hbaseColumnQualifier == null && !HBaseSerDe.isSpecialColumn(hbaseColumnFamily)) {
+        if (colMap.qualifierName == null && !colMap.hbaseRowKey) {
           // a column family
-          getFields()[i] = new LazyHBaseCellMap(
+          fields[i] = new LazyHBaseCellMap(
             (LazyMapObjectInspector) fieldRefs.get(i).getFieldObjectInspector());
           continue;
         }
 
-        getFields()[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
+        fields[i] = LazyFactory.createLazyObject(
+            fieldRefs.get(i).getFieldObjectInspector(),
+            colMap.binaryStorage.get(0));
       }
-      setFieldInited(new boolean[getFields().length]);
+
+      setFields(fields);
+      setFieldInited(new boolean[fields.length]);
     }
+
     Arrays.fill(getFieldInited(), false);
     setParsed(true);
   }
@@ -119,6 +116,7 @@ public class LazyHBaseRow extends LazyStruct {
     if (!getParsed()) {
       parse();
     }
+
     return uncheckedGetField(fieldID);
   }
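Reviewer aside (not part of the patch): what the per-field binaryStorage flag switches between when these lazy objects are eventually materialized, shown with HBase's Bytes utility.

    import org.apache.hadoop.hbase.util.Bytes;

    // Scratch class for illustration only.
    public class BinaryStorageDemo {
      public static void main(String[] args) {
        int v = -17;

        byte[] asString = Bytes.toBytes(Integer.toString(v)); // 3 bytes: '-', '1', '7'
        byte[] asBinary = Bytes.toBytes(v);                    // always 4 bytes, big-endian

        System.out.println(asString.length + " vs " + asBinary.length); // 3 vs 4
        System.out.println(Bytes.toInt(asBinary));                      // -17, lossless
      }
    }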
@@ -130,25 +128,27 @@ public class LazyHBaseRow extends LazyStruct {
    * @return The value of the field
    */
   private Object uncheckedGetField(int fieldID) {
-    if (!getFieldInited()[fieldID]) {
-      getFieldInited()[fieldID] = true;
+
+    LazyObject [] fields = getFields();
+    boolean [] fieldsInited = getFieldInited();
+
+    if (!fieldsInited[fieldID]) {
+      fieldsInited[fieldID] = true;
       ByteArrayRef ref = null;
-      String columnFamily = hbaseColumnFamilies.get(fieldID);
-      String columnQualifier = hbaseColumnQualifiers.get(fieldID);
-      byte [] columnFamilyBytes = hbaseColumnFamiliesBytes.get(fieldID);
-      byte [] columnQualifierBytes = hbaseColumnQualifiersBytes.get(fieldID);
+      ColumnMapping colMap = columnsMapping.get(fieldID);
 
-      if (HBaseSerDe.isSpecialColumn(columnFamily)) {
-        assert(columnQualifier == null);
+      if (colMap.hbaseRowKey) {
         ref = new ByteArrayRef();
         ref.setData(result.getRow());
       } else {
-        if (columnQualifier == null) {
+        if (colMap.qualifierName == null) {
           // it is a column family
-          ((LazyHBaseCellMap) getFields()[fieldID]).init(result, columnFamilyBytes);
+          // primitive type for Map can be stored in binary format
+          ((LazyHBaseCellMap) fields[fieldID]).init(
+              result, colMap.familyNameBytes, colMap.binaryStorage);
         } else {
           // it is a column i.e. a column-family with column-qualifier
-          byte [] res = result.getValue(columnFamilyBytes, columnQualifierBytes);
+          byte [] res = result.getValue(colMap.familyNameBytes, colMap.qualifierNameBytes);
 
           if (res == null) {
             return null;
@@ -160,11 +160,11 @@ public class LazyHBaseRow extends LazyStruct {
       }
 
       if (ref != null) {
-        getFields()[fieldID].init(ref, 0, ref.getData().length);
+        fields[fieldID].init(ref, 0, ref.getData().length);
       }
     }
 
-    return getFields()[fieldID].getObject();
+    return fields[fieldID].getObject();
   }
 
   /**
diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestSetup.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestSetup.java
index 6743e57..3d81736 100644
--- hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestSetup.java
+++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestSetup.java
@@ -21,15 +21,22 @@ package org.apache.hadoop.hive.hbase;
 import java.io.File;
 import java.io.IOException;
 import java.net.ServerSocket;
+import java.util.Arrays;
 
 import junit.extensions.TestSetup;
 import junit.framework.Test;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.mapred.JobConf;
@@ -62,8 +69,7 @@ public class HBaseTestSetup extends TestSetup {
     }
     conf.set("hbase.rootdir", hbaseRoot);
     conf.set("hbase.master", hbaseCluster.getMaster().toString());
-    conf.set("hbase.zookeeper.property.clientPort",
-      Integer.toString(zooKeeperPort));
+    conf.set("hbase.zookeeper.property.clientPort", Integer.toString(zooKeeperPort));
     String auxJars = conf.getAuxJars();
     auxJars = ((auxJars == null) ? "" : (auxJars + ",")) + "file://"
       + new JobConf(conf, HBaseConfiguration.class).getJar();
@@ -82,7 +88,7 @@ public class HBaseTestSetup extends TestSetup {
       new File(tmpdir, "zookeeper"));
     conf.set("hbase.zookeeper.property.clientPort",
       Integer.toString(zooKeeperPort));
-    HBaseConfiguration hbaseConf = new HBaseConfiguration(conf);
+    Configuration hbaseConf = HBaseConfiguration.create(conf);
     hbaseConf.setInt("hbase.master.port", findFreePort());
     hbaseConf.setInt("hbase.master.info.port", -1);
     hbaseConf.setInt("hbase.regionserver.port", findFreePort());
@@ -90,7 +96,49 @@ public class HBaseTestSetup extends TestSetup {
     hbaseCluster = new MiniHBaseCluster(hbaseConf, NUM_REGIONSERVERS);
     conf.set("hbase.master", hbaseCluster.getMaster().toString());
     // opening the META table ensures that cluster is running
-    new HTable(new HBaseConfiguration(conf), HConstants.META_TABLE_NAME);
+    new HTable(hbaseConf, HConstants.META_TABLE_NAME);
+    createHBaseTable(hbaseConf);
+  }
+
+  private void createHBaseTable(Configuration hbaseConf) throws IOException {
+    final String HBASE_TABLE_NAME = "HiveExternalTable";
+    HTableDescriptor htableDesc = new HTableDescriptor(HBASE_TABLE_NAME.getBytes());
+    HColumnDescriptor hcolDesc = new HColumnDescriptor("cf".getBytes());
+    htableDesc.addFamily(hcolDesc);
+    HBaseAdmin hbaseAdmin = new HBaseAdmin(hbaseConf);
+
+    if (Arrays.asList(hbaseAdmin.listTables()).contains(htableDesc)) {
+      // if the table is already there, don't recreate it
+      return;
+    }
+
+    hbaseAdmin.createTable(htableDesc);
+    HTable htable = new HTable(hbaseConf, HBASE_TABLE_NAME);
+
+    // data
+    Put [] puts = new Put [] {
+        new Put("key-1".getBytes()), new Put("key-2".getBytes()), new Put("key-3".getBytes()) };
+
+    boolean [] booleans = new boolean [] { true, false, true };
+    byte [] bytes = new byte [] { Byte.MIN_VALUE, -1, Byte.MAX_VALUE };
+    short [] shorts = new short [] { Short.MIN_VALUE, -1, Short.MAX_VALUE };
+    int [] ints = new int [] { Integer.MIN_VALUE, -1, Integer.MAX_VALUE };
+    long [] longs = new long [] { Long.MIN_VALUE, -1, Long.MAX_VALUE };
+    String [] strings = new String [] { "Hadoop, HBase,", "Hive", "Test Strings" };
+    float [] floats = new float [] { Float.MIN_VALUE, -1.0F, Float.MAX_VALUE };
+    double [] doubles = new double [] { Double.MIN_VALUE, -1.0, Double.MAX_VALUE };
+
+    // store data
+    for (int i = 0; i < puts.length; i++) {
+      puts[i].add("cf".getBytes(), "cq-boolean".getBytes(), Bytes.toBytes(booleans[i]));
+      puts[i].add("cf".getBytes(), "cq-byte".getBytes(), new byte [] { bytes[i] });
+      puts[i].add("cf".getBytes(), "cq-short".getBytes(), Bytes.toBytes(shorts[i]));
+      puts[i].add("cf".getBytes(), "cq-int".getBytes(), Bytes.toBytes(ints[i]));
+      puts[i].add("cf".getBytes(), "cq-long".getBytes(), Bytes.toBytes(longs[i]));
+      puts[i].add("cf".getBytes(), "cq-string".getBytes(), Bytes.toBytes(strings[i]));
+      puts[i].add("cf".getBytes(), "cq-float".getBytes(), Bytes.toBytes(floats[i]));
+      puts[i].add("cf".getBytes(), "cq-double".getBytes(), Bytes.toBytes(doubles[i]));
+
+      htable.put(puts[i]);
+    }
+  }
 
   private static int findFreePort() throws IOException {
diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
index 8113961..e786586 100644
--- hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
+++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
@@ -50,9 +50,9 @@ import org.apache.hadoop.io.Text;
 public class TestHBaseSerDe extends TestCase {
 
   /**
-   * Test the LazySimpleSerDe class.
+   * Test the default behavior of the Lazy family of objects and object inspectors.
    */
-  public void testHBaseSerDe() throws SerDeException {
+  public void testHBaseSerDeI() throws SerDeException {
     // Create the SerDe
     HBaseSerDe serDe = new HBaseSerDe();
     Configuration conf = new Configuration();
@@ -140,6 +140,466 @@ public class TestHBaseSerDe extends TestCase {
     assertEquals("Serialized data", p.toString(), serializedPut.toString());
   }
 
+  // No specifications default to UTF8 String storage for backwards compatibility
+  private Properties createPropertiesI_I() {
+    Properties tbl = new Properties();
+
+    // Set the configuration parameters
+    tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
+    tbl.setProperty("columns.types",
+        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
+        "cola:byte,colb:short,colc:int,cola:long,colb:float,colc:double,cola:string,colb:boolean");
+    return tbl;
+  }
+
+  // Default column storage specification inherits from table level default
+  // (in this case a missing specification) of UTF String storage
+  private Properties createPropertiesI_II() {
+    Properties tbl = new Properties();
+
+    // Set the configuration parameters
+    tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
+    tbl.setProperty("columns.types",
+        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
+        ":key#-,cola:byte#s,colb:short#-,colc:int#s,cola:long#s,colb:float#-,colc:double#-," +
+        "cola:string#s,colb:boolean#s");
+    return tbl;
+  }
+
+  // String storage type overrides table level default of binary storage
+  private Properties createPropertiesI_III() {
+    Properties tbl = new Properties();
+
+    // Set the configuration parameters
+    tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
+    tbl.setProperty("columns.types",
+        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
+        ":key#s,cola:byte#s,colb:short#s,colc:int#s,cola:long#s,colb:float#s,colc:double#s," +
+        "cola:string#s,colb:boolean#s");
+    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");
+    return tbl;
+  }
+
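Reviewer aside (not part of the patch): the property sets in these tests lean on the prefix matching in parseColumnStorageTypes, where any prefix of 'binary' or 'string' is accepted as a specifier. A tiny illustration:

    // Scratch class for illustration only.
    public class PrefixSpecDemo {
      public static void main(String[] args) {
        // Mirrors the check '"binary".startsWith(storageOption)' in the SerDe:
        System.out.println("binary".startsWith("b"));   // true:  "#b" selects binary
        System.out.println("binary".startsWith("bin")); // true:  "#bin" works too
        System.out.println("string".startsWith("s"));   // true:  "#s" selects string
        System.out.println("binary".startsWith("x"));   // false: rejected with SerDeException
      }
    }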
+  // String type is never stored as anything other than an escaped string
+  // A specification of binary storage should not affect ser/de.
+  private Properties createPropertiesI_IV() {
+    Properties tbl = new Properties();
+
+    // Set the configuration parameters
+    tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
+    tbl.setProperty("columns.types",
+        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
+        ":key#-,cola:byte#s,colb:short#s,colc:int#s,cola:long#s,colb:float#s,colc:double#s," +
+        "cola:string#b,colb:boolean#s");
+    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");
+    return tbl;
+  }
+
+  public void testHBaseSerDeII() throws SerDeException {
+
+    byte [] cfa = "cfa".getBytes();
+    byte [] cfb = "cfb".getBytes();
+    byte [] cfc = "cfc".getBytes();
+
+    byte [] qualByte = "byte".getBytes();
+    byte [] qualShort = "short".getBytes();
+    byte [] qualInt = "int".getBytes();
+    byte [] qualLong = "long".getBytes();
+    byte [] qualFloat = "float".getBytes();
+    byte [] qualDouble = "double".getBytes();
+    byte [] qualString = "string".getBytes();
+    byte [] qualBool = "boolean".getBytes();
+
+    byte [] rowKey = Bytes.toBytes("test-row-2");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    kvs.add(new KeyValue(rowKey, cfa, qualByte, new byte [] { Byte.MIN_VALUE }));
+    kvs.add(new KeyValue(rowKey, cfb, qualShort, Bytes.toBytes(Short.MIN_VALUE)));
+    kvs.add(new KeyValue(rowKey, cfc, qualInt, Bytes.toBytes(Integer.MIN_VALUE)));
+    kvs.add(new KeyValue(rowKey, cfa, qualLong, Bytes.toBytes(Long.MIN_VALUE)));
+    kvs.add(new KeyValue(rowKey, cfb, qualFloat, Bytes.toBytes(Float.MIN_VALUE)));
+    kvs.add(new KeyValue(rowKey, cfc, qualDouble, Bytes.toBytes(Double.MAX_VALUE)));
+    kvs.add(new KeyValue(rowKey, cfa, qualString, Bytes.toBytes(
+        "Hadoop, HBase, and Hive Again!")));
+    kvs.add(new KeyValue(rowKey, cfb, qualBool, Bytes.toBytes(false)));
+
+    Collections.sort(kvs, KeyValue.COMPARATOR);
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    p.add(cfa, qualByte, new byte [] { Byte.MIN_VALUE });
+    p.add(cfb, qualShort, Bytes.toBytes(Short.MIN_VALUE));
+    p.add(cfc, qualInt, Bytes.toBytes(Integer.MIN_VALUE));
+    p.add(cfa, qualLong, Bytes.toBytes(Long.MIN_VALUE));
+    p.add(cfb, qualFloat, Bytes.toBytes(Float.MIN_VALUE));
+    p.add(cfc, qualDouble, Bytes.toBytes(Double.MAX_VALUE));
+    p.add(cfa, qualString, Bytes.toBytes("Hadoop, HBase, and Hive Again!"));
+    p.add(cfb, qualBool, Bytes.toBytes(false));
+
+    Object[] expectedFieldsData = {
+        new Text("test-row-2"),
+        new ByteWritable(Byte.MIN_VALUE),
+        new ShortWritable(Short.MIN_VALUE),
+        new IntWritable(Integer.MIN_VALUE),
+        new LongWritable(Long.MIN_VALUE),
+        new FloatWritable(Float.MIN_VALUE),
+        new DoubleWritable(Double.MAX_VALUE),
+        new Text("Hadoop, HBase, and Hive Again!"),
+        new BooleanWritable(false)
+    };
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesII_I();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
+
+    serDe = new HBaseSerDe();
+    conf = new Configuration();
+    tbl = createPropertiesII_II();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
+
+    serDe = new HBaseSerDe();
+    conf = new Configuration();
+    tbl = createPropertiesII_III();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerialize(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesII_I() {
+    Properties tbl = new Properties();
+
+    // Set the configuration parameters
+    tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
+    tbl.setProperty("columns.types",
+        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
+        ":key#-,cfa:byte#b,cfb:short#b,cfc:int#-,cfa:long#b,cfb:float#-,cfc:double#b," +
+        "cfa:string#b,cfb:boolean#-");
+    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");
+    return tbl;
+  }
+
+  private Properties createPropertiesII_II() {
+    Properties tbl = new Properties();
+
+    // Set the configuration parameters
+    tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
+    tbl.setProperty("columns.types",
+        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
+        ":key#b,cfa:byte#b,cfb:short#b,cfc:int#b,cfa:long#b,cfb:float#b,cfc:double#b," +
+        "cfa:string#b,cfb:boolean#b");
+    tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string");
+    return tbl;
+  }
+
+  private Properties createPropertiesII_III() {
+    Properties tbl = new Properties();
+
+    // Set the configuration parameters
+    tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+    tbl.setProperty("columns", "key,abyte,ashort,aint,along,afloat,adouble,astring,abool");
+    tbl.setProperty("columns.types",
+        "string,tinyint:smallint:int:bigint:float:double:string:boolean");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
+        ":key#-,cfa:byte#b,cfb:short#b,cfc:int#b,cfa:long#b,cfb:float#b,cfc:double#b," +
+        "cfa:string#-,cfb:boolean#b");
+    return tbl;
+  }
+
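Reviewer aside (not part of the patch): all of the assertions in these tests exercise the same round trip, which boils down to this flow.

    package org.apache.hadoop.hive.hbase;

    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hive.serde2.SerDeException;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

    // Scratch class for illustration only.
    public class RoundTripDemo {
      // Deserialize an HBase Result to a lazy row, then serialize the row back;
      // the resulting Put should be equivalent to the one that produced the Result.
      static Put roundTrip(HBaseSerDe serDe, Result r) throws SerDeException {
        Object row = serDe.deserialize(r);
        StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
        return (Put) serDe.serialize(row, soi);
      }
    }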
columnQualifiersAndValues[i][j])); + p[i].add(columnFamilies[j], columnQualifiersAndValues[i][j], + columnQualifiersAndValues[i][j]); + } + + r[i] = new Result(kvs); + } + + Object [][] expectedData = { + {new Text(Integer.toString(1)), new IntWritable(1), new ByteWritable((byte) 1), + new ShortWritable((short) 1), new LongWritable(1), new FloatWritable(1.0F), + new DoubleWritable(1.0), new BooleanWritable(true)}, + {new Text(Integer.toString(Integer.MIN_VALUE)), new IntWritable(Integer.MIN_VALUE), + new ByteWritable(Byte.MIN_VALUE), new ShortWritable(Short.MIN_VALUE), + new LongWritable(Long.MIN_VALUE), new FloatWritable(Float.MIN_VALUE), + new DoubleWritable(Double.MIN_VALUE), new BooleanWritable(false)}, + {new Text(Integer.toString(Integer.MAX_VALUE)), new IntWritable(Integer.MAX_VALUE), + new ByteWritable(Byte.MAX_VALUE), new ShortWritable(Short.MAX_VALUE), + new LongWritable(Long.MAX_VALUE), new FloatWritable(Float.MAX_VALUE), + new DoubleWritable(Double.MAX_VALUE), new BooleanWritable(true)}}; + + HBaseSerDe hbaseSerDe = new HBaseSerDe(); + Configuration conf = new Configuration(); + Properties tbl = createPropertiesForHiveMapHBaseColumnFamily(); + hbaseSerDe.initialize(conf, tbl); + + deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, + columnFamilies, columnQualifiersAndValues); + + hbaseSerDe = new HBaseSerDe(); + conf = new Configuration(); + tbl = createPropertiesForHiveMapHBaseColumnFamilyII(); + hbaseSerDe.initialize(conf, tbl); + + deserializeAndSerializeHiveMapHBaseColumnFamily(hbaseSerDe, r, p, expectedData, rowKeys, + columnFamilies, columnQualifiersAndValues); + } + + private void deserializeAndSerializeHiveMapHBaseColumnFamily( + HBaseSerDe hbaseSerDe, + Result [] r, + Put [] p, + Object [][] expectedData, + byte [][] rowKeys, + byte [][] columnFamilies, + byte [][][] columnQualifiersAndValues) throws SerDeException { + + StructObjectInspector soi = (StructObjectInspector) hbaseSerDe.getObjectInspector(); + List fieldRefs = soi.getAllStructFieldRefs(); + assertEquals(8, fieldRefs.size()); + + // Deserialize + for (int i = 0; i < r.length; i++) { + Object row = hbaseSerDe.deserialize(r[i]); + Put serializedPut = (Put) hbaseSerDe.serialize(row, soi); + byte [] rowKey = serializedPut.getRow(); + + for (int k = 0; k < rowKey.length; k++) { + assertEquals(rowKey[k], rowKeys[i][k]); + } + + assertEquals(columnFamilies.length, serializedPut.numFamilies()); + + for (int j = 0; j < fieldRefs.size(); j++) { + Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j)); + + assertNotNull(fieldData); + + if (fieldData instanceof LazyPrimitive) { + assertEquals(expectedData[i][j], + ((LazyPrimitive) fieldData).getWritableObject()); + } else if (fieldData instanceof LazyHBaseCellMap) { + LazyPrimitive lazyPrimitive = (LazyPrimitive) + ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedData[i][j]); + assertEquals(expectedData[i][j], lazyPrimitive.getWritableObject()); + } else { + fail("Error: field data not an instance of LazyPrimitive or LazyMap"); + } + } + } + } + + private Properties createPropertiesForHiveMapHBaseColumnFamily() { + Properties tbl = new Properties(); + tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9"); + tbl.setProperty(Constants.LIST_COLUMNS, + "key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool"); + tbl.setProperty(Constants.LIST_COLUMN_TYPES, + "string:map:map:map:map:" + + "map:map:map"); + tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, + 
":key#-,cf-int:#b:b,cf-byte:#b:b,cf-short:#b:b,cf-long:#b:b,cf-float:#b:b,cf-double:#b:b," + + "cf-bool:#b:b"); + return tbl; + } + + private Properties createPropertiesForHiveMapHBaseColumnFamilyII() { + Properties tbl = new Properties(); + tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9"); + tbl.setProperty(Constants.LIST_COLUMNS, + "key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool"); + tbl.setProperty(Constants.LIST_COLUMN_TYPES, + "string:map:map:map:map:" + + "map:map:map"); + tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, + ":key#-,cf-int:#-:-,cf-byte:#-:-,cf-short:#-:-,cf-long:#-:-,cf-float:#-:-,cf-double:#-:-," + + "cf-bool:#-:-"); + tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary"); + return tbl; + } + + public void testHBaseSerDeWithHiveMapToHBaseColumnFamilyII() throws SerDeException { + + byte [] cfbyte = "cf-byte".getBytes(); + byte [] cfshort = "cf-short".getBytes(); + byte [] cfint = "cf-int".getBytes(); + byte [] cflong = "cf-long".getBytes(); + byte [] cffloat = "cf-float".getBytes(); + byte [] cfdouble = "cf-double".getBytes(); + byte [] cfstring = "cf-string".getBytes(); + byte [] cfbool = "cf-bool".getBytes(); + + byte [][] columnFamilies = + new byte [][] {cfbyte, cfshort, cfint, cflong, cffloat, cfdouble, cfstring, cfbool}; + + byte [] rowKey = Bytes.toBytes("row-key"); + + byte [][] columnQualifiersAndValues = new byte [][] { + Bytes.toBytes("123"), Bytes.toBytes("456"), Bytes.toBytes("789"), Bytes.toBytes("1000"), + Bytes.toBytes("-0.01"), Bytes.toBytes("5.3"), Bytes.toBytes("Hive"), + Bytes.toBytes("true") + }; + + Put p = new Put(rowKey); + List kvs = new ArrayList(); + + for (int j = 0; j < columnQualifiersAndValues.length; j++) { + kvs.add(new KeyValue(rowKey, + columnFamilies[j], columnQualifiersAndValues[j], columnQualifiersAndValues[j])); + p.add(columnFamilies[j], columnQualifiersAndValues[j], columnQualifiersAndValues[j]); + } + + Result r = new Result(kvs); + + Object [] expectedData = { + new Text("row-key"), new ByteWritable((byte) 123), new ShortWritable((short) 456), + new IntWritable(789), new LongWritable(1000), new FloatWritable(-0.01F), + new DoubleWritable(5.3), new Text("Hive"), new BooleanWritable(true) + }; + + HBaseSerDe hbaseSerDe = new HBaseSerDe(); + Configuration conf = new Configuration(); + Properties tbl = createPropertiesForHiveMapHBaseColumnFamilyII_I(); + hbaseSerDe.initialize(conf, tbl); + + deserializeAndSerializeHiveMapHBaseColumnFamilyII(hbaseSerDe, r, p, expectedData, + columnFamilies, columnQualifiersAndValues); + + hbaseSerDe = new HBaseSerDe(); + conf = new Configuration(); + tbl = createPropertiesForHiveMapHBaseColumnFamilyII_II(); + hbaseSerDe.initialize(conf, tbl); + + deserializeAndSerializeHiveMapHBaseColumnFamilyII(hbaseSerDe, r, p, expectedData, + columnFamilies, columnQualifiersAndValues); + } + + private Properties createPropertiesForHiveMapHBaseColumnFamilyII_I() { + Properties tbl = new Properties(); + tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9"); + tbl.setProperty(Constants.LIST_COLUMNS, + "key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool"); + tbl.setProperty(Constants.LIST_COLUMN_TYPES, + "string:map:map:map:map:" + + "map:map:map:map"); + tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, + ":key#s,cf-byte:#-:s,cf-short:#s:-,cf-int:#s:s,cf-long:#-:-,cf-float:#s:-,cf-double:#-:s," + + "cf-string:#s:s,cf-bool:#-:-"); + return tbl; + } + + private Properties createPropertiesForHiveMapHBaseColumnFamilyII_II() { + Properties tbl = new 
+ private Properties createPropertiesForHiveMapHBaseColumnFamilyII_II() {
+ Properties tbl = new Properties();
+ tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+ tbl.setProperty(Constants.LIST_COLUMNS,
+ "key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool");
+ tbl.setProperty(Constants.LIST_COLUMN_TYPES,
+ "string:map<tinyint,tinyint>:map<smallint,smallint>:map<int,int>:map<bigint,bigint>:" +
+ "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
+ tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
+ ":key#s,cf-byte:#s:s,cf-short:#s:s,cf-int:#s:s,cf-long:#s:s,cf-float:#s:s,cf-double:#s:s," +
+ "cf-string:#s:s,cf-bool:#s:s");
+ tbl.setProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "binary");
+ return tbl;
+ }
+
+ private void deserializeAndSerializeHiveMapHBaseColumnFamilyII(
+ HBaseSerDe hbaseSerDe,
+ Result r,
+ Put p,
+ Object [] expectedData,
+ byte [][] columnFamilies,
+ byte [][] columnQualifiersAndValues) throws SerDeException {
+
+ StructObjectInspector soi = (StructObjectInspector) hbaseSerDe.getObjectInspector();
+ List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
+ assertEquals(9, fieldRefs.size());
+
+ // Deserialize
+ Object row = hbaseSerDe.deserialize(r);
+
+ for (int j = 0; j < fieldRefs.size(); j++) {
+ Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j));
+ assertNotNull(fieldData);
+
+ if (fieldData instanceof LazyPrimitive) {
+ assertEquals(expectedData[j], ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
+ } else if (fieldData instanceof LazyHBaseCellMap) {
+ LazyPrimitive<?, ?> lazyPrimitive = (LazyPrimitive<?, ?>)
+ ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedData[j]);
+ assertEquals(expectedData[j], lazyPrimitive.getWritableObject());
+ } else {
+ fail("Error: field data not an instance of LazyPrimitive or LazyHBaseCellMap");
+ }
+ }
+
+ // Serialize
+ Put serializedPut = (Put) hbaseSerDe.serialize(row, soi);
+ assertEquals("Serialized data: ", p.toString(), serializedPut.toString());
+ }
+
 private Properties createProperties() {
 Properties tbl = new Properties();
@@ -152,4 +612,5 @@ public class TestHBaseSerDe extends TestCase {
 "cola:byte,colb:short,colc:int,cola:long,colb:float,colc:double,cola:string,colb:boolean");
 return tbl;
 }
+
 }
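The TestLazyHBaseObject changes below migrate every LazyHBaseCellMap.init call to the new three-argument form that carries per-side binary flags. A typical call site, assuming the test class's imports, an already-constructed LazyHBaseCellMap named cellMap, and a populated Result named result:

  List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
  mapBinaryStorage.add(true);  // map key stored in binary
  mapBinaryStorage.add(true);  // map value stored in binary
  cellMap.init(result, Bytes.toBytes("cf-int"), mapBinaryStorage);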
diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java
index ef20b50..f91be4c 100644
--- hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java
+++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.hbase;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import junit.framework.TestCase;
@@ -27,17 +28,28 @@
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.hbase.HBaseSerDe.ColumnMapping;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
 import org.apache.hadoop.hive.serde2.lazy.LazyString;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
 /**
  * TestLazyHBaseObject is a test for the LazyHBaseXXX classes.
@@ -75,7 +87,11 @@ public class TestLazyHBaseObject extends TestCase {
 Result r = new Result(kvs);
- b.init(r, "cfb".getBytes());
+ List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
+ mapBinaryStorage.add(false);
+ mapBinaryStorage.add(false);
+
+ b.init(r, "cfb".getBytes(), mapBinaryStorage);
 assertEquals(
 new Text("def"),
@@ -131,7 +147,11 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfc"), Bytes.toBytes("col3"), Bytes.toBytes("cfccol3")));
 Result r = new Result(kvs);
- b.init(r, "cfb".getBytes());
+ List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
+ mapBinaryStorage.add(false);
+ mapBinaryStorage.add(false);
+
+ b.init(r, "cfb".getBytes(), mapBinaryStorage);
 assertEquals(
 new Text("d\tf"),
@@ -158,33 +178,305 @@
 }
 /**
+ * Test the LazyHBaseCellMap class for the case where both the key and the value in the family
+ * map are stored in binary format using the appropriate LazyPrimitive objects.
+ */
+ public void testLazyHBaseCellMap3() {
+
+ Text nullSequence = new Text("\\N");
+ TypeInfo mapBinaryIntKeyValue = TypeInfoUtils.getTypeInfoFromTypeString("map<int,int>");
+ ObjectInspector oi = LazyFactory.createLazyObjectInspector(
+ mapBinaryIntKeyValue, new byte [] {(byte)1, (byte) 2}, 0, nullSequence, false, (byte) 0);
+ LazyHBaseCellMap hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi);
+
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+ byte [] rowKey = "row-key".getBytes();
+ byte [] cfInt = "cf-int".getBytes();
+ kvs.add(new KeyValue(rowKey, cfInt, Bytes.toBytes(1), Bytes.toBytes(1)));
+ Result result = new Result(kvs);
+ List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
+ mapBinaryStorage.add(true);
+ mapBinaryStorage.add(true);
+ hbaseCellMap.init(result, cfInt, mapBinaryStorage);
+ IntWritable expectedIntValue = new IntWritable(1);
+ LazyPrimitive<?, ?> lazyPrimitive =
+ (LazyPrimitive<?, ?>) hbaseCellMap.getMapValueElement(expectedIntValue);
+
+ assertEquals(expectedIntValue, lazyPrimitive.getWritableObject());
+
+ kvs.clear();
+ kvs.add(new KeyValue(
+ rowKey, cfInt, Bytes.toBytes(Integer.MIN_VALUE), Bytes.toBytes(Integer.MIN_VALUE)));
+ result = new Result(kvs);
+ hbaseCellMap.init(result, cfInt, mapBinaryStorage);
+ expectedIntValue = new IntWritable(Integer.MIN_VALUE);
+ lazyPrimitive =
+ (LazyPrimitive<?, ?>) hbaseCellMap.getMapValueElement(expectedIntValue);
+
+ assertEquals(expectedIntValue, lazyPrimitive.getWritableObject());
+
+ kvs.clear();
+ kvs.add(new KeyValue(
+ rowKey, cfInt, Bytes.toBytes(Integer.MAX_VALUE), Bytes.toBytes(Integer.MAX_VALUE)));
+ result = new Result(kvs);
+ hbaseCellMap.init(result, cfInt, mapBinaryStorage);
+ expectedIntValue = new IntWritable(Integer.MAX_VALUE);
+ lazyPrimitive =
+ (LazyPrimitive<?, ?>) hbaseCellMap.getMapValueElement(expectedIntValue);
+
+ assertEquals(expectedIntValue, lazyPrimitive.getWritableObject());
+
+ TypeInfo mapBinaryByteKeyValue =
+ TypeInfoUtils.getTypeInfoFromTypeString("map<tinyint,tinyint>");
+ oi = LazyFactory.createLazyObjectInspector(
+ mapBinaryByteKeyValue,
new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); + hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + byte [] cfByte = "cf-byte".getBytes(); + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfByte, new byte [] {(byte) 1}, new byte [] {(byte) 1})); + result = new Result(kvs); + hbaseCellMap.init(result, cfByte, mapBinaryStorage); + ByteWritable expectedByteValue = new ByteWritable((byte) 1); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedByteValue); + + assertEquals(expectedByteValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfByte, new byte [] {Byte.MIN_VALUE}, + new byte [] {Byte.MIN_VALUE})); + result = new Result(kvs); + hbaseCellMap.init(result, cfByte, mapBinaryStorage); + expectedByteValue = new ByteWritable(Byte.MIN_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedByteValue); + + assertEquals(expectedByteValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfByte, new byte [] {Byte.MAX_VALUE}, + new byte [] {Byte.MAX_VALUE})); + result = new Result(kvs); + hbaseCellMap.init(result, cfByte, mapBinaryStorage); + expectedByteValue = new ByteWritable(Byte.MAX_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedByteValue); + + assertEquals(expectedByteValue, lazyPrimitive.getWritableObject()); + + TypeInfo mapBinaryShortKeyValue = + TypeInfoUtils.getTypeInfoFromTypeString("map"); + oi = LazyFactory.createLazyObjectInspector( + mapBinaryShortKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); + hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + byte [] cfShort = "cf-short".getBytes(); + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfShort, Bytes.toBytes((short) 1), Bytes.toBytes((short) 1))); + result = new Result(kvs); + hbaseCellMap.init(result, cfShort, mapBinaryStorage); + ShortWritable expectedShortValue = new ShortWritable((short) 1); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedShortValue); + + assertEquals(expectedShortValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfShort, Bytes.toBytes(Short.MIN_VALUE), + Bytes.toBytes(Short.MIN_VALUE))); + result = new Result(kvs); + hbaseCellMap.init(result, cfShort, mapBinaryStorage); + expectedShortValue = new ShortWritable(Short.MIN_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedShortValue); + + assertEquals(expectedShortValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfShort, Bytes.toBytes(Short.MAX_VALUE), + Bytes.toBytes(Short.MAX_VALUE))); + result = new Result(kvs); + hbaseCellMap.init(result, cfShort, mapBinaryStorage); + expectedShortValue = new ShortWritable(Short.MAX_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedShortValue); + + assertEquals(expectedShortValue, lazyPrimitive.getWritableObject()); + + TypeInfo mapBinaryLongKeyValue = + TypeInfoUtils.getTypeInfoFromTypeString("map"); + oi = LazyFactory.createLazyObjectInspector( + mapBinaryLongKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); + hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + byte [] cfLong = "cf-long".getBytes(); + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfLong, Bytes.toBytes((long) 1), Bytes.toBytes((long) 1))); + result = new Result(kvs); + hbaseCellMap.init(result, cfLong, 
mapBinaryStorage); + LongWritable expectedLongValue = new LongWritable(1); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedLongValue); + + assertEquals(expectedLongValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfLong, Bytes.toBytes(Long.MIN_VALUE), + Bytes.toBytes(Long.MIN_VALUE))); + result = new Result(kvs); + hbaseCellMap.init(result, cfLong, mapBinaryStorage); + expectedLongValue = new LongWritable(Long.MIN_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedLongValue); + + assertEquals(expectedLongValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfLong, Bytes.toBytes(Long.MAX_VALUE), + Bytes.toBytes(Long.MAX_VALUE))); + result = new Result(kvs); + hbaseCellMap.init(result, cfLong, mapBinaryStorage); + expectedLongValue = new LongWritable(Long.MAX_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedLongValue); + + assertEquals(expectedLongValue, lazyPrimitive.getWritableObject()); + + TypeInfo mapBinaryFloatKeyValue = + TypeInfoUtils.getTypeInfoFromTypeString("map"); + oi = LazyFactory.createLazyObjectInspector( + mapBinaryFloatKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, + (byte) 0); + hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + byte [] cfFloat = "cf-float".getBytes(); + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfFloat, Bytes.toBytes((float) 1.0F), + Bytes.toBytes((float) 1.0F))); + result = new Result(kvs); + hbaseCellMap.init(result, cfFloat, mapBinaryStorage); + FloatWritable expectedFloatValue = new FloatWritable(1.0F); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedFloatValue); + + assertEquals(expectedFloatValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfFloat, Bytes.toBytes((float) Float.MIN_VALUE), + Bytes.toBytes((float) Float.MIN_VALUE))); + result = new Result(kvs); + hbaseCellMap.init(result, cfFloat, mapBinaryStorage); + expectedFloatValue = new FloatWritable(Float.MIN_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedFloatValue); + + assertEquals(expectedFloatValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfFloat, Bytes.toBytes((float) Float.MAX_VALUE), + Bytes.toBytes((float) Float.MAX_VALUE))); + result = new Result(kvs); + hbaseCellMap.init(result, cfFloat, mapBinaryStorage); + expectedFloatValue = new FloatWritable(Float.MAX_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedFloatValue); + + assertEquals(expectedFloatValue, lazyPrimitive.getWritableObject()); + + TypeInfo mapBinaryDoubleKeyValue = + TypeInfoUtils.getTypeInfoFromTypeString("map"); + oi = LazyFactory.createLazyObjectInspector( + mapBinaryDoubleKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, + (byte) 0); + hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + byte [] cfDouble = "cf-double".getBytes(); + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfDouble, Bytes.toBytes(1.0), Bytes.toBytes(1.0))); + result = new Result(kvs); + hbaseCellMap.init(result, cfDouble, mapBinaryStorage); + DoubleWritable expectedDoubleValue = new DoubleWritable(1.0); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedDoubleValue); + + assertEquals(expectedDoubleValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfDouble, 
Bytes.toBytes(Double.MIN_VALUE), + Bytes.toBytes(Double.MIN_VALUE))); + result = new Result(kvs); + hbaseCellMap.init(result, cfDouble, mapBinaryStorage); + expectedDoubleValue = new DoubleWritable(Double.MIN_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedDoubleValue); + + assertEquals(expectedDoubleValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfDouble, Bytes.toBytes(Double.MAX_VALUE), + Bytes.toBytes(Double.MAX_VALUE))); + result = new Result(kvs); + hbaseCellMap.init(result, cfDouble, mapBinaryStorage); + expectedDoubleValue = new DoubleWritable(Double.MAX_VALUE); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedDoubleValue); + + assertEquals(expectedDoubleValue, lazyPrimitive.getWritableObject()); + + TypeInfo mapBinaryBooleanKeyValue = + TypeInfoUtils.getTypeInfoFromTypeString("map"); + oi = LazyFactory.createLazyObjectInspector( + mapBinaryBooleanKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, + (byte) 0); + hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + byte [] cfBoolean = "cf-boolean".getBytes(); + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfBoolean, Bytes.toBytes(false), Bytes.toBytes(false))); + result = new Result(kvs); + hbaseCellMap.init(result, cfBoolean, mapBinaryStorage); + BooleanWritable expectedBooleanValue = new BooleanWritable(false); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedBooleanValue); + + assertEquals(expectedBooleanValue, lazyPrimitive.getWritableObject()); + + kvs.clear(); + kvs.add(new KeyValue(rowKey, cfBoolean, Bytes.toBytes(true), Bytes.toBytes(true))); + result = new Result(kvs); + hbaseCellMap.init(result, cfBoolean, mapBinaryStorage); + expectedBooleanValue = new BooleanWritable(true); + lazyPrimitive = + (LazyPrimitive) hbaseCellMap.getMapValueElement(expectedBooleanValue); + + assertEquals(expectedBooleanValue, lazyPrimitive.getWritableObject()); + } + + /** * Test the LazyHBaseRow class with one-for-one mappings between * Hive fields and HBase columns. 
 */
 public void testLazyHBaseRow1() {
 List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString(
- "string,int,array<string>,map<string,string>,string");
- List<String> fieldNames = Arrays.asList(
- new String[]{"key", "a", "b", "c", "d"});
+ "string,int,array<string>,map<string,string>,string");
+ List<String> fieldNames = Arrays.asList(new String[]{"key", "a", "b", "c", "d"});
 Text nullSequence = new Text("\\N");
 String hbaseColsMapping = ":key,cfa:a,cfa:b,cfb:c,cfb:d";
- List<String> colFamily = new ArrayList<String>();
- List<String> colQual = new ArrayList<String>();
- List<byte []> colFamilyBytes = new ArrayList<byte []>();
- List<byte []> colQualBytes = new ArrayList<byte []>();
-
- int iKey = -1;
+ List<ColumnMapping> columnsMapping = null;
 try {
- iKey = HBaseSerDe.parseColumnMapping(
- hbaseColsMapping, colFamily, colFamilyBytes, colQual, colQualBytes);
+ columnsMapping = HBaseSerDe.parseColumnsMapping(hbaseColsMapping);
 } catch (SerDeException e) {
 fail(e.toString());
 }
- assertEquals(0, iKey);
+ for (int i = 0; i < columnsMapping.size(); i++) {
+ ColumnMapping colMap = columnsMapping.get(i);
+
+ if (!colMap.hbaseRowKey && colMap.qualifierName == null) {
+ colMap.binaryStorage.add(false);
+ colMap.binaryStorage.add(false);
+ } else {
+ colMap.binaryStorage.add(false);
+ }
+ }
 ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames,
 fieldTypeInfos, new byte[] {' ', ':', '='},
@@ -203,7 +495,7 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("hi")));
 Result r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 ("{'key':'test-row','a':123,'b':['a','b','c'],"
@@ -217,7 +509,7 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfb"), Bytes.toBytes("c"), Bytes.toBytes("d=e:f=g")));
 r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 ("{'key':'test-row','a':123,'b':null,"
@@ -233,7 +525,7 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("no")));
 r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 ("{'key':'test-row','a':null,'b':['a'],"
@@ -247,7 +539,7 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("no")));
 r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 ("{'key':'test-row','a':null,'b':['','a','',''],"
@@ -265,7 +557,7 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfb"), Bytes.toBytes("d"), Bytes.toBytes("")));
 r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 "{'key':'test-row','a':123,'b':[],'c':{},'d':''}".replace("'", "\""),
 SerDeUtils.getJSONString(o, oi));
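The binaryStorage list populated above has a fixed shape: two flags (key, value) for a Hive map bound to a whole column family, one flag otherwise. The same setup written once instead of per-branch (an equivalent sketch of the loop in the test):

  for (ColumnMapping colMap : columnsMapping) {
    boolean wholeFamilyMap = !colMap.hbaseRowKey && colMap.qualifierName == null;
    int flags = wholeFamilyMap ? 2 : 1;   // key and value vs. a single column
    for (int k = 0; k < flags; k++) {
      colMap.binaryStorage.add(false);    // this test uses string storage only
    }
  }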
@@ -284,20 +576,25 @@ public class TestLazyHBaseObject extends TestCase {
 List<String> fieldNames = Arrays.asList(
 new String[]{"key", "a", "b", "c", "d"});
 Text nullSequence = new Text("\\N");
-
+ List<ColumnMapping> columnsMapping = null;
 String hbaseColsMapping = ":key,cfa:a,cfa:b,cfb:,cfc:d";
- List<String> colFamily = new ArrayList<String>();
- List<String> colQual = new ArrayList<String>();
- List<byte []> colFamilyBytes = new ArrayList<byte []>();
- List<byte []> colQualBytes = new ArrayList<byte []>();
- int iKey = -1;
+
 try {
- iKey = HBaseSerDe.parseColumnMapping(
- hbaseColsMapping, colFamily, colFamilyBytes, colQual, colQualBytes);
+ columnsMapping = HBaseSerDe.parseColumnsMapping(hbaseColsMapping);
 } catch (SerDeException e) {
 fail(e.toString());
 }
- assertEquals(0, iKey);
+
+ for (int i = 0; i < columnsMapping.size(); i++) {
+ ColumnMapping colMap = columnsMapping.get(i);
+
+ if (!colMap.hbaseRowKey && colMap.qualifierName == null) {
+ colMap.binaryStorage.add(false);
+ colMap.binaryStorage.add(false);
+ } else {
+ colMap.binaryStorage.add(false);
+ }
+ }
 ObjectInspector oi = LazyFactory.createLazyStructInspector(
 fieldNames,
@@ -319,7 +616,7 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfc"), Bytes.toBytes("d"), Bytes.toBytes("hi")));
 Result r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 ("{'key':'test-row','a':123,'b':['a','b','c'],"
@@ -335,7 +632,7 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfb"), Bytes.toBytes("f"), Bytes.toBytes("g")));
 r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 ("{'key':'test-row','a':123,'b':null,"
@@ -351,7 +648,7 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfc"), Bytes.toBytes("d"), Bytes.toBytes("no")));
 r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 ("{'key':'test-row','a':null,'b':['a'],"
@@ -365,7 +662,7 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfc"), Bytes.toBytes("d"), Bytes.toBytes("no")));
 r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 ("{'key':'test-row','a':null,'b':['','a','',''],"
@@ -381,10 +678,164 @@ public class TestLazyHBaseObject extends TestCase {
 Bytes.toBytes("cfc"), Bytes.toBytes("d"), Bytes.toBytes("")));
 r = new Result(kvs);
- o.init(r, colFamily, colFamilyBytes, colQual, colQualBytes);
+ o.init(r, columnsMapping);
 assertEquals(
 "{'key':'test-row','a':123,'b':[],'c':{},'d':''}".replace("'", "\""),
 SerDeUtils.getJSONString(o, oi));
 }
+
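testLazyHBaseRow3 below drives the row key and c_string through '#str' and everything else through '#bin'. Judging from the specifier variants the .q tests later exercise ('#bi', '#bin', '#bina', '#binar', '#binary'), any prefix of 'string' or 'binary' appears to be accepted; that reading is inferred from these tests, not from a documented contract:

  // Specifier forms observed across this patch's tests (inferred equivalences):
  //   #s, #str ... #string        -> string storage
  //   #b, #bi, #bin ... #binary   -> binary storage
  //   #-                          -> defer to hbase.table.default.storage.type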
+ /**
+ * Test the LazyHBaseRow class with a one-to-one/onto mapping between Hive columns and
+ * HBase column family/column qualifier pairs. The column types are primitive and fields
+ * are stored in binary format in HBase.
+ */
+ public void testLazyHBaseRow3() {
+
+ List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString(
+ "string,int,tinyint,smallint,bigint,float,double,string,boolean");
+ List<String> fieldNames = Arrays.asList(
+ new String [] {"key", "c_int", "c_byte", "c_short", "c_long", "c_float", "c_double",
+ "c_string", "c_bool"});
+ Text nullSequence = new Text("\\N");
+ String hbaseColumnsMapping = ":key#str,cf-int:cq-int#bin,cf-byte:cq-byte#bin," +
+ "cf-short:cq-short#bin,cf-long:cq-long#bin,cf-float:cq-float#bin,cf-double:cq-double#bin," +
+ "cf-string:cq-string#str,cf-bool:cq-bool#bin";
+ List<ColumnMapping> columnsMapping = null;
+
+ try {
+ columnsMapping = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
+ } catch (SerDeException sde) {
+ fail(sde.toString());
+ }
+
+ for (int i = 0; i < columnsMapping.size(); i++) {
+ ColumnMapping colMap = columnsMapping.get(i);
+
+ if (i == 0 || i == 7) {
+ colMap.binaryStorage.add(false);
+ } else {
+ colMap.binaryStorage.add(true);
+ }
+ }
+
+ ObjectInspector oi =
+ LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos,
+ new byte [] {' ', ':', '='}, nullSequence, false, false, (byte) 0);
+
+ LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi);
+
+ byte [] rowKey = "row-key".getBytes();
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+ byte [] value;
+
+ for (int i = 1; i < columnsMapping.size(); i++) {
+
+ switch (i) {
+
+ case 1:
+ value = Bytes.toBytes(1);
+ break;
+
+ case 2:
+ value = new byte[]{(byte)1};
+ break;
+
+ case 3:
+ value = Bytes.toBytes((short) 1);
+ break;
+
+ case 4:
+ value = Bytes.toBytes((long) 1);
+ break;
+
+ case 5:
+ value = Bytes.toBytes((float) 1.0F);
+ break;
+
+ case 6:
+ value = Bytes.toBytes((double) 1.0);
+ break;
+
+ case 7:
+ value = "Hadoop, Hive, with HBase storage handler.".getBytes();
+ break;
+
+ case 8:
+ value = Bytes.toBytes(true);
+ break;
+
+ default:
+ throw new RuntimeException("Not expected: " + i);
+ }
+
+ ColumnMapping colMap = columnsMapping.get(i);
+ kvs.add(new KeyValue(rowKey, colMap.familyNameBytes, colMap.qualifierNameBytes, value));
+ }
+
+ Collections.sort(kvs, KeyValue.COMPARATOR);
+ Result result = new Result(kvs);
+ o.init(result, columnsMapping);
+ List<? extends StructField> fieldRefs =
+ ((StructObjectInspector) oi).getAllStructFieldRefs();
+
+ for (int i = 0; i < fieldRefs.size(); i++) {
+ Object fieldData = ((StructObjectInspector) oi).getStructFieldData(o, fieldRefs.get(i));
+
+ assertNotNull(fieldData);
+ assertTrue(fieldData instanceof LazyPrimitive);
+ Writable writable = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
+
+ switch (i) {
+ case 0:
+ Text text = new Text("row-key");
+ assertEquals(text, writable);
+ break;
+
+ case 1:
+ IntWritable iw = new IntWritable(1);
+ assertEquals(iw, writable);
+ break;
+
+ case 2:
+ ByteWritable bw = new ByteWritable((byte) 1);
+ assertEquals(bw, writable);
+ break;
+
+ case 3:
+ ShortWritable sw = new ShortWritable((short) 1);
+ assertEquals(sw, writable);
+ break;
+
+ case 4:
+ LongWritable lw = new LongWritable(1);
+ assertEquals(lw, writable);
+ break;
+
+ case 5:
+ FloatWritable fw = new FloatWritable(1.0F);
+ assertEquals(fw, writable);
+ break;
+
+ case 6:
+ DoubleWritable dw = new DoubleWritable(1.0);
+ assertEquals(dw, writable);
+ break;
+
+ case 7:
+ Text t = new Text("Hadoop, Hive, with HBase storage handler.");
+ assertEquals(t, writable);
+ break;
+
+ case 8:
+ BooleanWritable boolWritable = new BooleanWritable(true);
+ assertEquals(boolWritable, writable);
+ break;
+
+ default:
+ fail("Error: Unanticipated value in deserializing fields for
HBaseSerDe."); + break; + } + } + } } diff --git hbase-handler/src/test/queries/hbase_binary_external_table_queries.q hbase-handler/src/test/queries/hbase_binary_external_table_queries.q new file mode 100644 index 0000000..9bfaff4 --- /dev/null +++ hbase-handler/src/test/queries/hbase_binary_external_table_queries.q @@ -0,0 +1,38 @@ +DROP TABLE t_ext_hbase_1; + +CREATE EXTERNAL TABLE t_ext_hbase_1 +(key STRING, c_bool BOOLEAN, c_byte TINYINT, c_short SMALLINT, + c_int INT, c_long BIGINT, c_string STRING, c_float FLOAT, c_double DOUBLE) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:cq-boolean,cf:cq-byte,cf:cq-short,cf:cq-int,cf:cq-long,cf:cq-string,cf:cq-float,cf:cq-double") +TBLPROPERTIES ("hbase.table.name" = "HiveExternalTable"); + +SELECT * FROM t_ext_hbase_1; + +DROP TABLE t_ext_hbase_1; +DROP TABLE t_ext_hbase_2; + +CREATE EXTERNAL TABLE t_ext_hbase_2 +(key STRING, c_bool BOOLEAN, c_byte TINYINT, c_short SMALLINT, + c_int INT, c_long BIGINT, c_string STRING, c_float FLOAT, c_double DOUBLE) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#b,cf:cq-boolean#b,cf:cq-byte#b,cf:cq-short#b,cf:cq-int#b,cf:cq-long#b,cf:cq-string#b,cf:cq-float#b,cf:cq-double#b") +TBLPROPERTIES ("hbase.table.name" = "HiveExternalTable"); + +SELECT * FROM t_ext_hbase_2; + +DROP TABLE t_ext_hbase_2; +DROP TABLE t_ext_hbase_3; + +CREATE EXTERNAL TABLE t_ext_hbase_3 +(key STRING, c_bool BOOLEAN, c_byte TINYINT, c_short SMALLINT, + c_int INT, c_long BIGINT, c_string STRING, c_float FLOAT, c_double DOUBLE) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:cq-boolean,cf:cq-byte,cf:cq-short,cf:cq-int,cf:cq-long,cf:cq-string,cf:cq-float,cf:cq-double") +TBLPROPERTIES ( +"hbase.table.name" = "HiveExternalTable", +"hbase.table.default.storage.type" = "binary"); + +SELECT * from t_ext_hbase_3; + +DROP table t_ext_hbase_3; diff --git hbase-handler/src/test/queries/hbase_binary_map_queries.q hbase-handler/src/test/queries/hbase_binary_map_queries.q new file mode 100644 index 0000000..255a2c7 --- /dev/null +++ hbase-handler/src/test/queries/hbase_binary_map_queries.q @@ -0,0 +1,225 @@ +DROP TABLE hbase_src; + +CREATE TABLE hbase_src(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + string_col STRING); + +INSERT OVERWRITE TABLE hbase_src + SELECT key, key, key, key, key, key, key, value + FROM src + WHERE key = 125 OR key = 126 OR key = 127; + +DROP TABLE t_hbase_maps; + +CREATE TABLE t_hbase_maps(key STRING, + tinyint_map_col MAP, + smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps"); + +INSERT OVERWRITE TABLE t_hbase_maps + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(true, true) + FROM hbase_src + WHERE key = 125; + +INSERT OVERWRITE TABLE t_hbase_maps + SELECT key, + map(tinyint_col, tinyint_col), + 
map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(false, false) + FROM hbase_src + WHERE key = 126; + +SELECT * FROM t_hbase_maps ORDER BY key; + +DROP TABLE t_ext_hbase_maps; + +CREATE EXTERNAL TABLE t_ext_hbase_maps(key STRING, + tinyint_map_col MAP, + smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps"); + +SELECT * FROM t_ext_hbase_maps ORDER BY key; + +DROP TABLE t_ext_hbase_maps; + +DROP TABLE t_ext_hbase_maps_1; + +CREATE EXTERNAL TABLE t_ext_hbase_maps_1(key STRING, + tinyint_map_col MAP, + smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key#b,cf-tinyint:#bi:bi,cf-smallint:#bin:bin,cf-int:#bina:bina,cf-bigint:#binar:binar,cf-float:#binary:binary,cf-double:#b:b,cf-string:#bi:bi,cf-boolean:#bin:bin") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps"); + +SELECT * FROM t_ext_hbase_maps_1 ORDER BY key; + +DROP TABLE t_ext_hbase_maps_1; + +DROP TABLE t_ext_hbase_maps_2; + +CREATE EXTERNAL TABLE t_ext_hbase_maps_2(key STRING, + tinyint_map_col MAP, + smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ( +"hbase.table.name"="t_hive_maps", +"hbase.table.default.storage.type"="binary"); + +SELECT * FROM t_ext_hbase_maps_2 ORDER BY key; + +DROP TABLE t_ext_hbase_maps_2; + +DROP TABLE t_hbase_maps_1; + +CREATE TABLE t_hbase_maps_1(key STRING, + tinyint_map_col MAP, + smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key#b,cf-tinyint:#b:b,cf-smallint:#b:b,cf-int:#b:b,cf-bigint:#b:b,cf-float:#b:b,cf-double:#b:b,cf-string:#b:b,cf-boolean:#b:b") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps_1"); + +INSERT OVERWRITE TABLE t_hbase_maps_1 + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(true, true) + FROM hbase_src + WHERE key = 125; + +INSERT OVERWRITE TABLE t_hbase_maps_1 + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(false, false) + FROM hbase_src + WHERE key = 126; + +SELECT * FROM t_hbase_maps_1 ORDER BY key; + +DROP TABLE t_ext_hbase_maps_3; + +CREATE EXTERNAL TABLE t_ext_hbase_maps_3(key STRING, + tinyint_map_col MAP, + 
smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key#b,cf-tinyint:#bi:bi,cf-smallint:#bin:bin,cf-int:#bina:bina,cf-bigint:#binar:binar,cf-float:#binary:binary,cf-double:#b:b,cf-string:#bi:bi,cf-boolean:#bin:bin") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps_1"); + +SELECT * FROM t_ext_hbase_maps_3 ORDER BY key; + +DROP TABLE t_ext_hbase_maps_3; + +DROP TABLE t_ext_hbase_maps_4; + +CREATE EXTERNAL TABLE t_ext_hbase_maps_4(key STRING, + tinyint_map_col MAP, + smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps_1"); + +SELECT * FROM t_ext_hbase_maps_4 ORDER BY key; + +DROP TABLE t_ext_hbase_maps_4; + +DROP TABLE t_ext_hbase_maps_5; + +CREATE EXTERNAL TABLE t_ext_hbase_maps_5(key STRING, + tinyint_map_col MAP, + smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ( +"hbase.table.name"="t_hive_maps_1", +"hbase.table.default.storage.type"="binary"); + +SELECT * FROM t_ext_hbase_maps_5 ORDER BY key; + +DROP TABLE t_ext_hbase_maps_5; + +DROP TABLE t_hbase_maps_1; + +DROP TABLE t_hbase_maps; + +DROP TABLE hbase_src; diff --git hbase-handler/src/test/queries/hbase_binary_storage_queries.q hbase-handler/src/test/queries/hbase_binary_storage_queries.q new file mode 100644 index 0000000..b048871 --- /dev/null +++ hbase-handler/src/test/queries/hbase_binary_storage_queries.q @@ -0,0 +1,218 @@ +DROP TABLE t_hbase; + +CREATE TABLE t_hbase(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#-") +TBLPROPERTIES ("hbase.table.name" = "t_hive", + "hbase.table.default.storage.type" = "binary"); + +DESCRIBE FORMATTED t_hbase; + +INSERT OVERWRITE TABLE t_hbase +SELECT 'user1', 1, 1, 1, 1, 1.0, 1.0, true +FROM src +WHERE key=100 OR key=125 OR key=126; + +INSERT OVERWRITE TABLE t_hbase +SELECT 'user2', 127, 32767, 2147483647, 9223372036854775807, 211.31, 268746532.0571, false +FROM src +WHERE key=100 OR key=125 OR key=126; + +INSERT OVERWRITE TABLE t_hbase +SELECT 'user3', -128, -32768, -2147483648, -9223372036854775808, -201.17, -2110789.37145, true +FROM src +WHERE key=100 OR key=125 OR key=126; + +SELECT * FROM t_hbase; + +SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase +WHERE key='user1' OR key='user2' OR key='user3'; + +SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + 
count(boolean_col) +FROM t_hbase; + +DROP TABLE t_hbase_1; + +CREATE EXTERNAL TABLE t_hbase_1(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#b,cf:binarybyte#b,cf:binaryshort#b,cf:binaryint#b,cf:binarylong#b,cf:binaryfloat#b,cf:binarydouble#b,cf:binaryboolean#b") +TBLPROPERTIES ("hbase.table.name" = "t_hive"); + +DESCRIBE FORMATTED t_hbase_1; + +SELECT * FROM t_hbase_1; + +SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_1 +WHERE key='user1' OR key='user2' OR key='user3'; + +SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_1; + +DROP TABLE t_hbase_1; +DROP TABLE t_hbase; +DROP TABLE t_hbase_2; + +CREATE TABLE t_hbase_2(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#-") +TBLPROPERTIES ("hbase.table.name" = "t_hive_2"); + +INSERT OVERWRITE TABLE t_hbase_2 +SELECT 'user1', 1, 1, 1, 1, 1.0, 1.0, true +FROM src +WHERE key=100 OR key=125 OR key=126; + +INSERT OVERWRITE TABLE t_hbase_2 +SELECT 'user2', 127, 32767, 2147483647, 9223372036854775807, 211.31, 268746532.0571, false +FROM src +WHERE key=100 OR key=125 OR key=126; + +INSERT OVERWRITE TABLE t_hbase_2 +SELECT 'user3', -128, -32768, -2147483648, -9223372036854775808, -201.17, -2110789.37145, true +FROM src +WHERE key=100 OR key=125 OR key=126; + +SELECT * FROM t_hbase_2; + +SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_2 +WHERE key='user1' OR key='user2' OR key='user3'; + +SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_2; + +DROP TABLE t_hbase_3; + +CREATE EXTERNAL TABLE t_hbase_3(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#b,cf:binarybyte#b,cf:binaryshort#b,cf:binaryint#b,cf:binarylong#b,cf:binaryfloat#b,cf:binarydouble#b,cf:binaryboolean#b") +TBLPROPERTIES ("hbase.table.name" = "t_hive_2"); + +SELECT * FROM t_hbase_3; + +SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_3 +WHERE key='user1' OR key='user2' OR key='user3'; + +SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_3; + +DROP TABLE t_hbase_3; + +DROP TABLE t_hbase_4; + +CREATE EXTERNAL TABLE t_hbase_4(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = 
":key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#-") +TBLPROPERTIES ( +"hbase.table.name" = "t_hive_2", +"hbase.table.default.storage.type" = "binary"); + +SELECT * FROM t_hbase_4; + +SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_4 +WHERE key='user1' OR key='user2' OR key='user3'; + +SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_4; + +DROP TABLE t_hbase_4; +DROP TABLE t_hbase_2; diff --git hbase-handler/src/test/results/hbase_binary_external_table_queries.q.out hbase-handler/src/test/results/hbase_binary_external_table_queries.q.out new file mode 100644 index 0000000..6de761a --- /dev/null +++ hbase-handler/src/test/results/hbase_binary_external_table_queries.q.out @@ -0,0 +1,118 @@ +PREHOOK: query: DROP TABLE t_ext_hbase_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_ext_hbase_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_1 +(key STRING, c_bool BOOLEAN, c_byte TINYINT, c_short SMALLINT, + c_int INT, c_long BIGINT, c_string STRING, c_float FLOAT, c_double DOUBLE) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:cq-boolean,cf:cq-byte,cf:cq-short,cf:cq-int,cf:cq-long,cf:cq-string,cf:cq-float,cf:cq-double") +TBLPROPERTIES ("hbase.table.name" = "HiveExternalTable") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_1 +(key STRING, c_bool BOOLEAN, c_byte TINYINT, c_short SMALLINT, + c_int INT, c_long BIGINT, c_string STRING, c_float FLOAT, c_double DOUBLE) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:cq-boolean,cf:cq-byte,cf:cq-short,cf:cq-int,cf:cq-long,cf:cq-string,cf:cq-float,cf:cq-double") +TBLPROPERTIES ("hbase.table.name" = "HiveExternalTable") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_ext_hbase_1 +PREHOOK: query: SELECT * FROM t_ext_hbase_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_ext_hbase_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_ext_hbase_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_ext_hbase_1 +#### A masked pattern was here #### +key-1 NULL NULL NULL NULL NULL Hadoop, HBase, NULL NULL +key-2 NULL NULL NULL NULL NULL Hive NULL NULL +key-3 NULL NULL NULL NULL NULL Test Strings NULL NULL +PREHOOK: query: DROP TABLE t_ext_hbase_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_ext_hbase_1 +PREHOOK: Output: default@t_ext_hbase_1 +POSTHOOK: query: DROP TABLE t_ext_hbase_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_ext_hbase_1 +POSTHOOK: Output: default@t_ext_hbase_1 +PREHOOK: query: DROP TABLE t_ext_hbase_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_ext_hbase_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_2 +(key STRING, c_bool BOOLEAN, c_byte TINYINT, c_short SMALLINT, + c_int INT, c_long BIGINT, c_string STRING, c_float FLOAT, c_double DOUBLE) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#b,cf:cq-boolean#b,cf:cq-byte#b,cf:cq-short#b,cf:cq-int#b,cf:cq-long#b,cf:cq-string#b,cf:cq-float#b,cf:cq-double#b") +TBLPROPERTIES ("hbase.table.name" = "HiveExternalTable") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE 
t_ext_hbase_2 +(key STRING, c_bool BOOLEAN, c_byte TINYINT, c_short SMALLINT, + c_int INT, c_long BIGINT, c_string STRING, c_float FLOAT, c_double DOUBLE) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#b,cf:cq-boolean#b,cf:cq-byte#b,cf:cq-short#b,cf:cq-int#b,cf:cq-long#b,cf:cq-string#b,cf:cq-float#b,cf:cq-double#b") +TBLPROPERTIES ("hbase.table.name" = "HiveExternalTable") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_ext_hbase_2 +PREHOOK: query: SELECT * FROM t_ext_hbase_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_ext_hbase_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_ext_hbase_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_ext_hbase_2 +#### A masked pattern was here #### +key-1 true -128 -32768 -2147483648 -9223372036854775808 Hadoop, HBase, 1.4E-45 4.9E-324 +key-2 false -1 -1 -1 -1 Hive -1.0 -1.0 +key-3 true 127 32767 2147483647 9223372036854775807 Test Strings 3.4028235E38 1.7976931348623157E308 +PREHOOK: query: DROP TABLE t_ext_hbase_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_ext_hbase_2 +PREHOOK: Output: default@t_ext_hbase_2 +POSTHOOK: query: DROP TABLE t_ext_hbase_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_ext_hbase_2 +POSTHOOK: Output: default@t_ext_hbase_2 +PREHOOK: query: DROP TABLE t_ext_hbase_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_ext_hbase_3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_3 +(key STRING, c_bool BOOLEAN, c_byte TINYINT, c_short SMALLINT, + c_int INT, c_long BIGINT, c_string STRING, c_float FLOAT, c_double DOUBLE) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:cq-boolean,cf:cq-byte,cf:cq-short,cf:cq-int,cf:cq-long,cf:cq-string,cf:cq-float,cf:cq-double") +TBLPROPERTIES ( +"hbase.table.name" = "HiveExternalTable", +"hbase.table.default.storage.type" = "binary") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_3 +(key STRING, c_bool BOOLEAN, c_byte TINYINT, c_short SMALLINT, + c_int INT, c_long BIGINT, c_string STRING, c_float FLOAT, c_double DOUBLE) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:cq-boolean,cf:cq-byte,cf:cq-short,cf:cq-int,cf:cq-long,cf:cq-string,cf:cq-float,cf:cq-double") +TBLPROPERTIES ( +"hbase.table.name" = "HiveExternalTable", +"hbase.table.default.storage.type" = "binary") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_ext_hbase_3 +PREHOOK: query: SELECT * from t_ext_hbase_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_ext_hbase_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from t_ext_hbase_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_ext_hbase_3 +#### A masked pattern was here #### +key-1 true -128 -32768 -2147483648 -9223372036854775808 Hadoop, HBase, 1.4E-45 4.9E-324 +key-2 false -1 -1 -1 -1 Hive -1.0 -1.0 +key-3 true 127 32767 2147483647 9223372036854775807 Test Strings 3.4028235E38 1.7976931348623157E308 +PREHOOK: query: DROP table t_ext_hbase_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_ext_hbase_3 +PREHOOK: Output: default@t_ext_hbase_3 +POSTHOOK: query: DROP table t_ext_hbase_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_ext_hbase_3 +POSTHOOK: Output: default@t_ext_hbase_3 diff --git hbase-handler/src/test/results/hbase_binary_map_queries.q.out 
hbase-handler/src/test/results/hbase_binary_map_queries.q.out new file mode 100644 index 0000000..8984da1 --- /dev/null +++ hbase-handler/src/test/results/hbase_binary_map_queries.q.out @@ -0,0 +1,869 @@ +PREHOOK: query: DROP TABLE hbase_src +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE hbase_src +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE hbase_src(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + string_col STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE hbase_src(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + string_col STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@hbase_src +PREHOOK: query: INSERT OVERWRITE TABLE hbase_src + SELECT key, key, key, key, key, key, key, value + FROM src + WHERE key = 125 OR key = 126 OR key = 127 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@hbase_src +POSTHOOK: query: INSERT OVERWRITE TABLE hbase_src + SELECT key, key, key, key, key, key, key, value + FROM src + WHERE key = 125 OR key = 126 OR key = 127 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@hbase_src +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP TABLE t_hbase_maps +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_hbase_maps +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE t_hbase_maps(key STRING, + tinyint_map_col MAP, + smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 
'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE t_hbase_maps(key STRING, + tinyint_map_col MAP, + smallint_map_col MAP, + int_map_col MAP, + bigint_map_col MAP, + float_map_col MAP, + double_map_col MAP, + string_map_col MAP, + boolean_map_col MAP) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_hbase_maps +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE t_hbase_maps + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(true, true) + FROM hbase_src + WHERE key = 125 +PREHOOK: type: QUERY +PREHOOK: Input: default@hbase_src +PREHOOK: Output: default@t_hbase_maps +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase_maps + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(true, true) + FROM hbase_src + WHERE key = 125 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hbase_src +POSTHOOK: Output: default@t_hbase_maps +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT 
OVERWRITE TABLE t_hbase_maps + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(false, false) + FROM hbase_src + WHERE key = 126 +PREHOOK: type: QUERY +PREHOOK: Input: default@hbase_src +PREHOOK: Output: default@t_hbase_maps +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase_maps + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(false, false) + FROM hbase_src + WHERE key = 126 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hbase_src +POSTHOOK: Output: default@t_hbase_maps +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t_hbase_maps ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_maps +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_hbase_maps ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_maps +#### A masked pattern was here #### +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +125 {125:125} {125:125} {125:125} {125:125} {125.0:125.0} {125.0:125.0} {"125":"val_125"} {true:true} +126 {126:126} {126:126} {126:126} {126:126} {126.0:126.0} {126.0:126.0} {"126":"val_126"} {false:false} +PREHOOK: query: DROP TABLE t_ext_hbase_maps +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_ext_hbase_maps +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_ext_hbase_maps +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t_ext_hbase_maps ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t_ext_hbase_maps +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_ext_hbase_maps ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_ext_hbase_maps +#### A masked pattern was here #### +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key
SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +125 {125:125} {125:125} {125:125} {125:125} {125.0:125.0} {125.0:125.0} {"125":"val_125"} {true:true} +126 {126:126} {126:126} {126:126} {126:126} {126.0:126.0} {126.0:126.0} {"126":"val_126"} {false:false} +PREHOOK: query: DROP TABLE t_ext_hbase_maps +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_ext_hbase_maps +PREHOOK: Output: default@t_ext_hbase_maps +POSTHOOK: query: DROP TABLE t_ext_hbase_maps +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_ext_hbase_maps +POSTHOOK: Output: default@t_ext_hbase_maps +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP TABLE t_ext_hbase_maps_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_1(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key#b,cf-tinyint:#bi:bi,cf-smallint:#bin:bin,cf-int:#bina:bina,cf-bigint:#binar:binar,cf-float:#binary:binary,cf-double:#b:b,cf-string:#bi:bi,cf-boolean:#bin:bin") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps") +PREHOOK: type: CREATETABLE +POSTHOOK:
query: CREATE EXTERNAL TABLE t_ext_hbase_maps_1(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key#b,cf-tinyint:#bi:bi,cf-smallint:#bin:bin,cf-int:#bina:bina,cf-bigint:#binar:binar,cf-float:#binary:binary,cf-double:#b:b,cf-string:#bi:bi,cf-boolean:#bin:bin") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_ext_hbase_maps_1 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t_ext_hbase_maps_1 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t_ext_hbase_maps_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_ext_hbase_maps_1 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_ext_hbase_maps_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +125 {49:49} {12594:12594} {} {} {2.5932638E-9:2.5932638E-9} {} {"125":"val_125"} {true:true} +126 {49:49} {12594:12594} {} {} {2.5933207E-9:2.5933207E-9} {} {"126":"val_126"} {true:true} +PREHOOK: query: DROP TABLE t_ext_hbase_maps_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_ext_hbase_maps_1 +PREHOOK: Output: default@t_ext_hbase_maps_1 +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_ext_hbase_maps_1 +POSTHOOK: Output: default@t_ext_hbase_maps_1 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string,
comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP TABLE t_ext_hbase_maps_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_2(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ( +"hbase.table.name"="t_hive_maps", +"hbase.table.default.storage.type"="binary") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_2(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ( +"hbase.table.name"="t_hive_maps", +"hbase.table.default.storage.type"="binary") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_ext_hbase_maps_2 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t_ext_hbase_maps_2 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t_ext_hbase_maps_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_ext_hbase_maps_2 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_ext_hbase_maps_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +125 {49:49} {12594:12594} {} {} {2.5932638E-9:2.5932638E-9} {} {"125":"val_125"} {true:true} +126 {49:49} {12594:12594} {} {} {2.5933207E-9:2.5933207E-9} {} {"126":"val_126"} {true:true} +PREHOOK: query: DROP TABLE t_ext_hbase_maps_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_ext_hbase_maps_2 +PREHOOK: Output: default@t_ext_hbase_maps_2 +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_ext_hbase_maps_2 +POSTHOOK: Output: default@t_ext_hbase_maps_2 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP TABLE t_hbase_maps_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_hbase_maps_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: 
Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE t_hbase_maps_1(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key#b,cf-tinyint:#b:b,cf-smallint:#b:b,cf-int:#b:b,cf-bigint:#b:b,cf-float:#b:b,cf-double:#b:b,cf-string:#b:b,cf-boolean:#b:b") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps_1") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE t_hbase_maps_1(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key#b,cf-tinyint:#b:b,cf-smallint:#b:b,cf-int:#b:b,cf-bigint:#b:b,cf-float:#b:b,cf-double:#b:b,cf-string:#b:b,cf-boolean:#b:b") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps_1") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_hbase_maps_1 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE t_hbase_maps_1 + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(true, true) + FROM hbase_src + WHERE key = 125 +PREHOOK: type: QUERY +PREHOOK: Input: default@hbase_src +PREHOOK: Output: default@t_hbase_maps_1 +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase_maps_1 + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(true, true) + FROM hbase_src + WHERE key = 125 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hbase_src +POSTHOOK: Output: default@t_hbase_maps_1 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE t_hbase_maps_1 + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(false, false) + FROM hbase_src + WHERE key = 126 +PREHOOK: type: QUERY +PREHOOK: Input: default@hbase_src +PREHOOK: Output: default@t_hbase_maps_1 +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase_maps_1 + SELECT key, + map(tinyint_col, tinyint_col), + map(smallint_col, smallint_col), + map(int_col, int_col), + map(bigint_col, bigint_col), + map(float_col, float_col), + map(double_col, double_col), + map(key, string_col), + map(false, false) + FROM hbase_src + WHERE key = 126 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hbase_src +POSTHOOK: Output: default@t_hbase_maps_1 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t_hbase_maps_1 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_maps_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_hbase_maps_1 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_maps_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +125 {125:125} {125:125} {125:125} {125:125} {125.0:125.0} {125.0:125.0} {"125":"val_125"} {true:true} +126 {126:126} {126:126} {126:126} {126:126} {126.0:126.0} {126.0:126.0} {"126":"val_126"} {false:false} +PREHOOK: query: DROP TABLE t_ext_hbase_maps_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_3(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key#b,cf-tinyint:#bi:bi,cf-smallint:#bin:bin,cf-int:#bina:bina,cf-bigint:#binar:binar,cf-float:#binary:binary,cf-double:#b:b,cf-string:#bi:bi,cf-boolean:#bin:bin") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps_1") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_3(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key#b,cf-tinyint:#bi:bi,cf-smallint:#bin:bin,cf-int:#bina:bina,cf-bigint:#binar:binar,cf-float:#binary:binary,cf-double:#b:b,cf-string:#bi:bi,cf-boolean:#bin:bin") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps_1") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_ext_hbase_maps_3 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string,
comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t_ext_hbase_maps_3 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t_ext_hbase_maps_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_ext_hbase_maps_3 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_ext_hbase_maps_3 +#### A masked pattern was here #### +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +125 {125:125} {125:125} {125:125} {125:125} {125.0:125.0} {125.0:125.0} {"125":"val_125"} {true:true} +126 {126:126} {126:126} {126:126} {126:126} {126.0:126.0} {126.0:126.0} {"126":"val_126"} {false:false} +PREHOOK: query: DROP TABLE t_ext_hbase_maps_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_ext_hbase_maps_3 +PREHOOK: Output: default@t_ext_hbase_maps_3 +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_ext_hbase_maps_3 +POSTHOOK: Output: default@t_ext_hbase_maps_3 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP TABLE t_ext_hbase_maps_4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_4(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps_1") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_4(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ("hbase.table.name"="t_hive_maps_1") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_ext_hbase_maps_4 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t_ext_hbase_maps_4 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t_ext_hbase_maps_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_ext_hbase_maps_4 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_ext_hbase_maps_4 +#### A masked pattern was here #### +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +125 {} {} {} {} {} {} {"125":"val_125"} {} +126 {} {} {} {} {} {} {"126":"val_126"} {} +PREHOOK: query: DROP TABLE t_ext_hbase_maps_4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_ext_hbase_maps_4 +PREHOOK: Output: default@t_ext_hbase_maps_4 +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_ext_hbase_maps_4 +POSTHOOK: Output: default@t_ext_hbase_maps_4 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP TABLE t_ext_hbase_maps_5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_5(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ( +"hbase.table.name"="t_hive_maps_1", +"hbase.table.default.storage.type"="binary") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_ext_hbase_maps_5(key STRING, + tinyint_map_col MAP<TINYINT, TINYINT>, + smallint_map_col MAP<SMALLINT, SMALLINT>, + int_map_col MAP<INT, INT>, + bigint_map_col MAP<BIGINT, BIGINT>, + float_map_col MAP<FLOAT, FLOAT>, + double_map_col MAP<DOUBLE, DOUBLE>, + string_map_col MAP<STRING, STRING>, + boolean_map_col MAP<BOOLEAN, BOOLEAN>) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES
("hbase.columns.mapping"=":key,cf-tinyint:,cf-smallint:,cf-int:,cf-bigint:,cf-float:,cf-double:,cf-string:,cf-boolean:") +TBLPROPERTIES ( +"hbase.table.name"="t_hive_maps_1", +"hbase.table.default.storage.type"="binary") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_ext_hbase_maps_5 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM t_ext_hbase_maps_5 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t_ext_hbase_maps_5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_ext_hbase_maps_5 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_ext_hbase_maps_5 +#### A masked pattern was here #### +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +125 {125:125} {125:125} {125:125} {125:125} {125.0:125.0} {125.0:125.0} {"125":"val_125"} {true:true} +126 {126:126} {126:126} {126:126} {126:126} {126.0:126.0} {126.0:126.0} {"126":"val_126"} {false:false} +PREHOOK: query: DROP TABLE t_ext_hbase_maps_5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_ext_hbase_maps_5 +PREHOOK: Output: default@t_ext_hbase_maps_5 +POSTHOOK: query: DROP TABLE t_ext_hbase_maps_5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_ext_hbase_maps_5 +POSTHOOK: Output: default@t_ext_hbase_maps_5 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP TABLE t_hbase_maps_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_hbase_maps_1 +PREHOOK: Output: default@t_hbase_maps_1 +POSTHOOK: query: DROP TABLE t_hbase_maps_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_hbase_maps_1 +POSTHOOK: Output: default@t_hbase_maps_1 +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP TABLE t_hbase_maps +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_hbase_maps +PREHOOK: Output: default@t_hbase_maps +POSTHOOK: query: DROP TABLE t_hbase_maps +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_hbase_maps +POSTHOOK: Output: default@t_hbase_maps +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP TABLE hbase_src +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hbase_src +PREHOOK: Output: default@hbase_src +POSTHOOK: query: DROP TABLE hbase_src +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hbase_src +POSTHOOK: Output: default@hbase_src +POSTHOOK: Lineage: hbase_src.bigint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.double_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.float_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.int_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: hbase_src.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.smallint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.string_col SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: hbase_src.tinyint_col EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git hbase-handler/src/test/results/hbase_binary_storage_queries.q.out hbase-handler/src/test/results/hbase_binary_storage_queries.q.out new file mode 100644 index 0000000..306e666 --- /dev/null +++ hbase-handler/src/test/results/hbase_binary_storage_queries.q.out @@ -0,0 +1,658 @@ +PREHOOK: query: DROP TABLE t_hbase +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_hbase +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE t_hbase(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#-") +TBLPROPERTIES ("hbase.table.name" = "t_hive", + "hbase.table.default.storage.type" = "binary") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE t_hbase(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#-") +TBLPROPERTIES ("hbase.table.name" = "t_hive", + "hbase.table.default.storage.type" = "binary") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_hbase +PREHOOK: query: DESCRIBE FORMATTED t_hbase +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED t_hbase +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string from deserializer +tinyint_col tinyint from deserializer +smallint_col smallint from deserializer +int_col int from deserializer +bigint_col bigint from deserializer +float_col float from deserializer +double_col double from deserializer +boolean_col boolean from deserializer + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + hbase.table.default.storage.type binary + hbase.table.name t_hive + storage_handler org.apache.hadoop.hive.hbase.HBaseStorageHandler +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.hbase.HBaseSerDe +InputFormat: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat +OutputFormat: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + hbase.columns.mapping :key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#- + serialization.format 1 +PREHOOK: query: INSERT OVERWRITE TABLE t_hbase +SELECT 'user1', 1, 1, 1, 1, 1.0, 1.0, true +FROM src +WHERE key=100 OR key=125 OR key=126 +PREHOOK: type: QUERY +PREHOOK: 
Input: default@src +PREHOOK: Output: default@t_hbase +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase +SELECT 'user1', 1, 1, 1, 1, 1.0, 1.0, true +FROM src +WHERE key=100 OR key=125 OR key=126 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t_hbase +PREHOOK: query: INSERT OVERWRITE TABLE t_hbase +SELECT 'user2', 127, 32767, 2147483647, 9223372036854775807, 211.31, 268746532.0571, false +FROM src +WHERE key=100 OR key=125 OR key=126 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t_hbase +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase +SELECT 'user2', 127, 32767, 2147483647, 9223372036854775807, 211.31, 268746532.0571, false +FROM src +WHERE key=100 OR key=125 OR key=126 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t_hbase +PREHOOK: query: INSERT OVERWRITE TABLE t_hbase +SELECT 'user3', -128, -32768, -2147483648, -9223372036854775808, -201.17, -2110789.37145, true +FROM src +WHERE key=100 OR key=125 OR key=126 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t_hbase +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase +SELECT 'user3', -128, -32768, -2147483648, -9223372036854775808, -201.17, -2110789.37145, true +FROM src +WHERE key=100 OR key=125 OR key=126 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t_hbase +PREHOOK: query: SELECT * FROM t_hbase +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_hbase +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase +#### A masked pattern was here #### +user1 1 1 1 1 1.0 1.0 true +user2 127 32767 2147483647 9223372036854775807 211.31 2.687465320571E8 false +user3 -128 -32768 -2147483648 -9223372036854775808 -201.17 -2110789.37145 true +PREHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase +WHERE key='user1' OR key='user2' OR key='user3' +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase +#### A masked pattern was here #### +POSTHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase +WHERE key='user1' OR key='user2' OR key='user3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase +#### A masked pattern was here #### +1 1 1 1 1.0 1.0 true +127 32767 2147483647 9223372036854775807 211.31 2.687465320571E8 false +-128 -32768 -2147483648 -9223372036854775808 -201.17 -2110789.37145 true +PREHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase +#### A masked pattern was here #### +0 0 0 0 11.139999389648438 2.6663574368565E8 3 +PREHOOK: query: DROP TABLE t_hbase_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_hbase_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE t_hbase_1(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#b,cf:binarybyte#b,cf:binaryshort#b,cf:binaryint#b,cf:binarylong#b,cf:binaryfloat#b,cf:binarydouble#b,cf:binaryboolean#b") +TBLPROPERTIES ("hbase.table.name" = "t_hive") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_hbase_1(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#b,cf:binarybyte#b,cf:binaryshort#b,cf:binaryint#b,cf:binarylong#b,cf:binaryfloat#b,cf:binarydouble#b,cf:binaryboolean#b") +TBLPROPERTIES ("hbase.table.name" = "t_hive") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_hbase_1 +PREHOOK: query: DESCRIBE FORMATTED t_hbase_1 +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE FORMATTED t_hbase_1 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string from deserializer +tinyint_col tinyint from deserializer +smallint_col smallint from deserializer +int_col int from deserializer +bigint_col bigint from deserializer +float_col float from deserializer +double_col double from deserializer +boolean_col boolean from deserializer + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + EXTERNAL TRUE + hbase.table.name t_hive + storage_handler org.apache.hadoop.hive.hbase.HBaseStorageHandler +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.hbase.HBaseSerDe +InputFormat: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat +OutputFormat: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + hbase.columns.mapping :key#b,cf:binarybyte#b,cf:binaryshort#b,cf:binaryint#b,cf:binarylong#b,cf:binaryfloat#b,cf:binarydouble#b,cf:binaryboolean#b + serialization.format 1 +PREHOOK: query: SELECT * FROM t_hbase_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_hbase_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_1 +#### A masked pattern was here #### +user1 1 1 1 1 1.0 1.0 true +user2 127 32767 2147483647 9223372036854775807 211.31 2.687465320571E8 false +user3 -128 -32768 -2147483648 -9223372036854775808 -201.17 -2110789.37145 true +PREHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_1 +WHERE key='user1' OR key='user2' OR key='user3' +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_1 +WHERE key='user1' OR key='user2' OR key='user3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_1 +#### A masked pattern was here #### +1 1 1 1 1.0 1.0 true +127 32767 2147483647 9223372036854775807 211.31 2.687465320571E8 false +-128 -32768 -2147483648 -9223372036854775808 -201.17 -2110789.37145 true +PREHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_1 +#### A masked pattern was here #### +0 0 0 0 11.139999389648438 2.6663574368565E8 3 +PREHOOK: query: DROP TABLE t_hbase_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_hbase_1 +PREHOOK: Output: default@t_hbase_1 +POSTHOOK: query: DROP TABLE t_hbase_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_hbase_1 +POSTHOOK: Output: default@t_hbase_1 +PREHOOK: query: DROP TABLE t_hbase +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_hbase +PREHOOK: Output: default@t_hbase +POSTHOOK: query: DROP TABLE t_hbase +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_hbase +POSTHOOK: Output: default@t_hbase +PREHOOK: query: DROP TABLE t_hbase_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_hbase_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE t_hbase_2(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#-") +TBLPROPERTIES ("hbase.table.name" = "t_hive_2") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE t_hbase_2(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#-") +TBLPROPERTIES ("hbase.table.name" = "t_hive_2") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_hbase_2 +PREHOOK: query: INSERT OVERWRITE TABLE t_hbase_2 +SELECT 'user1', 1, 1, 1, 1, 1.0, 1.0, true +FROM src +WHERE key=100 OR key=125 OR key=126 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t_hbase_2 +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase_2 +SELECT 'user1', 1, 1, 1, 1, 1.0, 1.0, true +FROM src +WHERE key=100 OR key=125 OR key=126 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t_hbase_2 +PREHOOK: query: INSERT OVERWRITE TABLE t_hbase_2 +SELECT 'user2', 127, 32767, 2147483647, 9223372036854775807, 211.31, 268746532.0571, false +FROM src +WHERE key=100 OR key=125 OR key=126 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t_hbase_2 +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase_2 +SELECT 'user2', 127, 32767, 2147483647, 9223372036854775807, 211.31, 268746532.0571, false +FROM src +WHERE key=100 OR key=125 OR key=126 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t_hbase_2 +PREHOOK: query: INSERT OVERWRITE TABLE t_hbase_2 +SELECT 'user3', -128, -32768, -2147483648, -9223372036854775808, -201.17, -2110789.37145, true +FROM src +WHERE key=100 OR key=125 OR key=126 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t_hbase_2 +POSTHOOK: query: INSERT OVERWRITE TABLE t_hbase_2 +SELECT 'user3', -128, -32768, -2147483648, -9223372036854775808, -201.17, -2110789.37145, true +FROM src +WHERE key=100 OR key=125 OR key=126 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t_hbase_2 +PREHOOK: query: SELECT * FROM t_hbase_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_hbase_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_2 +#### A masked pattern was here #### +user1 1 1 1 1 1.0 1.0 true +user2 127 32767 2147483647 9223372036854775807 211.31 2.687465320571E8 false +user3 -128 -32768 -2147483648 -9223372036854775808 -201.17 -2110789.37145 true +PREHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_2 +WHERE key='user1' OR key='user2' OR key='user3' +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_2 +WHERE key='user1' OR key='user2' OR key='user3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_2 +#### A masked pattern was here #### +1 1 1 1 1.0 1.0 true +127 32767 2147483647 9223372036854775807 211.31 2.687465320571E8 false +-128 -32768 -2147483648 -9223372036854775808 -201.17 -2110789.37145 true +PREHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_2 +#### A masked pattern was here #### +0 0 0 0 11.139999389648438 2.6663574368565E8 3 +PREHOOK: query: DROP TABLE t_hbase_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_hbase_3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE t_hbase_3(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#b,cf:binarybyte#b,cf:binaryshort#b,cf:binaryint#b,cf:binarylong#b,cf:binaryfloat#b,cf:binarydouble#b,cf:binaryboolean#b") +TBLPROPERTIES ("hbase.table.name" = "t_hive_2") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_hbase_3(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#b,cf:binarybyte#b,cf:binaryshort#b,cf:binaryint#b,cf:binarylong#b,cf:binaryfloat#b,cf:binarydouble#b,cf:binaryboolean#b") +TBLPROPERTIES ("hbase.table.name" = "t_hive_2") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_hbase_3 +PREHOOK: query: SELECT * FROM t_hbase_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_hbase_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_3 +#### A masked pattern was here #### +user1 49 NULL NULL NULL NULL NULL true +user2 49 13106 842085431 4121411804481401392 1.0313938E-8 5.6030888442763564E-67 true +user3 45 11571 758264116 3258690996568012594 1.0128829E-11 5.581687380553606E-91 true
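The t_hbase_3 values above are not corruption: t_hbase_2 wrote the cells as UTF-8 strings, and t_hbase_3's "#b" mappings reinterpret the leading string bytes as fixed-width binary. A minimal sketch of that reinterpretation (illustrative only, not part of this patch):

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;

public class StringBytesReadAsBinary {
  public static void main(String[] args) throws Exception {
    byte[] tiny = "1".getBytes("UTF-8");      // cell written by t_hbase_2
    byte[] small = "32767".getBytes("UTF-8");
    // '1' is byte 0x31, so tinyint_col reads as decimal 49.
    System.out.println(new DataInputStream(new ByteArrayInputStream(tiny)).readByte());   // 49
    // "32767" begins with 0x33 0x32, so readShort() yields 0x3332 = 13106.
    System.out.println(new DataInputStream(new ByteArrayInputStream(small)).readShort()); // 13106
    // readBoolean() treats any nonzero first byte as true, so even "false" reads as true.
    System.out.println(new DataInputStream(
        new ByteArrayInputStream("false".getBytes("UTF-8"))).readBoolean());              // true
    // user1's one-byte cell "1" underflows readShort(): the resulting
    // EOFException is what the SerDe surfaces as NULL in the query output.
  }
}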
+PREHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_3 +WHERE key='user1' OR key='user2' OR key='user3' +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_3 +WHERE key='user1' OR key='user2' OR key='user3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_3 +#### A masked pattern was here #### +49 NULL NULL NULL NULL NULL true +49 13106 842085431 4121411804481401392 1.0313938E-8 5.6030888442763564E-67 true +45 11571 758264116 3258690996568012594 1.0128829E-11 5.581687380553606E-91 true +PREHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_3 +#### A masked pattern was here #### +143 24677 1600349547 7380102801049413986 1.0324066977186741E-8 5.6030888442763564E-67 3 +PREHOOK: query: DROP TABLE t_hbase_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_hbase_3 +PREHOOK: Output: default@t_hbase_3 +POSTHOOK: query: DROP TABLE t_hbase_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_hbase_3 +POSTHOOK: Output: default@t_hbase_3 +PREHOOK: query: DROP TABLE t_hbase_4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE t_hbase_4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE t_hbase_4(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#-") +TBLPROPERTIES ( +"hbase.table.name" = "t_hive_2", +"hbase.table.default.storage.type" = "binary") +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE t_hbase_4(key STRING, + tinyint_col TINYINT, + smallint_col SMALLINT, + int_col INT, + bigint_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + boolean_col BOOLEAN) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key#-,cf:binarybyte#-,cf:binaryshort#-,cf:binaryint#-,cf:binarylong#-,cf:binaryfloat#-,cf:binarydouble#-,cf:binaryboolean#-") +TBLPROPERTIES ( +"hbase.table.name" = "t_hive_2", +"hbase.table.default.storage.type" = "binary") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t_hbase_4 +PREHOOK: query: SELECT * FROM t_hbase_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t_hbase_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_4 +#### A masked pattern was here #### +user1 49 NULL NULL NULL NULL NULL true +user2 49 13106 842085431 4121411804481401392 1.0313938E-8 5.6030888442763564E-67 true +user3 45 11571 758264116 3258690996568012594 1.0128829E-11 5.581687380553606E-91 true +PREHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_4 +WHERE key='user1' OR key='user2' OR key='user3' +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tinyint_col, + smallint_col, + int_col, + bigint_col, + float_col, + double_col, + boolean_col +FROM t_hbase_4 +WHERE key='user1' OR key='user2' OR key='user3' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_4 +#### A masked pattern was here #### +49 NULL NULL NULL NULL NULL true +49 13106 842085431 4121411804481401392 1.0313938E-8 5.6030888442763564E-67 true +45 11571 758264116 3258690996568012594 1.0128829E-11 5.581687380553606E-91 true +PREHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_hbase_4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(tinyint_col), + sum(smallint_col), + sum(int_col), + sum(bigint_col), + sum(float_col), + sum(double_col), + count(boolean_col) +FROM t_hbase_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_hbase_4 +#### A masked pattern was here #### +143 24677 1600349547 7380102801049413986 1.0324066977186741E-8 5.6030888442763564E-67 3 +PREHOOK: query: DROP TABLE t_hbase_4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_hbase_4 +PREHOOK: Output: default@t_hbase_4 +POSTHOOK: query: DROP TABLE t_hbase_4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_hbase_4 +POSTHOOK: Output: default@t_hbase_4 +PREHOOK: query: DROP TABLE t_hbase_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_hbase_2 +PREHOOK: Output: default@t_hbase_2 +POSTHOOK: query: DROP TABLE t_hbase_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_hbase_2 +POSTHOOK: Output: default@t_hbase_2 diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java index b8ff2b0..20758a7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ + package org.apache.hadoop.hive.serde2.lazy; import java.util.ArrayList; @@ -36,9 +37,17 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiv import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.lazydio.LazyDioBoolean; +import org.apache.hadoop.hive.serde2.lazydio.LazyDioByte; +import org.apache.hadoop.hive.serde2.lazydio.LazyDioDouble; +import org.apache.hadoop.hive.serde2.lazydio.LazyDioFloat; +import org.apache.hadoop.hive.serde2.lazydio.LazyDioInteger; +import org.apache.hadoop.hive.serde2.lazydio.LazyDioLong; +import org.apache.hadoop.hive.serde2.lazydio.LazyDioShort; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; @@ -47,6 +56,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; /** * LazyFactory. @@ -55,11 +65,32 @@ import org.apache.hadoop.io.Text; public final class LazyFactory { /** + * Create a lazy primitive object instance given a primitive object inspector based on its + * type. It takes a boolean switch to decide whether to return a binary or standard variant + * of the lazy object. + * + * @param poi PrimitiveObjectInspector + * @param typeBinary a switch to return either a LazyPrimitive class or its binary + * companion + * @return LazyPrimitive + */ + public static LazyPrimitive<? extends ObjectInspector, ? extends Writable> + createLazyPrimitiveClass(PrimitiveObjectInspector poi, boolean typeBinary) { + if (typeBinary) { + return createLazyPrimitiveBinaryClass(poi); + } else { + return createLazyPrimitiveClass(poi); + } + } + + /** * Create a lazy primitive class given the type name. */ - public static LazyPrimitive<? extends ObjectInspector, ? extends Writable> createLazyPrimitiveClass( - PrimitiveObjectInspector oi) { + public static LazyPrimitive<? extends ObjectInspector, ? extends Writable> + createLazyPrimitiveClass(PrimitiveObjectInspector oi) { + PrimitiveCategory p = oi.getPrimitiveCategory(); + switch (p) { case BOOLEAN: return new LazyBoolean((LazyBooleanObjectInspector) oi); @@ -86,10 +117,35 @@ public final class LazyFactory { } } + public static LazyPrimitive<? extends ObjectInspector, ? extends Writable> + createLazyPrimitiveBinaryClass(PrimitiveObjectInspector poi) { + + PrimitiveCategory pc = poi.getPrimitiveCategory(); + + switch (pc) { + case BOOLEAN: + return new LazyDioBoolean((LazyBooleanObjectInspector) poi); + case BYTE: + return new LazyDioByte((LazyByteObjectInspector) poi); + case SHORT: + return new LazyDioShort((LazyShortObjectInspector) poi); + case INT: + return new LazyDioInteger((LazyIntObjectInspector) poi); + case LONG: + return new LazyDioLong((LazyLongObjectInspector) poi); + case FLOAT: + return new LazyDioFloat((LazyFloatObjectInspector) poi); + case DOUBLE: + return new LazyDioDouble((LazyDoubleObjectInspector) poi); + default: + throw new RuntimeException("Hive Internal Error: no LazyObject for " + poi); + } + } + /** * Create a hierarchical LazyObject based on the given typeInfo. */ - public static LazyObject createLazyObject(ObjectInspector oi) { + public static LazyObject<? extends ObjectInspector> createLazyObject(ObjectInspector oi) { ObjectInspector.Category c = oi.getCategory(); switch (c) { case PRIMITIVE: @@ -108,9 +164,28 @@ public final class LazyFactory { } /** + * Creates a LazyObject based on the given ObjectInspector. Will create binary variants for + * primitive objects when the switch typeBinary is specified as true. + * + * @param oi ObjectInspector + * @param typeBinary Boolean value used as switch to return variants of LazyPrimitive + * objects which are initialized from a binary format for the data. + * @return LazyObject + */ + public static LazyObject<? extends ObjectInspector> + createLazyObject(ObjectInspector oi, boolean typeBinary) { + + if (oi.getCategory() == Category.PRIMITIVE) { + return createLazyPrimitiveClass((PrimitiveObjectInspector) oi, typeBinary); + } else { + return createLazyObject(oi); + } + } + + /** * Create a hierarchical ObjectInspector for LazyObject with the given * typeInfo. - * + * * @param typeInfo * The type information for the LazyObject * @param separator @@ -180,7 +255,7 @@ public final class LazyFactory { /** * Create a hierarchical ObjectInspector for LazyStruct with the given * columnNames and columnTypeInfos. - * + * * @param lastColumnTakesRest * whether the last column of the struct should take the rest of the * row if there are extra fields. @@ -205,7 +280,7 @@ public final class LazyFactory { /** * Create a hierarchical ObjectInspector for ColumnarStruct with the given * columnNames and columnTypeInfos. - * + * * @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text, * boolean, byte) */
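A quick dispatch check for the new factory overload (a hypothetical snippet, not part of the patch; it assumes Hive's existing LazyPrimitiveObjectInspectorFactory constants):

import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;

public class FactoryDispatchCheck {
  public static void main(String[] args) {
    // typeBinary == true selects the lazydio variant ...
    System.out.println(LazyFactory.createLazyObject(
        LazyPrimitiveObjectInspectorFactory.LAZY_INT_OBJECT_INSPECTOR, true)
        .getClass().getSimpleName());   // expected: LazyDioInteger
    // ... while typeBinary == false falls through to the existing text-oriented class.
    System.out.println(LazyFactory.createLazyObject(
        LazyPrimitiveObjectInspectorFactory.LAZY_INT_OBJECT_INSPECTOR, false)
        .getClass().getSimpleName());   // expected: LazyInteger
  }
}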
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java index 996ff6f..10f4c05 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java @@ -21,17 +21,17 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; /** * LazyObject stores an object in a range of bytes in a byte[]. - * + * * A LazyObject can represent any primitive object or hierarchical object like * array, map or struct. */ public abstract class LazyObject<OI extends ObjectInspector> extends LazyObjectBase { - OI oi; + protected OI oi; /** * Create a LazyObject. - * + * * @param oi * Derived classes can access meta information about this Lazy Object * (e.g, separator, nullSequence, escaper) from it. diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java index df0756f..ff277e0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java @@ -32,17 +32,17 @@ public abstract class LazyPrimitive { private static final Log LOG = LogFactory.getLog(LazyPrimitive.class); - LazyPrimitive(OI oi) { + protected LazyPrimitive(OI oi) { super(oi); } - LazyPrimitive(LazyPrimitive<OI, T> copy) { + protected LazyPrimitive(LazyPrimitive<OI, T> copy) { super(copy.oi); isNull = copy.isNull; } - T data; - boolean isNull = false; + protected T data; + protected boolean isNull = false; /** * Returns the primitive object represented by this LazyObject. This is useful diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index 6b1766e..c0ca173 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -15,8 +15,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.hadoop.hive.serde2.lazy; +import java.io.DataOutputStream; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -239,6 +241,69 @@ public final class LazyUtils { } } + /** + * Write out a binary representation of a PrimitiveObject to a byte stream. + * + * @param out ByteStream.Output, an unsynchronized version of ByteArrayOutputStream, used as a + * backing buffer for the DataOutputStream + * @param o the PrimitiveObject + * @param oi the PrimitiveObjectInspector + * @throws IOException on error during the write operation + */ + public static void writePrimitive( + OutputStream out, + Object o, + PrimitiveObjectInspector oi) throws IOException { + + DataOutputStream dos = new DataOutputStream(out); + + try { + switch (oi.getPrimitiveCategory()) { + case BOOLEAN: + boolean b = ((BooleanObjectInspector) oi).get(o); + dos.writeBoolean(b); + break; + + case BYTE: + byte bt = ((ByteObjectInspector) oi).get(o); + dos.writeByte(bt); + break; + + case SHORT: + short s = ((ShortObjectInspector) oi).get(o); + dos.writeShort(s); + break; + + case INT: + int i = ((IntObjectInspector) oi).get(o); + dos.writeInt(i); + break; + + case LONG: + long l = ((LongObjectInspector) oi).get(o); + dos.writeLong(l); + break; + + case FLOAT: + float f = ((FloatObjectInspector) oi).get(o); + dos.writeFloat(f); + break; + + case DOUBLE: + double d = ((DoubleObjectInspector) oi).get(o); + dos.writeDouble(d); + break; + + default: + throw new RuntimeException("Hive internal error."); + } + } finally { + // closing the underlying ByteStream should have no effect, the data should still be + // accessible + dos.close(); + } + } + public static int hashBytes(byte[] data, int start, int len) { int hash = 1; for (int i = start; i < len; i++) { @@ -287,5 +352,4 @@ public final class LazyUtils { private LazyUtils() { // prevent instantiation } - }
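As a usage sketch (not part of the patch; it assumes Hive's existing ByteStream.Output class and the PrimitiveObjectInspectorFactory.writableDoubleObjectInspector constant), writePrimitive appends the fixed-width, big-endian DataOutputStream encoding of a primitive to the stream:

import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class WritePrimitiveSketch {
  public static void main(String[] args) throws Exception {
    ByteStream.Output out = new ByteStream.Output();
    // Writes the 8-byte encoding of the double into the backing buffer.
    LazyUtils.writePrimitive(out, new DoubleWritable(211.31),
        PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
    System.out.println(out.size()); // 8
  }
}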
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java new file mode 100644 index 0000000..48861bc --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazydio; + +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector; +import org.apache.hadoop.io.BooleanWritable; + +/** + * LazyBooleanBinary for storing a boolean value as a BooleanWritable. This class complements class + * LazyBoolean. Its primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * reads the boolean value stored from the default binary format. + */ +public class LazyDioBoolean extends LazyPrimitive<LazyBooleanObjectInspector, BooleanWritable> { + + private ByteStream.Input in; + private DataInputStream din; + + public LazyDioBoolean(LazyBooleanObjectInspector oi) { + super(oi); + data = new BooleanWritable(); + } + + public LazyDioBoolean(LazyDioBoolean copy) { + super(copy); + data = new BooleanWritable(copy.data.get()); + } + + /* (non-Javadoc) + * This provides a LazyBoolean like class which can be initialized from data stored in a + * binary format. + * + * @see org.apache.hadoop.hive.serde2.lazy.LazyObject#init + * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) + */ + @Override + public void init(ByteArrayRef bytes, int start, int length) { + + boolean value = false; + + try { + in = new ByteStream.Input(bytes.getData(), start, length); + din = new DataInputStream(in); + value = din.readBoolean(); + data.set(value); + isNull = false; + } catch (IOException e) { + isNull = true; + } finally { + try { + din.close(); + } catch (IOException e) { + // swallow exception + } + + try { + in.close(); + } catch (IOException e) { + // swallow exception + } + } + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java new file mode 100644 index 0000000..26cc346 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazydio; + +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector; + +/** + * LazyByteBinary for storing a byte value as a ByteWritable. This class complements class + * LazyByte. Its primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * reads the raw byte value stored. + */ +public class LazyDioByte extends LazyPrimitive<LazyByteObjectInspector, ByteWritable> { + + private ByteStream.Input in; + private DataInputStream din; + + public LazyDioByte(LazyByteObjectInspector oi) { + super(oi); + data = new ByteWritable(); + } + + public LazyDioByte(LazyDioByte copy) { + super(copy); + data = new ByteWritable(copy.data.get()); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + + byte value = 0; + + try { + in = new ByteStream.Input(bytes.getData(), start, length); + din = new DataInputStream(in); + value = din.readByte(); + data.set(value); + isNull = false; + } catch (Exception e) { + isNull = true; + } finally { + try { + din.close(); + } catch (IOException e) { + // swallow exception + } + try { + in.close(); + } catch (IOException e) { + // swallow exception + } + } + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java new file mode 100644 index 0000000..fba2974 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazydio; + +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector; + +/** + * LazyDoubleBinary for storing a double value as a DoubleWritable. This class complements class + * LazyDouble. Its primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * reads the double value stored from the default binary format. + */ +public class LazyDioDouble extends LazyPrimitive<LazyDoubleObjectInspector, DoubleWritable> { + + private ByteStream.Input in; + private DataInputStream din; + + public LazyDioDouble(LazyDoubleObjectInspector oi) { + super(oi); + data = new DoubleWritable(); + } + + LazyDioDouble(LazyDioDouble copy) { + super(copy); + data = new DoubleWritable(copy.data.get()); + } + + /* (non-Javadoc) + * This provides a LazyDouble like class which can be initialized from data stored in a + * binary format.
+ * + * @see org.apache.hadoop.hive.serde2.lazy.LazyObject#init + * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) + */ + @Override + public void init(ByteArrayRef bytes, int start, int length) { + + double value = 0.0; + + try { + in = new ByteStream.Input(bytes.getData(), start, length); + din = new DataInputStream(in); + value = din.readDouble(); + data.set(value); + isNull = false; + } catch (IOException e) { + isNull = true; + } finally { + try { + din.close(); + } catch (IOException e) { + // swallow exception + } + try { + in.close(); + } catch (IOException e) { + // swallow exception + } + } + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java new file mode 100644 index 0000000..a945f89 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazydio; + +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector; +import org.apache.hadoop.io.FloatWritable; + +/** + * LazyFloatBinary for storing a float value as a FloatWritable. This class complements class + * LazyFloat. Its primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * reads the float value stored from the default binary format. + */ +public class LazyDioFloat extends LazyPrimitive<LazyFloatObjectInspector, FloatWritable> { + + private ByteStream.Input in; + private DataInputStream din; + + public LazyDioFloat(LazyFloatObjectInspector oi) { + super(oi); + data = new FloatWritable(); + } + + public LazyDioFloat(LazyDioFloat copy) { + super(copy); + data = new FloatWritable(copy.data.get()); + } + + /* (non-Javadoc) + * This provides a LazyFloat like class which can be initialized from data stored in a + * binary format. + * + * @see org.apache.hadoop.hive.serde2.lazy.LazyObject#init + * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) + */ + @Override + public void init(ByteArrayRef bytes, int start, int length) { + + float value = 0.0F; + + try { + in = new ByteStream.Input(bytes.getData(), start, length); + din = new DataInputStream(in); + value = din.readFloat(); + data.set(value); + isNull = false; + } catch (IOException e) { + isNull = true; + } finally { + try { + din.close(); + } catch (IOException e) { + // swallow exception + } + try { + in.close(); + } catch (IOException e) { + // swallow exception + } + } + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java new file mode 100644 index 0000000..66be75f --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazydio; + +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector; +import org.apache.hadoop.io.IntWritable; + +/** + * LazyIntegerBinary for storing an int value as an IntWritable. This class complements class + * LazyInteger. Its primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * reads the integer value stored from the default binary format. + */ +public class LazyDioInteger extends LazyPrimitive<LazyIntObjectInspector, IntWritable> { + + private ByteStream.Input in; + private DataInputStream din; + + public LazyDioInteger(LazyIntObjectInspector oi) { + super(oi); + data = new IntWritable(); + } + + public LazyDioInteger(LazyDioInteger copy) { + super(copy); + data = new IntWritable(copy.data.get()); + } + + /* (non-Javadoc) + * This provides a LazyInteger like class which can be initialized from data stored in a + * binary format.
+ * + * @see org.apache.hadoop.hive.serde2.lazy.LazyObject#init + * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) + */ + @Override + public void init(ByteArrayRef bytes, int start, int length) { + + int value = 0; + + try { + in = new ByteStream.Input(bytes.getData(), start, length); + din = new DataInputStream(in); + value = din.readInt(); + data.set(value); + isNull = false; + } catch (IOException e) { + isNull = true; + } finally { + try { + din.close(); + } catch (IOException e) { + // swallow exception + } + try { + in.close(); + } catch (IOException e) { + // swallow exception + } + } + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java new file mode 100644 index 0000000..e08d521 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazydio; + +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector; +import org.apache.hadoop.io.LongWritable; + +/** + * LazyLongBinary for storing a long value as a LongWritable. This class complements class + * LazyLong. Its primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * reads the long value stored in the default binary format. + */ +public class LazyDioLong extends LazyPrimitive<LazyLongObjectInspector, LongWritable> { + + private ByteStream.Input in; + private DataInputStream din; + + public LazyDioLong(LazyLongObjectInspector oi) { + super(oi); + data = new LongWritable(); + } + + public LazyDioLong(LazyDioLong copy) { + super(copy); + data = new LongWritable(copy.data.get()); + } + + /* (non-Javadoc) + * This provides a LazyLong like class which can be initialized from data stored in a + * binary format. + * + * @see org.apache.hadoop.hive.serde2.lazy.LazyObject#init + * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) + */ + @Override + public void init(ByteArrayRef bytes, int start, int length) { + + long value = 0; + + try { + in = new ByteStream.Input(bytes.getData(), start, length); + din = new DataInputStream(in); + value = din.readLong(); + data.set(value); + isNull = false; + } catch (IOException e) { + isNull = true; + } finally { + try { + din.close(); + } catch (IOException e) { + // swallow exception + } + try { + in.close(); + } catch (IOException e) { + // swallow exception + } + } + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java new file mode 100644 index 0000000..4bd87c4 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.lazydio; + +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector; + +/** + * LazyShortBinary for storing a short value as a ShortWritable. This class complements class + * LazyShort. Its primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * reads the short value stored from the default binary format. + */ +public class LazyDioShort extends LazyPrimitive<LazyShortObjectInspector, ShortWritable> { + + private ByteStream.Input in; + private DataInputStream din; + + public LazyDioShort(LazyShortObjectInspector oi) { + super(oi); + data = new ShortWritable(); + } + + public LazyDioShort(LazyDioShort copy) { + super(copy); + data = new ShortWritable(copy.data.get()); + } + + /* (non-Javadoc) + * This provides a LazyShort like class which can be initialized from data stored in a + * binary format. + * + * @see org.apache.hadoop.hive.serde2.lazy.LazyObject#init + * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) + */ + @Override + public void init(ByteArrayRef bytes, int start, int length) { + + short value = 0; + + try { + in = new ByteStream.Input(bytes.getData(), start, length); + din = new DataInputStream(in); + value = din.readShort(); + data.set(value); + isNull = false; + } catch (Exception e) { + isNull = true; + } finally { + try { + din.close(); + } catch (IOException e) { + // swallow exception + } + try { + in.close(); + } catch (IOException e) { + // swallow exception + } + } + } +}
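Taken together, LazyUtils.writePrimitive and the lazydio classes are intended to round-trip a value through its fixed-width binary form. A minimal sketch (not part of the patch; it assumes the existing LazyPrimitiveObjectInspectorFactory constant, ByteArrayRef.setData, and LazyPrimitive.getWritableObject from Hive's serde2 API):

import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.lazydio.LazyDioInteger;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;

public class LazyDioRoundTrip {
  public static void main(String[] args) throws Exception {
    // Serialize an int to its 4-byte big-endian form, as the HBase handler would store it.
    ByteStream.Output out = new ByteStream.Output();
    LazyUtils.writePrimitive(out, new IntWritable(2147483647),
        PrimitiveObjectInspectorFactory.writableIntObjectInspector);

    // Deserialize the same bytes through the binary-aware lazy object.
    ByteArrayRef ref = new ByteArrayRef();
    ref.setData(out.toByteArray());
    LazyDioInteger lazy = new LazyDioInteger(
        LazyPrimitiveObjectInspectorFactory.LAZY_INT_OBJECT_INSPECTOR);
    lazy.init(ref, 0, 4);
    System.out.println(lazy.getWritableObject()); // 2147483647
  }
}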