diff --git a/accumulo-handler/pom.xml b/accumulo-handler/pom.xml
new file mode 100644
index 0000000..c3efd0a
--- /dev/null
+++ b/accumulo-handler/pom.xml
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.hive</groupId>
+    <artifactId>hive</artifactId>
+    <version>0.14.0-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>hive-accumulo-handler</artifactId>
+  <packaging>jar</packaging>
+  <name>Hive Accumulo Handler</name>
+
+  <properties>
+    <hive.path.to.root>..</hive.path.to.root>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
+      <artifactId>accumulo-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
+      <artifactId>accumulo-fate</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
+      <artifactId>accumulo-start</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
+      <artifactId>accumulo-trace</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-common</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-metastore</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-serde</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-service</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-exec</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hive</groupId>
+      <artifactId>hive-shims</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <profiles>
+    <profile>
+      <id>hadoop-1</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-core</artifactId>
+          <version>${hadoop-20S.version}</version>
+          <optional>true</optional>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
+      <id>hadoop-2</id>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop-23.version}</version>
+          <optional>true</optional>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-mapreduce-client-core</artifactId>
+          <version>${hadoop-23.version}</version>
+          <optional>true</optional>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
+  <build>
+    <sourceDirectory>${basedir}/src/java</sourceDirectory>
+    <testSourceDirectory>${basedir}/src/test</testSourceDirectory>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloConnectionParameters.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloConnectionParameters.java
new file mode 100644
index 0000000..2b11f84
--- /dev/null
+++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloConnectionParameters.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.accumulo;
+
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.Connector;
+import org.apache.accumulo.core.client.Instance;
+import org.apache.accumulo.core.client.ZooKeeperInstance;
+import org.apache.accumulo.core.client.mock.MockInstance;
+import org.apache.accumulo.core.client.security.tokens.PasswordToken;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.base.Preconditions;
+
+/**
+ *
+ */
+public class AccumuloConnectionParameters {
+  public static final String USER_NAME = "accumulo.user.name";
+  public static final String USER_PASS = "accumulo.user.pass";
+  public static final String ZOOKEEPERS = "accumulo.zookeepers";
+  public static final String INSTANCE_NAME = "accumulo.instance.name";
+  public static final String TABLE_NAME = "accumulo.table.name";
+
+  public static final String USE_MOCK_INSTANCE = "accumulo.mock.instance";
+
+  protected Configuration conf;
+  protected boolean useMockInstance = false;
+
+  public AccumuloConnectionParameters(Configuration conf) {
+    // TableDesc#getDeserializer will ultimately instantiate the AccumuloSerDe with a null
+    // Configuration
+    // We have to accept this and just fail late if data is attempted to be pulled from the
+    // Configuration
+    this.conf = conf;
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  public String getAccumuloUserName() {
+    Preconditions.checkNotNull(conf);
+    return conf.get(USER_NAME);
+  }
+
+  public String getAccumuloPassword() {
+    Preconditions.checkNotNull(conf);
+    return conf.get(USER_PASS);
+  }
+
+  public String getAccumuloInstanceName() {
+    Preconditions.checkNotNull(conf);
+    return conf.get(INSTANCE_NAME);
+  }
+
+  public String getZooKeepers() {
+    Preconditions.checkNotNull(conf);
+    return conf.get(ZOOKEEPERS);
+  }
+
+  public String getAccumuloTableName() {
+    Preconditions.checkNotNull(conf);
+    return conf.get(TABLE_NAME);
+  }
+
+  public boolean useMockInstance() {
+    Preconditions.checkNotNull(conf);
+    return conf.getBoolean(USE_MOCK_INSTANCE, false);
+  }
+
+  public Instance getInstance() {
+    String instanceName = getAccumuloInstanceName();
+
+    // Fail with a good message
+    if (null == instanceName) {
+      throw new IllegalArgumentException("Accumulo instance name must be provided in hiveconf using "
+          + INSTANCE_NAME);
+    }
+
+    if (useMockInstance()) {
+      return new MockInstance(instanceName);
+    }
+
+    String zookeepers = getZooKeepers();
+
+    // Fail with a good message
+    if (null == zookeepers) {
+      throw new IllegalArgumentException("ZooKeeper quorum string must be provided in hiveconf using "
+          + ZOOKEEPERS);
+    }
+
+    return new ZooKeeperInstance(instanceName, zookeepers);
+  }
+
+  public Connector getConnector() throws AccumuloException, AccumuloSecurityException {
+    Instance inst = getInstance();
+    return getConnector(inst);
+  }
+
+  public Connector getConnector(Instance inst) throws AccumuloException, AccumuloSecurityException {
+    String username = getAccumuloUserName(), password = getAccumuloPassword();
+
+    // Fail with a good message
+    if (null == username) {
+      throw new IllegalArgumentException("Accumulo user name must be provided in hiveconf using "
+          + USER_NAME);
+    }
+    if (null == password) {
+      throw new IllegalArgumentException("Accumulo password must be provided in hiveconf using "
+          + USER_PASS);
+    }
+
+    return inst.getConnector(username, new PasswordToken(password));
+  }
+}
diff --git
a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloHiveConstants.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloHiveConstants.java new file mode 100644 index 0000000..6cdfe1b --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloHiveConstants.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo; + +import java.nio.charset.Charset; + +/** + * + */ +public class AccumuloHiveConstants { + public static final String ROWID = ":rowID"; + public static final char COLON = ':', COMMA = ',', ESCAPE = '\\', POUND = '#', ASTERISK = '*'; + + public static final String ESCAPED_COLON = Character.toString(ESCAPE) + Character.toString(COLON); + + // Escape the escape + public static final String ESCAPED_COLON_REGEX = Character.toString(ESCAPE) + + Character.toString(ESCAPE) + Character.toString(COLON); + + public static final String ESCAPED_ASTERISK = Character.toString(ESCAPE) + + Character.toString(ASTERISK); + + // Escape the escape, and escape the asterisk + public static final String ESCAPED_ASERTISK_REGEX = Character.toString(ESCAPE) + + Character.toString(ESCAPE) + Character.toString(ESCAPE) + Character.toString(ASTERISK); + + public static final Charset UTF_8 = Charset.forName("UTF-8"); +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloHiveRow.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloHiveRow.java new file mode 100644 index 0000000..c6ee5c4 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloHiveRow.java @@ -0,0 +1,230 @@ +package org.apache.hadoop.hive.accumulo; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +import com.google.common.base.Preconditions; + +/** + * Holds column tuples for rowID. Each tuple contains column family label, qualifier label, and byte + * array value. + */ +public class AccumuloHiveRow implements Writable { + + private String rowId; + private List tuples = new ArrayList(); + + public AccumuloHiveRow() {} + + public AccumuloHiveRow(String rowId) { + this.rowId = rowId; + } + + public void setRowId(String rowId) { + this.rowId = rowId; + } + + public List getTuples() { + return Collections.unmodifiableList(tuples); + } + + /** + * @return true if this instance has a tuple containing fam and qual, false otherwise. 
+ */ + public boolean hasFamAndQual(Text fam, Text qual) { + for (ColumnTuple tuple : tuples) { + if (tuple.getCf().equals(fam) && tuple.getCq().equals(qual)) { + return true; + } + } + return false; + } + + /** + * @return byte [] value for first tuple containing fam and qual or null if no match. + */ + public byte[] getValue(Text fam, Text qual) { + for (ColumnTuple tuple : tuples) { + if (tuple.getCf().equals(fam) && tuple.getCq().equals(qual)) { + return tuple.getValue(); + } + } + return null; + } + + public String getRowId() { + return rowId; + } + + public void clear() { + this.rowId = null; + this.tuples = new ArrayList(); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder("AccumuloHiveRow{"); + builder.append("rowId='").append(rowId).append("', tuples: "); + for (ColumnTuple tuple : tuples) { + builder.append(tuple.toString()); + builder.append("\n"); + } + return builder.toString(); + } + + @Override + public boolean equals(Object o) { + if (o instanceof AccumuloHiveRow) { + AccumuloHiveRow other = (AccumuloHiveRow) o; + if (null == rowId) { + if (null != other.rowId) { + return false; + } + } else if (!rowId.equals(other.rowId)) { + return false; + } + + return tuples.equals(other.tuples); + } + + return false; + } + + @Override + public void write(DataOutput dataOutput) throws IOException { + if (null != rowId) { + dataOutput.writeBoolean(true); + dataOutput.writeUTF(rowId); + } else { + dataOutput.writeBoolean(false); + } + int size = tuples.size(); + dataOutput.writeInt(size); + for (ColumnTuple tuple : tuples) { + Text cf = tuple.getCf(), cq = tuple.getCq(); + dataOutput.writeInt(cf.getLength()); + dataOutput.write(cf.getBytes(), 0, cf.getLength()); + dataOutput.writeInt(cq.getLength()); + dataOutput.write(cq.getBytes(), 0, cq.getLength()); + byte[] value = tuple.getValue(); + dataOutput.writeInt(value.length); + dataOutput.write(value); + } + } + + @Override + public void readFields(DataInput dataInput) throws IOException { + if (dataInput.readBoolean()) { + rowId = dataInput.readUTF(); + } + int size = dataInput.readInt(); + for (int i = 0; i < size; i++) { + int cfLength = dataInput.readInt(); + byte[] cfData = new byte[cfLength]; + dataInput.readFully(cfData, 0, cfLength); + Text cf = new Text(cfData); + int cqLength = dataInput.readInt(); + byte[] cqData = new byte[cqLength]; + dataInput.readFully(cqData, 0, cqLength); + Text cq = new Text(cqData); + int valSize = dataInput.readInt(); + byte[] val = new byte[valSize]; + for (int j = 0; j < valSize; j++) { + val[j] = dataInput.readByte(); + } + tuples.add(new ColumnTuple(cf, cq, val)); + } + } + + public void add(String cf, String qual, byte[] val) { + Preconditions.checkNotNull(cf); + Preconditions.checkNotNull(qual); + Preconditions.checkNotNull(val); + + add(new Text(cf), new Text(qual), val); + } + + public void add(Text cf, Text qual, byte[] val) { + Preconditions.checkNotNull(cf); + Preconditions.checkNotNull(qual); + Preconditions.checkNotNull(val); + + tuples.add(new ColumnTuple(cf, qual, val)); + } + + public static class ColumnTuple { + private final Text cf; + private final Text cq; + private final byte[] value; + + public ColumnTuple(Text cf, Text cq, byte[] value) { + this.value = value; + this.cf = cf; + this.cq = cq; + } + + public byte[] getValue() { + return value; + } + + public Text getCf() { + return cf; + } + + public Text getCq() { + return cq; + } + + @Override + public int hashCode() { + HashCodeBuilder hcb = new HashCodeBuilder(9683, 68783); + return 
hcb.append(cf).append(cq).append(value).toHashCode(); + } + + @Override + public boolean equals(Object o) { + if (o instanceof ColumnTuple) { + ColumnTuple other = (ColumnTuple) o; + if (null == cf) { + if (null != other.cf) { + return false; + } + } else if (!cf.equals(other.cf)) { + return false; + } + + if (null == cq) { + if (null != other.cq) { + return false; + } + } else if (!cq.equals(other.cq)) { + return false; + } + + if (null == value) { + if (null != other.value) { + return false; + } + } + + return Arrays.equals(value, other.value); + } + + return false; + } + + @Override + public String toString() { + return cf + " " + cq + " " + new String(value); + } + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloStorageHandler.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloStorageHandler.java new file mode 100644 index 0000000..8051ebd --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/AccumuloStorageHandler.java @@ -0,0 +1,344 @@ +package org.apache.hadoop.hive.accumulo; + +import java.io.IOException; +import java.util.Map; +import java.util.Properties; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.admin.TableOperations; +import org.apache.accumulo.fate.Fate; +import org.apache.accumulo.start.Main; +import org.apache.accumulo.trace.instrument.Tracer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableInputFormat; +import org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat; +import org.apache.hadoop.hive.accumulo.predicate.AccumuloPredicateHandler; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.metastore.HiveMetaHook; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputFormat; +import org.apache.hadoop.util.StringUtils; +import org.apache.zookeeper.ZooKeeper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Create table mapping to Accumulo for Hive. Handle predicate pushdown if necessary. 
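+ *
+ * The Accumulo table name is resolved from {@link AccumuloSerDeParameters#TABLE_NAME} in the
+ * table or SerDe properties, falling back to the Hive table name itself; connection details
+ * (instance name, ZooKeeper quorum, user credentials) are read from the hiveconf keys defined in
+ * {@link AccumuloConnectionParameters}.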
+ */ +public class AccumuloStorageHandler extends DefaultStorageHandler implements HiveMetaHook, + HiveStoragePredicateHandler { + private static final Logger log = LoggerFactory.getLogger(AccumuloStorageHandler.class); + private static final String DEFAULT_PREFIX = "default"; + + protected AccumuloPredicateHandler predicateHandler = AccumuloPredicateHandler.getInstance(); + protected AccumuloConnectionParameters connectionParams; + protected Configuration conf; + + /** + * Push down table properties into the JobConf. + * + * @param desc + * Hive table description + * @param jobProps + * Properties that will be added to the JobConf by Hive + */ + @Override + public void configureTableJobProperties(TableDesc desc, Map jobProps) { + // Should not be getting invoked, configureInputJobProperties or configureOutputJobProperties + // should be invoked instead. + configureInputJobProperties(desc, jobProps); + configureOutputJobProperties(desc, jobProps); + } + + protected String getTableName(Table table) throws MetaException { + // Use TBLPROPERTIES + String tableName = table.getParameters().get(AccumuloSerDeParameters.TABLE_NAME); + + if (null != tableName) { + return tableName; + } + + // Then try SERDEPROPERTIES + tableName = table.getSd().getSerdeInfo().getParameters() + .get(AccumuloSerDeParameters.TABLE_NAME); + + if (null != tableName) { + return tableName; + } + + // Use the hive table name, ignoring the default database + if (DEFAULT_PREFIX.equals(table.getDbName())) { + return table.getTableName(); + } else { + return table.getDbName() + "." + table.getTableName(); + } + } + + protected String getTableName(TableDesc tableDesc) { + Properties props = tableDesc.getProperties(); + String tableName = props.getProperty(AccumuloSerDeParameters.TABLE_NAME); + if (null != tableName) { + return tableName; + } + + tableName = props.getProperty(hive_metastoreConstants.META_TABLE_NAME); + + if (tableName.startsWith(DEFAULT_PREFIX + ".")) { + return tableName.substring(DEFAULT_PREFIX.length() + 1); + } + + return tableName; + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + connectionParams = new AccumuloConnectionParameters(conf); + } + + @SuppressWarnings("deprecation") + @Override + public Class getSerDeClass() { + return AccumuloSerDe.class; + } + + @Override + public HiveMetaHook getMetaHook() { + return this; + } + + @Override + public HiveAuthorizationProvider getAuthorizationProvider() throws HiveException { + return null; + } + + @Override + public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { + Properties props = tableDesc.getProperties(); + +// jobProperties.put(serdeConstants.LIST_COLUMNS, props.getProperty(serdeConstants.LIST_COLUMNS)); +// jobProperties.put(serdeConstants.LIST_TYPE_NAME, props.getProperty(serdeConstants.LIST_TYPE_NAME)); + + jobProperties.put(AccumuloSerDeParameters.COLUMN_MAPPINGS, + props.getProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS)); + + String tableName = props.getProperty(AccumuloSerDeParameters.TABLE_NAME); + if (null == tableName) { + tableName = getTableName(tableDesc); + } + jobProperties.put(AccumuloSerDeParameters.TABLE_NAME, + tableName); + + String useIterators = props.getProperty(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY); + if (useIterators != null) { + if (!useIterators.equalsIgnoreCase("true") && !useIterators.equalsIgnoreCase("false")) { + throw new IllegalArgumentException("Expected value of true or false for " 
+ + AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY); + } + + jobProperties.put(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, useIterators); + } + + String storageType = props.getProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE); + if (null != storageType) { + jobProperties.put(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, storageType); + } + + String authValue = props.getProperty(AccumuloSerDeParameters.AUTHORIZATIONS_KEY); + if (null != authValue) { + jobProperties.put(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, authValue); + } + + log.info("Computed input job properties of " + jobProperties); + } + + @Override + public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { + Properties props = tableDesc.getProperties(); + // Adding these job properties will make them available to the OutputFormat in checkOutputSpecs + jobProperties.put(AccumuloSerDeParameters.COLUMN_MAPPINGS, + props.getProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS)); + + String tableName = props.getProperty(AccumuloSerDeParameters.TABLE_NAME); + if (null == tableName) { + tableName = getTableName(tableDesc); + } + jobProperties.put(AccumuloSerDeParameters.TABLE_NAME, tableName); + + if (props.containsKey(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE)) { + jobProperties.put(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, + props.getProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE)); + } + + if (props.containsKey(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY)) { + jobProperties.put(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY, + props.getProperty(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY)); + } + } + + @SuppressWarnings("rawtypes") + @Override + public Class getInputFormatClass() { + return HiveAccumuloTableInputFormat.class; + } + + @Override + @SuppressWarnings("rawtypes") + public Class getOutputFormatClass() { + return HiveAccumuloTableOutputFormat.class; + } + + @Override + public void preCreateTable(Table table) throws MetaException { + boolean isExternal = isExternalTable(table); + if (table.getSd().getLocation() != null) { + throw new MetaException("Location can't be specified for Accumulo"); + } + + Map serdeParams = table.getSd().getSerdeInfo().getParameters(); + String columnMapping = serdeParams.get(AccumuloSerDeParameters.COLUMN_MAPPINGS); + if (columnMapping == null) { + throw new MetaException(AccumuloSerDeParameters.COLUMN_MAPPINGS + + " missing from SERDEPROPERTIES"); + } + + try { + String tblName = getTableName(table); + Connector connector = connectionParams.getConnector(); + TableOperations tableOpts = connector.tableOperations(); + + // Attempt to create the table, taking EXTERNAL into consideration + if (!tableOpts.exists(tblName)) { + if (!isExternal) { + tableOpts.create(tblName); + } else { + throw new MetaException("Accumulo table " + tblName + + " doesn't exist even though declared external"); + } + } else { + if (!isExternal) { + throw new MetaException("Table " + tblName + + " already exists in Accumulo. 
Use CREATE EXTERNAL TABLE to register with Hive."); + } + } + } catch (AccumuloSecurityException e) { + throw new MetaException(StringUtils.stringifyException(e)); + } catch (TableExistsException e) { + throw new MetaException(StringUtils.stringifyException(e)); + } catch (AccumuloException e) { + throw new MetaException(StringUtils.stringifyException(e)); + } + } + + protected boolean isExternalTable(Table table) { + return MetaStoreUtils.isExternalTable(table); + } + + @Override + public void rollbackCreateTable(Table table) throws MetaException { + // Same as commitDropTable where we always delete the data (accumulo table) + commitDropTable(table, true); + } + + @Override + public void commitCreateTable(Table table) throws MetaException { + // do nothing + } + + @Override + public void commitDropTable(Table table, boolean deleteData) throws MetaException { + String tblName = getTableName(table); + if (!isExternalTable(table)) { + try { + if (deleteData) { + TableOperations tblOpts = connectionParams.getConnector().tableOperations(); + if (tblOpts.exists(tblName)) { + tblOpts.delete(tblName); + } + } + } catch (AccumuloException e) { + throw new MetaException(StringUtils.stringifyException(e)); + } catch (AccumuloSecurityException e) { + throw new MetaException(StringUtils.stringifyException(e)); + } catch (TableNotFoundException e) { + throw new MetaException(StringUtils.stringifyException(e)); + } + } + } + + @Override + public void preDropTable(Table table) throws MetaException { + // do nothing + } + + @Override + public void rollbackDropTable(Table table) throws MetaException { + // do nothing + } + + @Override + public DecomposedPredicate decomposePredicate(JobConf conf, Deserializer deserializer, + ExprNodeDesc desc) { + if (!(deserializer instanceof AccumuloSerDe)) { + throw new RuntimeException("Expected an AccumuloSerDe but got " + + deserializer.getClass().getName()); + } + + AccumuloSerDe serDe = (AccumuloSerDe) deserializer; + if (serDe.getIteratorPushdown()) { + return predicateHandler.decompose(conf, desc); + } else { + log.info("Set to ignore Accumulo iterator pushdown, skipping predicate handler."); + return null; + } + } + + @Override + public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { + try { + Utils.addDependencyJars(jobConf, Tracer.class, Fate.class, Connector.class, Main.class, + ZooKeeper.class, AccumuloStorageHandler.class); + } catch (IOException e) { + log.error("Could not add necessary Accumulo dependencies to classpath", e); + } + + Properties tblProperties = tableDesc.getProperties(); + AccumuloSerDeParameters serDeParams = null; + try { + serDeParams = new AccumuloSerDeParameters(jobConf, tblProperties, AccumuloSerDe.class.getName()); + } catch (SerDeException e) { + log.error("Could not instantiate AccumuloSerDeParameters", e); + return; + } + + try { + serDeParams.getRowIdFactory().addDependencyJars(jobConf); + } catch (IOException e) { + log.error("Could not add necessary dependencies for " + serDeParams.getRowIdFactory().getClass(), e); + } + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloMap.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloMap.java new file mode 100644 index 0000000..effdc4b --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloMap.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hadoop.hive.accumulo.AccumuloHiveRow.ColumnTuple; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyMap; +import org.apache.hadoop.hive.serde2.lazy.LazyObject; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; + +import com.google.common.base.Charsets; + +/** + * A Hive Map created from some collection of Key-Values from one to many column families with one + * to many column qualifiers. 
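+ *
+ * For example (hypothetical data), a map column bound to column family {@code cf} with qualifier
+ * prefix {@code tag_} turns the Accumulo entries {@code cf:tag_a -> 1} and {@code cf:tag_b -> 2}
+ * into the Hive map {@code {a=1, b=2}}; the configured prefix is stripped from each qualifier to
+ * form the map key.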
+ */ +public class LazyAccumuloMap extends LazyMap { + + protected AccumuloHiveRow sourceRow; + protected HiveAccumuloMapColumnMapping columnMapping; + + public LazyAccumuloMap(LazyMapObjectInspector oi) { + super(oi); + } + + public void init(AccumuloHiveRow row, HiveAccumuloMapColumnMapping columnMapping) { + this.sourceRow = row; + this.columnMapping = columnMapping; + + this.setParsed(false); + } + + protected void parse() { + if (null == this.cachedMap) { + this.cachedMap = new LinkedHashMap(); + } else { + this.cachedMap.clear(); + } + + LazyMapObjectInspector lazyMoi = getInspector(); + + Text cf = new Text(columnMapping.getColumnFamily()); + for (ColumnTuple tuple : sourceRow.getTuples()) { + String cq = tuple.getCq().toString(); + + if (!cf.equals(tuple.getCf()) || !cq.startsWith(columnMapping.getColumnQualifierPrefix())) { + // A column family or qualifier we don't want to include in the map + continue; + } + + // Because we append the cq prefix when serializing the column + // we should also remove it when pulling it from Accumulo + cq = cq.substring(columnMapping.getColumnQualifierPrefix().length()); + + // Keys are always primitive, respect the binary + LazyPrimitive key = LazyFactory + .createLazyPrimitiveClass((PrimitiveObjectInspector) lazyMoi.getMapKeyObjectInspector(), + ColumnEncoding.BINARY == columnMapping.getKeyEncoding()); + + ByteArrayRef keyRef = new ByteArrayRef(); + keyRef.setData(cq.getBytes(Charsets.UTF_8)); + key.init(keyRef, 0, keyRef.getData().length); + + // Value can be anything, use the obj inspector and respect binary + LazyObject value = LazyFactory.createLazyObject(lazyMoi.getMapValueObjectInspector(), + ColumnEncoding.BINARY == columnMapping.getValueEncoding()); + + ByteArrayRef valueRef = new ByteArrayRef(); + valueRef.setData(tuple.getValue()); + value.init(valueRef, 0, valueRef.getData().length); + + cachedMap.put(key, value); + } + + this.setParsed(true); + } + + /** + * Get the value in the map for the given key. + * + * @param key + * The key, a column qualifier, from the map + * @return The object in the map at the given key + */ + @Override + public Object getMapValueElement(Object key) { + if (!getParsed()) { + parse(); + } + + for (Map.Entry entry : cachedMap.entrySet()) { + LazyPrimitive lazyKey = (LazyPrimitive) entry.getKey(); + + // getWritableObject() will convert LazyPrimitive to actual primitive + // writable objects. + Object keyI = lazyKey.getWritableObject(); + if (keyI == null) { + continue; + } + if (keyI.equals(key)) { + // Got a match, return the value + LazyObject v = (LazyObject) entry.getValue(); + return v == null ? 
v : v.getObject(); + } + } + + return null; + } + + @Override + public Map getMap() { + if (!getParsed()) { + parse(); + } + return cachedMap; + } + + @Override + public int getMapSize() { + if (!getParsed()) { + parse(); + } + return cachedMap.size(); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloRow.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloRow.java new file mode 100644 index 0000000..03cd250 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/LazyAccumuloRow.java @@ -0,0 +1,140 @@ +package org.apache.hadoop.hive.accumulo; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping; +import org.apache.hadoop.hive.accumulo.serde.AccumuloRowIdFactory; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; +import org.apache.hadoop.hive.serde2.lazy.LazyStruct; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.io.Text; +import org.apache.log4j.Logger; + +/** + * + * Parses column tuples in each AccumuloHiveRow and creates Lazy objects for each field. + * + */ +public class LazyAccumuloRow extends LazyStruct { + private static final Logger log = Logger.getLogger(LazyAccumuloRow.class); + + private AccumuloHiveRow row; + private List columnMappings; + private ArrayList cachedList = new ArrayList(); + private AccumuloRowIdFactory rowIdFactory; + + public LazyAccumuloRow(LazySimpleStructObjectInspector inspector) { + super(inspector); + } + + public void init(AccumuloHiveRow hiveRow, List columnMappings, + AccumuloRowIdFactory rowIdFactory) { + this.row = hiveRow; + this.columnMappings = columnMappings; + this.rowIdFactory = rowIdFactory; + setParsed(false); + } + + private void parse() { + if (getFields() == null) { + // Will properly set string or binary serialization via createLazyField(...) + initLazyFields(oi.getAllStructFieldRefs()); + } + if (!getParsed()) { + Arrays.fill(getFieldInited(), false); + setParsed(true); + } + } + + @Override + public Object getField(int id) { + if (!getParsed()) { + parse(); + } + return uncheckedGetField(id); + } + + /* + * split pairs by delimiter. 
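+ *
+ * Row ID mappings are served directly from the row's rowId, map-typed mappings are delegated to
+ * LazyAccumuloMap, and plain column mappings fetch the value stored under the mapped column
+ * family and qualifier.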
+ */ + private Object uncheckedGetField(int id) { + if (!getFieldInited()[id]) { + ByteArrayRef ref; + ColumnMapping columnMapping = columnMappings.get(id); + + if (columnMapping instanceof HiveAccumuloMapColumnMapping) { + HiveAccumuloMapColumnMapping mapColumnMapping = (HiveAccumuloMapColumnMapping) columnMapping; + + LazyAccumuloMap map = (LazyAccumuloMap) getFields()[id]; + map.init(row, mapColumnMapping); + } else { + if (columnMapping instanceof HiveAccumuloRowIdColumnMapping) { + // Use the rowID directly + ref = new ByteArrayRef(); + ref.setData(row.getRowId().getBytes()); + } else if (columnMapping instanceof HiveAccumuloColumnMapping) { + HiveAccumuloColumnMapping accumuloColumnMapping = (HiveAccumuloColumnMapping) columnMapping; + + // Use the colfam and colqual to get the value + byte[] val = row.getValue(new Text(accumuloColumnMapping.getColumnFamily()), new Text( + accumuloColumnMapping.getColumnQualifier())); + if (val == null) { + return null; + } else { + ref = new ByteArrayRef(); + ref.setData(val); + } + } else { + log.error("Could not process ColumnMapping of type " + columnMapping.getClass() + + " at offset " + id + " in column mapping: " + columnMapping.getMappingSpec()); + throw new IllegalArgumentException("Cannot process ColumnMapping of type " + + columnMapping.getClass()); + } + + getFields()[id].init(ref, 0, ref.getData().length); + } + + // HIVE-3179 only init the field when it isn't null + getFieldInited()[id] = true; + } + + return getFields()[id].getObject(); + } + + @Override + public ArrayList getFieldsAsList() { + if (!getParsed()) { + parse(); + } + cachedList.clear(); + for (int i = 0; i < getFields().length; i++) { + cachedList.add(uncheckedGetField(i)); + } + return cachedList; + } + + @Override + protected LazyObjectBase createLazyField(int fieldID, StructField fieldRef) throws SerDeException { + final ColumnMapping columnMapping = columnMappings.get(fieldID); + + if (columnMapping instanceof HiveAccumuloRowIdColumnMapping) { + return rowIdFactory.createRowId(fieldRef.getFieldObjectInspector()); + } else if (columnMapping instanceof HiveAccumuloMapColumnMapping) { + return new LazyAccumuloMap((LazyMapObjectInspector) fieldRef.getFieldObjectInspector()); + } else { + return LazyFactory.createLazyObject(fieldRef.getFieldObjectInspector(), + ColumnEncoding.BINARY == columnMapping.getEncoding()); + } + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/Utils.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/Utils.java new file mode 100644 index 0000000..16abac2 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/Utils.java @@ -0,0 +1,352 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.apache.hadoop.hive.accumulo; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.net.URL; +import java.net.URLDecoder; +import java.text.MessageFormat; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.jar.JarFile; +import java.util.jar.JarOutputStream; +import java.util.jar.Manifest; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipOutputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Logger; + +import com.google.common.base.Preconditions; + +/** + * Accumulo doesn't have a TableMapReduceUtil.addDependencyJars method like HBase which is very + * helpful + */ +public class Utils { + private static final Logger log = Logger.getLogger(Utils.class); + + // Thanks, HBase + public static void addDependencyJars(Configuration conf, Class... classes) throws IOException { + FileSystem localFs = FileSystem.getLocal(conf); + Set jars = new HashSet(); + // Add jars that are already in the tmpjars variable + jars.addAll(conf.getStringCollection("tmpjars")); + + // add jars as we find them to a map of contents jar name so that we can + // avoid + // creating new jars for classes that have already been packaged. + Map packagedClasses = new HashMap(); + + // Add jars containing the specified classes + for (Class clazz : classes) { + if (clazz == null) + continue; + + Path path = findOrCreateJar(clazz, localFs, packagedClasses); + if (path == null) { + log.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster."); + continue; + } + if (!localFs.exists(path)) { + log.warn("Could not validate jar file " + path + " for class " + clazz); + continue; + } + jars.add(path.toString()); + } + if (jars.isEmpty()) + return; + + conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()]))); + } + + /** + * If org.apache.hadoop.util.JarFinder is available (0.23+ hadoop), finds the Jar for a class or + * creates it if it doesn't exist. If the class is in a directory in the classpath, it creates a + * Jar on the fly with the contents of the directory and returns the path to that Jar. If a Jar is + * created, it is created in the system temporary directory. Otherwise, returns an existing jar + * that contains a class of the same name. Maintains a mapping from jar contents to the tmp jar + * created. + * + * @param my_class + * the class to find. + * @param fs + * the FileSystem with which to qualify the returned path. + * @param packagedClasses + * a map of class name to path. + * @return a jar file that contains the class. + * @throws IOException + */ + @SuppressWarnings("deprecation") + private static Path findOrCreateJar(Class my_class, FileSystem fs, + Map packagedClasses) throws IOException { + // attempt to locate an existing jar for the class. 
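+    // Lookup order: scan the classpath (and the packagedClasses cache) via findContainingJar,
+    // then fall back to getJar, which uses Hadoop's JarFinder or the backported jarFinderGetJar
+    // to package a directory of classes into a temporary jar.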
+ String jar = findContainingJar(my_class, packagedClasses); + if (null == jar || jar.isEmpty()) { + jar = getJar(my_class); + updateMap(jar, packagedClasses); + } + + if (null == jar || jar.isEmpty()) { + return null; + } + + log.debug(String.format("For class %s, using jar %s", my_class.getName(), jar)); + return new Path(jar).makeQualified(fs); + } + + /** + * Add entries to packagedClasses corresponding to class files contained in + * jar. + * + * @param jar + * The jar who's content to list. + * @param packagedClasses + * map[class -> jar] + */ + private static void updateMap(String jar, Map packagedClasses) throws IOException { + if (null == jar || jar.isEmpty()) { + return; + } + ZipFile zip = null; + try { + zip = new ZipFile(jar); + for (Enumeration iter = zip.entries(); iter.hasMoreElements();) { + ZipEntry entry = iter.nextElement(); + if (entry.getName().endsWith("class")) { + packagedClasses.put(entry.getName(), jar); + } + } + } finally { + if (null != zip) + zip.close(); + } + } + + /** + * Find a jar that contains a class of the same name, if any. It will return a jar file, even if + * that is not the first thing on the class path that has a class with the same name. Looks first + * on the classpath and then in the packagedClasses map. + * + * @param my_class + * the class to find. + * @return a jar file that contains the class, or null. + * @throws IOException + */ + private static String findContainingJar(Class my_class, Map packagedClasses) + throws IOException { + ClassLoader loader = my_class.getClassLoader(); + String class_file = my_class.getName().replaceAll("\\.", "/") + ".class"; + + // first search the classpath + for (Enumeration itr = loader.getResources(class_file); itr.hasMoreElements();) { + URL url = itr.nextElement(); + if ("jar".equals(url.getProtocol())) { + String toReturn = url.getPath(); + if (toReturn.startsWith("file:")) { + toReturn = toReturn.substring("file:".length()); + } + // URLDecoder is a misnamed class, since it actually decodes + // x-www-form-urlencoded MIME type rather than actual + // URL encoding (which the file path has). Therefore it would + // decode +s to ' 's which is incorrect (spaces are actually + // either unencoded or encoded as "%20"). Replace +s first, so + // that they are kept sacred during the decoding process. + toReturn = toReturn.replaceAll("\\+", "%2B"); + toReturn = URLDecoder.decode(toReturn, "UTF-8"); + return toReturn.replaceAll("!.*$", ""); + } + } + + // now look in any jars we've packaged using JarFinder. Returns null + // when + // no jar is found. + return packagedClasses.get(class_file); + } + + /** + * Invoke 'getJar' on a JarFinder implementation. Useful for some job configuration contexts + * (HBASE-8140) and also for testing on MRv2. First check if we have HADOOP-9426. Lacking that, + * fall back to the backport. + * + * @param my_class + * the class to find. + * @return a jar file that contains the class, or null. 
+ */ + private static String getJar(Class my_class) { + String ret = null; + String hadoopJarFinder = "org.apache.hadoop.util.JarFinder"; + Class jarFinder = null; + try { + log.debug("Looking for " + hadoopJarFinder + "."); + jarFinder = Class.forName(hadoopJarFinder); + log.debug(hadoopJarFinder + " found."); + Method getJar = jarFinder.getMethod("getJar", Class.class); + ret = (String) getJar.invoke(null, my_class); + } catch (ClassNotFoundException e) { + log.debug("Using backported JarFinder."); + ret = jarFinderGetJar(my_class); + } catch (InvocationTargetException e) { + // function was properly called, but threw it's own exception. + // Unwrap it + // and pass it on. + throw new RuntimeException(e.getCause()); + } catch (Exception e) { + // toss all other exceptions, related to reflection failure + throw new RuntimeException("getJar invocation failed.", e); + } + + return ret; + } + + /** + * Returns the full path to the Jar containing the class. It always return a JAR. + * + * @param klass + * class. + * + * @return path to the Jar containing the class. + */ + @SuppressWarnings("rawtypes") + public static String jarFinderGetJar(Class klass) { + Preconditions.checkNotNull(klass, "klass"); + ClassLoader loader = klass.getClassLoader(); + if (loader != null) { + String class_file = klass.getName().replaceAll("\\.", "/") + ".class"; + try { + for (Enumeration itr = loader.getResources(class_file); itr.hasMoreElements();) { + URL url = (URL) itr.nextElement(); + String path = url.getPath(); + if (path.startsWith("file:")) { + path = path.substring("file:".length()); + } + path = URLDecoder.decode(path, "UTF-8"); + if ("jar".equals(url.getProtocol())) { + path = URLDecoder.decode(path, "UTF-8"); + return path.replaceAll("!.*$", ""); + } else if ("file".equals(url.getProtocol())) { + String klassName = klass.getName(); + klassName = klassName.replace(".", "/") + ".class"; + path = path.substring(0, path.length() - klassName.length()); + File baseDir = new File(path); + File testDir = new File(System.getProperty("test.build.dir", "target/test-dir")); + testDir = testDir.getAbsoluteFile(); + if (!testDir.exists()) { + testDir.mkdirs(); + } + File tempJar = File.createTempFile("hadoop-", "", testDir); + tempJar = new File(tempJar.getAbsolutePath() + ".jar"); + createJar(baseDir, tempJar); + return tempJar.getAbsolutePath(); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return null; + } + + private static void copyToZipStream(InputStream is, ZipEntry entry, ZipOutputStream zos) + throws IOException { + zos.putNextEntry(entry); + byte[] arr = new byte[4096]; + int read = is.read(arr); + while (read > -1) { + zos.write(arr, 0, read); + read = is.read(arr); + } + is.close(); + zos.closeEntry(); + } + + public static void jarDir(File dir, String relativePath, ZipOutputStream zos) throws IOException { + Preconditions.checkNotNull(relativePath, "relativePath"); + Preconditions.checkNotNull(zos, "zos"); + + // by JAR spec, if there is a manifest, it must be the first entry in + // the + // ZIP. 
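+    // Write the manifest entry first (an empty manifest if the directory does not supply one),
+    // then recurse into the directory; zipDir skips the manifest so it is not added twice.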
+ File manifestFile = new File(dir, JarFile.MANIFEST_NAME); + ZipEntry manifestEntry = new ZipEntry(JarFile.MANIFEST_NAME); + if (!manifestFile.exists()) { + zos.putNextEntry(manifestEntry); + new Manifest().write(new BufferedOutputStream(zos)); + zos.closeEntry(); + } else { + InputStream is = new FileInputStream(manifestFile); + copyToZipStream(is, manifestEntry, zos); + } + zos.closeEntry(); + zipDir(dir, relativePath, zos, true); + zos.close(); + } + + private static void zipDir(File dir, String relativePath, ZipOutputStream zos, boolean start) + throws IOException { + String[] dirList = dir.list(); + for (String aDirList : dirList) { + File f = new File(dir, aDirList); + if (!f.isHidden()) { + if (f.isDirectory()) { + if (!start) { + ZipEntry dirEntry = new ZipEntry(relativePath + f.getName() + "/"); + zos.putNextEntry(dirEntry); + zos.closeEntry(); + } + String filePath = f.getPath(); + File file = new File(filePath); + zipDir(file, relativePath + f.getName() + "/", zos, false); + } else { + String path = relativePath + f.getName(); + if (!path.equals(JarFile.MANIFEST_NAME)) { + ZipEntry anEntry = new ZipEntry(path); + InputStream is = new FileInputStream(f); + copyToZipStream(is, anEntry, zos); + } + } + } + } + } + + private static void createJar(File dir, File jarFile) throws IOException { + Preconditions.checkNotNull(dir, "dir"); + Preconditions.checkNotNull(jarFile, "jarFile"); + File jarDir = jarFile.getParentFile(); + if (!jarDir.exists()) { + if (!jarDir.mkdirs()) { + throw new IOException(MessageFormat.format("could not create dir [{0}]", jarDir)); + } + } + JarOutputStream zos = new JarOutputStream(new FileOutputStream(jarFile)); + jarDir(dir, "", zos); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnEncoding.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnEncoding.java new file mode 100644 index 0000000..8e10313 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnEncoding.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.columns; + +import java.util.HashMap; +import java.util.Map.Entry; + +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; + +/** + * Encapsulate the encoding of values within the given column in Accumulo + */ +public enum ColumnEncoding { + STRING("string", "s"), BINARY("binary", "b"); + + private static final HashMap CODE_CACHE = new HashMap(), + NAME_CACHE = new HashMap(); + + static { + CODE_CACHE.put(STRING.getCode(), STRING); + CODE_CACHE.put(BINARY.getCode(), BINARY); + + NAME_CACHE.put(STRING.getName(), STRING); + NAME_CACHE.put(BINARY.getName(), BINARY); + } + + private final String name; + private final String code; + + private ColumnEncoding(String name, String code) { + this.name = name; + this.code = code; + } + + public String getName() { + return this.name; + } + + public String getCode() { + return code; + } + + /** + * Get the ColumnEncoding which has the given code. + * + * @param code + * The one-character 'code' which uniquely identifies the ColumnEncoding + * @return The ColumnEncoding with the code equal to the provided argument + */ + public static ColumnEncoding fromCode(String code) { + if (!CODE_CACHE.containsKey(code)) { + throw new IllegalArgumentException("No ColumnEncoding defined with code " + code); + } + + return CODE_CACHE.get(code); + } + + public static ColumnEncoding fromName(String name) { + if (!NAME_CACHE.containsKey(name)) { + throw new IllegalArgumentException("No ColumnEncoding defined with name " + name); + } + + return NAME_CACHE.get(name); + } + + public static ColumnEncoding get(String nameOrCode) { + ColumnEncoding encoding = CODE_CACHE.get(nameOrCode); + if (null != encoding) { + return encoding; + } + + encoding = NAME_CACHE.get(nameOrCode); + if (null != encoding) { + return encoding; + } + + throw new IllegalArgumentException("No ColumnEncoding defined for " + nameOrCode); + } + + public static ColumnEncoding getFromMapping(String columnMapping) { + Preconditions.checkNotNull(columnMapping); + + String encoding = getColumnEncoding(columnMapping); + + return get(encoding); + } + + /** + * Determines if a custom encoding was specified for the give column. + * + * @param columnMapping + * The mapping from Hive column to an Accumulo column + * @return True if the column mapping string specifies an encoding, false otherwise + */ + public static boolean hasColumnEncoding(String columnMapping) { + Preconditions.checkNotNull(columnMapping); + + int offset = columnMapping.lastIndexOf(AccumuloHiveConstants.POUND); + + // Make sure that the '#' wasn't escaped + if (0 < offset && AccumuloHiveConstants.ESCAPE == columnMapping.charAt(offset - 1)) { + // The encoding name/codes don't contain pound signs + return false; + } + + return -1 != offset; + } + + public static String getColumnEncoding(String columnMapping) { + int offset = columnMapping.lastIndexOf(AccumuloHiveConstants.POUND); + + // Make sure that the '#' wasn't escaped + if (0 < offset && AccumuloHiveConstants.ESCAPE == columnMapping.charAt(offset - 1)) { + throw new IllegalArgumentException("Column mapping did not contain a column encoding: " + + columnMapping); + } + + return columnMapping.substring(offset + 1); + } + + public static ColumnEncoding getDefault() { + return STRING; + } + + /** + * Removes the column encoding code and separator from the original column mapping string. 
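+ * For example, a (hypothetical) mapping of {@code cf:cq#b} becomes {@code cf:cq}.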
Throws + * an IllegalArgumentException if this method is called on a string that doesn't contain a code. + * + * @param columnMapping + * The mapping from Hive column to Accumulo column + * @return The column mapping with the code removed + */ + public static String stripCode(String columnMapping) { + Preconditions.checkNotNull(columnMapping); + + int offset = columnMapping.lastIndexOf(AccumuloHiveConstants.POUND); + if (-1 == offset + || (0 < offset && AccumuloHiveConstants.ESCAPE == columnMapping.charAt(offset - 1))) { + throw new IllegalArgumentException( + "Provided column mapping does not define a column encoding"); + } + + return columnMapping.substring(0, offset); + } + + public static boolean isMapEncoding(String columnEncoding) { + return -1 != columnEncoding.indexOf(AccumuloHiveConstants.COLON); + } + + public static Entry getMapEncoding(String columnEncoding) { + int index = columnEncoding.indexOf(AccumuloHiveConstants.COLON); + if (-1 == index) { + throw new IllegalArgumentException( + "Serialized column encoding did not contain a pair of encodings to split"); + } + + String encoding1 = columnEncoding.substring(0, index), encoding2 = columnEncoding + .substring(index + 1); + + return Maps.immutableEntry(get(encoding1), get(encoding2)); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnMapper.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnMapper.java new file mode 100644 index 0000000..ff9db46 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnMapper.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.columns; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Logger; + +import com.google.common.base.Preconditions; + +/** + * + */ +public class ColumnMapper { + private static final Logger log = Logger.getLogger(ColumnMapper.class); + + private List columnMappings; + private int rowIdOffset; + private HiveAccumuloRowIdColumnMapping rowIdMapping = null; + private final ColumnEncoding defaultEncoding; + + /** + * Create a mapping from Hive columns (rowID and column) to Accumulo columns (column family and + * qualifier). The ordering of the {@link ColumnMapping}s is important as it aligns with the + * ordering of the columns for the Hive table schema. 
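+ *
+ * For example, a (hypothetical) serialized mapping of {@code :rowID,cf:cq1,cf:cq2#b,cf:tag*}
+ * binds the first Hive column to the Accumulo row ID, the next two to the {@code cf:cq1} and
+ * {@code cf:cq2} columns (the latter binary encoded), and the last to a Hive map built from all
+ * qualifiers in {@code cf} that start with the {@code tag} prefix.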
+ * + * @param serializedColumnMappings + * Comma-separated list of designators that map to Accumulo columns whose offsets + * correspond to the Hive table schema + * @throws TooManyAccumuloColumnsException + */ + public ColumnMapper(String serializedColumnMappings, String defaultStorageType, + List columnNames, List columnTypes) throws TooManyAccumuloColumnsException { + Preconditions.checkNotNull(serializedColumnMappings); + + String[] parsedColumnMappingValue = StringUtils.split(serializedColumnMappings, + AccumuloHiveConstants.COMMA); + columnMappings = new ArrayList(parsedColumnMappingValue.length); + rowIdOffset = -1; + + // Determine the default encoding type (specified on the table, or the global default + // if none was provided) + if (null == defaultStorageType || "".equals(defaultStorageType)) { + defaultEncoding = ColumnEncoding.getDefault(); + } else { + defaultEncoding = ColumnEncoding.get(defaultStorageType.toLowerCase()); + } + + if (parsedColumnMappingValue.length > columnNames.size()) { + throw new TooManyAccumuloColumnsException("Found " + parsedColumnMappingValue.length + + " columns, but only know of " + columnNames.size() + " Hive column names"); + } + + if (parsedColumnMappingValue.length > columnTypes.size()) { + throw new TooManyAccumuloColumnsException("Found " + parsedColumnMappingValue.length + + " columns, but only know of " + columnNames.size() + " Hive column types"); + } + + for (int i = 0; i < parsedColumnMappingValue.length; i++) { + String columnMappingStr = parsedColumnMappingValue[i]; + + // Create the mapping for this column, with configured encoding + ColumnMapping columnMapping = ColumnMappingFactory.get(columnMappingStr, defaultEncoding, + columnNames.get(i), columnTypes.get(i)); + + if (columnMapping instanceof HiveAccumuloRowIdColumnMapping) { + if (-1 != rowIdOffset) { + throw new IllegalArgumentException( + "Column mapping should only have one definition with a value of " + + AccumuloHiveConstants.ROWID); + } + + rowIdOffset = i; + rowIdMapping = (HiveAccumuloRowIdColumnMapping) columnMapping; + } + + columnMappings.add(columnMapping); + } + } + + public int size() { + return columnMappings.size(); + } + + public ColumnMapping get(int i) { + return columnMappings.get(i); + } + + public List getColumnMappings() { + return Collections.unmodifiableList(columnMappings); + } + + public boolean hasRowIdMapping() { + return null != rowIdMapping; + } + + public HiveAccumuloRowIdColumnMapping getRowIdMapping() { + return rowIdMapping; + } + + public int getRowIdOffset() { + return rowIdOffset; + } + + public String getTypesString() { + StringBuilder sb = new StringBuilder(); + for (ColumnMapping columnMapping : columnMappings) { + if (sb.length() > 0) { + sb.append(AccumuloHiveConstants.COLON); + } + + if (columnMapping instanceof HiveAccumuloRowIdColumnMapping) { + // the rowID column is a string + sb.append(serdeConstants.STRING_TYPE_NAME); + } else if (columnMapping instanceof HiveAccumuloColumnMapping) { + // a normal column is also a string + sb.append(serdeConstants.STRING_TYPE_NAME); + } else if (columnMapping instanceof HiveAccumuloMapColumnMapping) { + // TODO can we be more precise than string,string? 
+ sb.append(serdeConstants.MAP_TYPE_NAME).append("<").append(serdeConstants.STRING_TYPE_NAME) + .append(",").append(serdeConstants.STRING_TYPE_NAME).append(">"); + } else { + throw new IllegalArgumentException("Cannot process ColumnMapping of type " + + columnMapping.getClass().getName()); + } + } + + return sb.toString(); + } + + public ColumnMapping getColumnMappingForHiveColumn(List hiveColumns, String hiveColumnName) { + Preconditions.checkNotNull(hiveColumns); + Preconditions.checkNotNull(hiveColumnName); + Preconditions.checkArgument(columnMappings.size() <= hiveColumns.size(), + "Expected equal number of column mappings and Hive columns, " + columnMappings + ", " + + hiveColumns); + + int hiveColumnOffset = 0; + for (; hiveColumnOffset < hiveColumns.size() && hiveColumnOffset < columnMappings.size(); hiveColumnOffset++) { + if (hiveColumns.get(hiveColumnOffset).equals(hiveColumnName)) { + return columnMappings.get(hiveColumnOffset); + } + } + + log.error("Could not find offset for Hive column with name '" + hiveColumnName + + "' with columns " + hiveColumns); + throw new IllegalArgumentException("Could not find offset for Hive column with name " + + hiveColumnName); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(32); + sb.append("[").append(this.getClass().getSimpleName()).append(" "); + sb.append(columnMappings).append(", rowIdOffset: ").append(this.rowIdOffset) + .append(", defaultEncoding: "); + sb.append(this.defaultEncoding).append("]"); + return sb.toString(); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnMapping.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnMapping.java new file mode 100644 index 0000000..e1d19f9 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnMapping.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.columns; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +import com.google.common.base.Preconditions; + +/** + * + */ +public abstract class ColumnMapping { + + // SerDe property for how the Hive column maps to Accumulo + protected final String mappingSpec; + + // The manner in which the values in this column are de/serialized from/to Accumulo + protected final ColumnEncoding encoding; + + // The name of the Hive column + protected final String columnName; + + // The type of the Hive column + // Cannot store the actual TypeInfo because that would require + // Hive jars on the Accumulo classpath which we don't want + protected final String columnType; + + protected ColumnMapping(String mappingSpec, ColumnEncoding encoding, String columnName, + String columnType) { + Preconditions.checkNotNull(mappingSpec); + Preconditions.checkNotNull(encoding); + Preconditions.checkNotNull(columnName); + Preconditions.checkNotNull(columnType); + + this.mappingSpec = mappingSpec; + this.encoding = encoding; + this.columnName = columnName; + this.columnType = columnType; + } + + protected ColumnMapping(String mappingSpec, ColumnEncoding encoding, String columnName, + TypeInfo columnType) { + Preconditions.checkNotNull(mappingSpec); + Preconditions.checkNotNull(encoding); + Preconditions.checkNotNull(columnName); + Preconditions.checkNotNull(columnType); + + this.mappingSpec = mappingSpec; + this.encoding = encoding; + this.columnName = columnName; + this.columnType = columnType.getTypeName(); + } + + /** + * The property defining how this Column is mapped into Accumulo + */ + public String getMappingSpec() { + return mappingSpec; + } + + /** + * The manner in which the value is encoded in Accumulo + */ + public ColumnEncoding getEncoding() { + return encoding; + } + + /** + * The name of the Hive column this is mapping + */ + public String getColumnName() { + return columnName; + } + + /** + * The @{link TypeInfo} of the Hive column this is mapping + */ + public String getColumnType() { + return columnType; + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnMappingFactory.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnMappingFactory.java new file mode 100644 index 0000000..a241882 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/ColumnMappingFactory.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.columns; + +import java.util.Map.Entry; + +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.log4j.Logger; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; + +/** + * + */ +public class ColumnMappingFactory { + private static final Logger log = Logger.getLogger(ColumnMappingFactory.class); + + /** + * Generate the proper instance of a ColumnMapping + * + * @param columnSpec + * Specification for mapping this column to Accumulo + * @param defaultEncoding + * The default encoding in which values should be encoded to Accumulo + */ + public static ColumnMapping get(String columnSpec, ColumnEncoding defaultEncoding, + String columnName, TypeInfo columnType) { + Preconditions.checkNotNull(columnSpec); + Preconditions.checkNotNull(columnName); + Preconditions.checkNotNull(columnType); + ColumnEncoding encoding = defaultEncoding; + + // Check for column encoding specification + if (ColumnEncoding.hasColumnEncoding(columnSpec)) { + String columnEncodingStr = ColumnEncoding.getColumnEncoding(columnSpec); + columnSpec = ColumnEncoding.stripCode(columnSpec); + + if (AccumuloHiveConstants.ROWID.equalsIgnoreCase(columnSpec)) { + return new HiveAccumuloRowIdColumnMapping(columnSpec, + ColumnEncoding.get(columnEncodingStr), columnName, columnType.getTypeName()); + } else { + Entry pair = parseMapping(columnSpec); + + if (isPrefix(pair.getValue())) { + // Sanity check that, for a map, we got 2 encodings + if (!ColumnEncoding.isMapEncoding(columnEncodingStr)) { + throw new IllegalArgumentException("Expected map encoding for a map specification, " + + columnSpec + " with encoding " + columnEncodingStr); + } + + Entry encodings = ColumnEncoding + .getMapEncoding(columnEncodingStr); + + return new HiveAccumuloMapColumnMapping(pair.getKey(), pair.getValue(), + encodings.getKey(), encodings.getValue(), columnName, columnType.getTypeName()); + } else { + return new HiveAccumuloColumnMapping(pair.getKey(), pair.getValue(), + ColumnEncoding.getFromMapping(columnEncodingStr), columnName, columnType.getTypeName()); + } + } + } else { + if (AccumuloHiveConstants.ROWID.equalsIgnoreCase(columnSpec)) { + return new HiveAccumuloRowIdColumnMapping(columnSpec, defaultEncoding, columnName, + columnType.getTypeName()); + } else { + Entry pair = parseMapping(columnSpec); + boolean isPrefix = isPrefix(pair.getValue()); + + String cq = pair.getValue(); + + // Replace any \* that appear in the prefix with a regular * + if (-1 != cq.indexOf(AccumuloHiveConstants.ESCAPED_ASTERISK)) { + cq = cq.replaceAll(AccumuloHiveConstants.ESCAPED_ASERTISK_REGEX, + Character.toString(AccumuloHiveConstants.ASTERISK)); + } + + if (isPrefix) { + return new HiveAccumuloMapColumnMapping(pair.getKey(), cq.substring(0, cq.length() - 1), + defaultEncoding, defaultEncoding, columnName, columnType.getTypeName()); + } else { + return new HiveAccumuloColumnMapping(pair.getKey(), cq, encoding, columnName, columnType.getTypeName()); + } + } + } + } + + public static ColumnMapping getMap(String columnSpec, ColumnEncoding keyEncoding, + ColumnEncoding valueEncoding, String columnName, TypeInfo columnType) { + Entry pair = parseMapping(columnSpec); + return new HiveAccumuloMapColumnMapping(pair.getKey(), pair.getValue(), keyEncoding, + valueEncoding, columnName, columnType.toString()); + + } + + public static boolean isPrefix(String maybePrefix) { + Preconditions.checkNotNull(maybePrefix); + 
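// Editor's illustrative examples (assuming ASTERISK is '*' and ESCAPE is '\', neither constant
// value is shown in this patch):
//   isPrefix("tags*")    -> true   (unescaped trailing asterisk marks a map prefix)
//   isPrefix("tags\\*")  -> false  (i.e. tags\*, the asterisk is escaped and therefore literal)
//   isPrefix("tags")     -> false  (no trailing asterisk)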
+ if (AccumuloHiveConstants.ASTERISK == maybePrefix.charAt(maybePrefix.length() - 1)) { + if (maybePrefix.length() > 1) { + return AccumuloHiveConstants.ESCAPE != maybePrefix.charAt(maybePrefix.length() - 2); + } else { + return true; + } + } + + // If we couldn't find an asterisk, it's not a prefix + return false; + } + + /** + * Consumes the column mapping specification and breaks it into column family and column + * qualifier. + */ + public static Entry parseMapping(String columnSpec) + throws InvalidColumnMappingException { + int index = 0; + while (true) { + if (index >= columnSpec.length()) { + log.error("Cannot parse '" + columnSpec + "' as colon-separated column configuration"); + throw new InvalidColumnMappingException( + "Columns must be provided as colon-separated family and qualifier pairs"); + } + + index = columnSpec.indexOf(AccumuloHiveConstants.COLON, index); + + if (-1 == index) { + log.error("Cannot parse '" + columnSpec + "' as colon-separated column configuration"); + throw new InvalidColumnMappingException( + "Columns must be provided as colon-separated family and qualifier pairs"); + } + + // Check for an escape character before the colon + if (index - 1 > 0) { + char testChar = columnSpec.charAt(index - 1); + if (AccumuloHiveConstants.ESCAPE == testChar) { + // this colon is escaped, search again after it + index++; + continue; + } + + // If the previous character isn't an escape characters, it's the separator + } + + // Can't be escaped, it is the separator + break; + } + + String cf = columnSpec.substring(0, index), cq = columnSpec.substring(index + 1); + + // Check for the escaped colon to remove before doing the expensive regex replace + if (-1 != cf.indexOf(AccumuloHiveConstants.ESCAPED_COLON)) { + cf = cf.replaceAll(AccumuloHiveConstants.ESCAPED_COLON_REGEX, + Character.toString(AccumuloHiveConstants.COLON)); + } + + // Check for the escaped colon to remove before doing the expensive regex replace + if (-1 != cq.indexOf(AccumuloHiveConstants.ESCAPED_COLON)) { + cq = cq.replaceAll(AccumuloHiveConstants.ESCAPED_COLON_REGEX, + Character.toString(AccumuloHiveConstants.COLON)); + } + + return Maps.immutableEntry(cf, cq); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveAccumuloColumnMapping.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveAccumuloColumnMapping.java new file mode 100644 index 0000000..d09ade1 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveAccumuloColumnMapping.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.columns; + +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.apache.log4j.Logger; + +import com.google.common.base.Charsets; + +/** + * A Hive column which maps to a column family and column qualifier pair in Accumulo + */ +public class HiveAccumuloColumnMapping extends ColumnMapping { + @SuppressWarnings("unused") + private static final Logger log = Logger.getLogger(HiveAccumuloColumnMapping.class); + + protected String columnFamily, columnQualifier; + protected byte[] columnFamilyBytes, columnQualifierBytes; + + public HiveAccumuloColumnMapping(String cf, String cq, ColumnEncoding encoding, + String columnName, String columnType) { + super(cf + AccumuloHiveConstants.COLON + cq, encoding, columnName, columnType); + + columnFamily = cf; + columnQualifier = cq; + } + + public String getColumnFamily() { + return this.columnFamily; + } + + /** + * Cached bytes for the columnFamily. Modifications to the bytes will affect those stored in this + * ColumnMapping -- such modifications are highly recommended against. + * + * @return UTF8 formatted bytes + */ + public byte[] getColumnFamilyBytes() { + if (null == columnFamilyBytes) { + columnFamilyBytes = columnFamily.getBytes(Charsets.UTF_8); + } + + return columnFamilyBytes; + } + + public String getColumnQualifier() { + return this.columnQualifier; + } + + /** + * Cached bytes for the columnQualifier. Modifications to the bytes will affect those stored in + * this ColumnMapping -- such modifications are highly recommended against. + * + * @return UTF8 formatted bytes + */ + public byte[] getColumnQualifierBytes() { + if (null == columnQualifierBytes) { + columnQualifierBytes = columnQualifier.getBytes(Charsets.UTF_8); + } + + return columnQualifierBytes; + } + + public String serialize() { + StringBuilder sb = new StringBuilder(16); + sb.append(columnFamily).append(AccumuloHiveConstants.COLON); + if (null != columnQualifier) { + sb.append(columnQualifier); + } + return sb.toString(); + } + + @Override + public String toString() { + return "[" + this.getClass().getSimpleName() + ": " + columnFamily + ":" + columnQualifier + + ", encoding " + encoding + "]"; + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveAccumuloMapColumnMapping.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveAccumuloMapColumnMapping.java new file mode 100644 index 0000000..b2082e8 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveAccumuloMapColumnMapping.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.columns; + +import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; + +import com.google.common.base.Preconditions; + +/** + * ColumnMapping for combining Accumulo columns into a single Hive Map. Expects ColumnEncoding + * values for both the Key and Value of the Map. + */ +public class HiveAccumuloMapColumnMapping extends ColumnMapping { + + protected final String columnFamily, columnQualifierPrefix; + protected final ColumnEncoding keyEncoding, valueEncoding; + + /** + * @param columnFamily + * The column family that all qualifiers within should be placed into the same Hive map + * @param columnQualifierPrefix + * The column qualifier prefix to include in the map, null is treated as an empty prefix + * @param keyEncoding + * The encoding scheme for keys in this column family + * @param valueEncoding + * The encoding scheme for the Accumulo values + */ + public HiveAccumuloMapColumnMapping(String columnFamily, String columnQualifierPrefix, + ColumnEncoding keyEncoding, ColumnEncoding valueEncoding, String columnName, + String columnType) { + // Try to make something reasonable to pass up to the base class + super((null == columnFamily ? "" : columnFamily) + AccumuloHiveConstants.COLON, valueEncoding, + columnName, columnType); + + Preconditions.checkNotNull(columnFamily, "Must provide a column family"); + + this.columnFamily = columnFamily; + this.columnQualifierPrefix = (null == columnQualifierPrefix) ? "" : columnQualifierPrefix; + this.keyEncoding = keyEncoding; + this.valueEncoding = valueEncoding; + } + + public String getColumnFamily() { + return columnFamily; + } + + public String getColumnQualifierPrefix() { + return columnQualifierPrefix; + } + + public ColumnEncoding getKeyEncoding() { + return keyEncoding; + } + + public ColumnEncoding getValueEncoding() { + return valueEncoding; + } + + @Override + public boolean equals(Object o) { + if (o instanceof HiveAccumuloMapColumnMapping) { + HiveAccumuloMapColumnMapping other = (HiveAccumuloMapColumnMapping) o; + return columnFamily.equals(other.columnFamily) + && columnQualifierPrefix.equals(other.columnQualifierPrefix) + && keyEncoding.equals(other.keyEncoding) && valueEncoding.equals(other.valueEncoding); + } + + return false; + } + + @Override + public int hashCode() { + HashCodeBuilder hcb = new HashCodeBuilder(23, 31); + hcb.append(columnFamily).append(columnQualifierPrefix).append(keyEncoding) + .append(valueEncoding); + return hcb.toHashCode(); + } + + @Override + public String toString() { + return "[" + this.getClass().getSimpleName() + ": " + columnFamily + ":" + + columnQualifierPrefix + "* encoding: " + keyEncoding + ":" + valueEncoding + "]"; + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveAccumuloRowIdColumnMapping.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveAccumuloRowIdColumnMapping.java new file mode 100644 index 0000000..d40b025 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveAccumuloRowIdColumnMapping.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.columns; + +import org.apache.accumulo.core.data.Mutation; +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; + +import com.google.common.base.Preconditions; + +/** + * {@link ColumnMapping} which corresponds to the Hive column which should be used as the rowID in a + * {@link Mutation} + */ +public class HiveAccumuloRowIdColumnMapping extends ColumnMapping { + + public HiveAccumuloRowIdColumnMapping(String columnSpec, ColumnEncoding encoding, + String columnName, String columnType) { + super(columnSpec, encoding, columnName, columnType); + + // Ensure that we have the correct identifier as the column name + Preconditions.checkArgument(columnSpec.equalsIgnoreCase(AccumuloHiveConstants.ROWID)); + } + + @Override + public String toString() { + return "[" + this.getClass().getSimpleName() + ", " + this.mappingSpec + ", encoding " + + encoding + "]"; + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveColumn.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveColumn.java new file mode 100644 index 0000000..a8855f7 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/HiveColumn.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.columns; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +import com.google.common.base.Preconditions; + +/** + * + */ +public class HiveColumn { + + // The name of this column in the Hive schema + protected final String columnName; + + // The Hive type of this column + protected final TypeInfo columnType; + + public HiveColumn(String columnName, TypeInfo columnType) { + Preconditions.checkNotNull(columnName); + Preconditions.checkNotNull(columnType); + + this.columnName = columnName; + this.columnType = columnType; + } + + /** + * Get the name of the Hive column + */ + public String getColumnName() { + return columnName; + } + + /** + * The Hive type of this column + */ + public TypeInfo getColumnType() { + return columnType; + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/InvalidColumnMappingException.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/InvalidColumnMappingException.java new file mode 100644 index 0000000..eb230c4 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/columns/InvalidColumnMappingException.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.columns; + +/** + * + */ +public class InvalidColumnMappingException extends IllegalArgumentException { + + private static final long serialVersionUID = 1L; + + public InvalidColumnMappingException() { + super(); + } + + public InvalidColumnMappingException(String msg) { + super(msg); + } + + public InvalidColumnMappingException(String message, Throwable cause) { + super(message, cause); + } + + public InvalidColumnMappingException(Throwable cause) { + super(cause); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloRecordReader.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloRecordReader.java new file mode 100644 index 0000000..45607cb --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloRecordReader.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.mr; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.SortedMap; + +import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.util.PeekingIterator; +import org.apache.hadoop.hive.accumulo.AccumuloHiveRow; +import org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.RecordReader; + +import com.google.common.collect.Lists; + +/** + * Translate the {@link Key} {@link Value} pairs from {@link AccumuloInputFormat} to a + * {@link Writable} for consumption by the {@link AccumuloSerDe}. + */ +public class HiveAccumuloRecordReader implements RecordReader { + private RecordReader>> recordReader; + private int iteratorCount; + + public HiveAccumuloRecordReader( + RecordReader>> recordReader, int iteratorCount) { + this.recordReader = recordReader; + this.iteratorCount = iteratorCount; + } + + @Override + public void close() throws IOException { + recordReader.close(); + } + + @Override + public Text createKey() { + return new Text(); + } + + @Override + public AccumuloHiveRow createValue() { + return new AccumuloHiveRow(); + } + + @Override + public long getPos() throws IOException { + return 0; + } + + @Override + public float getProgress() throws IOException { + return recordReader.getProgress(); + } + + @Override + public boolean next(Text rowKey, AccumuloHiveRow row) throws IOException { + Text key = recordReader.createKey(); + PeekingIterator> iter = recordReader.createValue(); + if (recordReader.next(key, iter)) { + row.clear(); + row.setRowId(key.toString()); + List keys = Lists.newArrayList(); + List values = Lists.newArrayList(); + while (iter.hasNext()) { // collect key/values for this row. + Map.Entry kv = iter.next(); + keys.add(kv.getKey()); + values.add(kv.getValue()); + + } + if (iteratorCount == 0) { // no encoded values, we can push directly to row. + pushToValue(keys, values, row); + } else { + for (int i = 0; i < iteratorCount; i++) { // each iterator creates a level of encoding. + SortedMap decoded = PrimitiveComparisonFilter.decodeRow(keys.get(0), + values.get(0)); + keys = Lists.newArrayList(decoded.keySet()); + values = Lists.newArrayList(decoded.values()); + } + pushToValue(keys, values, row); // after decoding we can push to value. + } + + return true; + } else { + return false; + } + } + + // flatten key/value pairs into row object for use in Serde. 
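// Editor's usage sketch (illustrative only; "reader" is a hypothetical, already-configured
// instance of this class):
//   Text rowKey = reader.createKey();
//   AccumuloHiveRow row = reader.createValue();
//   while (reader.next(rowKey, row)) {
//     // each iteration exposes one Accumulo row as (family, qualifier, value) tuples plus its row ID
//   }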
+ private void pushToValue(List keys, List values, AccumuloHiveRow row) + throws IOException { + Iterator kIter = keys.iterator(); + Iterator vIter = values.iterator(); + while (kIter.hasNext()) { + Key k = kIter.next(); + Value v = vIter.next(); + row.add(k.getColumnFamily().toString(), k.getColumnQualifier().toString(), v.get()); + } + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloSplit.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloSplit.java new file mode 100644 index 0000000..530f232 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloSplit.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.accumulo.mr; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.accumulo.core.client.mapred.RangeInputSplit; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.HiveInputFormat.HiveInputSplit; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Logger; + +/** + * Wraps RangeInputSplit into a FileSplit so Hadoop won't complain when it tries to make its own + * Path. + * + *

+ * If the {@link RangeInputSplit} is used directly, it will hit a branch of code in + * {@link HiveInputSplit} which generates an invalid Path. Wrap it ourselves so that it doesn't + * error + */ +public class HiveAccumuloSplit extends FileSplit implements InputSplit { + private static final Logger log = Logger.getLogger(HiveAccumuloSplit.class); + + private RangeInputSplit split; + + public HiveAccumuloSplit() { + super((Path) null, 0, 0, (String[]) null); + split = new RangeInputSplit(); + } + + public HiveAccumuloSplit(RangeInputSplit split, Path dummyPath) { + super(dummyPath, 0, 0, (String[]) null); + this.split = split; + } + + public RangeInputSplit getSplit() { + return this.split; + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + split.readFields(in); + } + + @Override + public String toString() { + return "HiveAccumuloSplit: " + split; + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + split.write(out); + } + + @Override + public long getLength() { + int len = 0; + try { + return split.getLength(); + } catch (IOException e) { + log.error("Error getting length for split: " + StringUtils.stringifyException(e)); + } + return len; + } + + @Override + public String[] getLocations() throws IOException { + return split.getLocations(); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableInputFormat.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableInputFormat.java new file mode 100644 index 0000000..385b2f4 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableInputFormat.java @@ -0,0 +1,485 @@ +package org.apache.hadoop.hive.accumulo.mr; + +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.Instance; +import org.apache.accumulo.core.client.IteratorSetting; +import org.apache.accumulo.core.client.mapred.AccumuloInputFormat; +import org.apache.accumulo.core.client.mapred.AccumuloRowInputFormat; +import org.apache.accumulo.core.client.mapred.RangeInputSplit; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.AuthenticationToken; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.accumulo.core.util.Pair; +import org.apache.accumulo.core.util.PeekingIterator; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters; +import org.apache.hadoop.hive.accumulo.AccumuloHiveRow; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapper; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping; +import 
org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping; +import org.apache.hadoop.hive.accumulo.predicate.AccumuloPredicateHandler; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Wraps older InputFormat for use with Hive. + * + * Configure input scan with proper ranges, iterators, and columns based on serde properties for + * Hive table. + */ +public class HiveAccumuloTableInputFormat implements + org.apache.hadoop.mapred.InputFormat { + private static final Logger log = LoggerFactory.getLogger(HiveAccumuloTableInputFormat.class); + + // Visible for testing + protected AccumuloRowInputFormat accumuloInputFormat = new AccumuloRowInputFormat(); + protected AccumuloPredicateHandler predicateHandler = AccumuloPredicateHandler.getInstance(); + + @Override + public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException { + final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf); + final Instance instance = accumuloParams.getInstance(); + final ColumnMapper columnMapper; + try { + columnMapper = getColumnMapper(jobConf); + } catch (TooManyAccumuloColumnsException e) { + throw new IOException(e); + } + + JobContext context = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(jobConf)); + Path[] tablePaths = FileInputFormat.getInputPaths(context); + + try { + final Connector connector = accumuloParams.getConnector(instance); + final List columnMappings = columnMapper.getColumnMappings(); + final List iterators = predicateHandler.getIterators(jobConf, columnMapper); + final Collection ranges = predicateHandler.getRanges(jobConf, columnMapper); + + // Setting an empty collection of ranges will, unexpectedly, scan all data + // We don't want that. + if (null != ranges && ranges.isEmpty()) { + return new InputSplit[0]; + } + + // Set the relevant information in the Configuration for the AccumuloInputFormat + configure(jobConf, instance, connector, accumuloParams, columnMapper, iterators, ranges); + + int numColumns = columnMappings.size(); + + List readColIds = ColumnProjectionUtils.getReadColumnIDs(jobConf); + + // Sanity check + if (numColumns < readColIds.size()) + throw new IOException("Number of column mappings (" + numColumns + ")" + + " numbers less than the hive table columns. 
(" + readColIds.size() + ")"); + + // get splits from Accumulo + InputSplit[] splits = accumuloInputFormat.getSplits(jobConf, numSplits); + + HiveAccumuloSplit[] hiveSplits = new HiveAccumuloSplit[splits.length]; + for (int i = 0; i < splits.length; i++) { + RangeInputSplit ris = (RangeInputSplit) splits[i]; + hiveSplits[i] = new HiveAccumuloSplit(ris, tablePaths[0]); + } + + return hiveSplits; + } catch (AccumuloException e) { + log.error("Could not configure AccumuloInputFormat", e); + throw new IOException(StringUtils.stringifyException(e)); + } catch (AccumuloSecurityException e) { + log.error("Could not configure AccumuloInputFormat", e); + throw new IOException(StringUtils.stringifyException(e)); + } catch (SerDeException e) { + log.error("Could not configure AccumuloInputFormat", e); + throw new IOException(StringUtils.stringifyException(e)); + } + } + + /** + * Setup accumulo input format from conf properties. Delegates to final RecordReader from mapred + * package. + * + * @param inputSplit + * @param jobConf + * @param reporter + * @return RecordReader + * @throws IOException + */ + @Override + public RecordReader getRecordReader(InputSplit inputSplit, + final JobConf jobConf, final Reporter reporter) throws IOException { + final ColumnMapper columnMapper; + try { + columnMapper = getColumnMapper(jobConf); + } catch (TooManyAccumuloColumnsException e) { + throw new IOException(e); + } + + try { + final List iterators = predicateHandler.getIterators(jobConf, columnMapper); + + HiveAccumuloSplit hiveSplit = (HiveAccumuloSplit) inputSplit; + RangeInputSplit rangeSplit = hiveSplit.getSplit(); + + log.info("Split: " + rangeSplit); + + // The RangeInputSplit *should* have all of the necesary information contained in it + // which alleviates us from re-parsing our configuration from the AccumuloStorageHandler + // and re-setting it into the Configuration (like we did in getSplits(...)). Thus, it should + // be unnecessary to re-invoke configure(...) + + // ACCUMULO-2962 Iterators weren't getting serialized into the InputSplit, but we can + // compensate because we still have that info. + // Should be fixed in Accumulo 1.5.2 and 1.6.1 + if (null == rangeSplit.getIterators() + || (rangeSplit.getIterators().isEmpty() && !iterators.isEmpty())) { + log.debug("Re-setting iterators on InputSplit due to Accumulo bug."); + rangeSplit.setIterators(iterators); + } + + // ACCUMULO-3015 Like the above, RangeInputSplit should have the table name + // but we want it to, so just re-set it if it's null. 
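// Editor's note (descriptive only): on Accumulo releases affected by the issues referenced
// above, the deserialized split can arrive with neither iterators nor a table name even though
// getSplits(...) configured both, hence the defensive re-population here and just below.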
+ if (null == getTableName(rangeSplit)) { + final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters( + jobConf); + log.debug("Re-setting table name on InputSplit due to Accumulo bug."); + setTableName(rangeSplit, accumuloParams.getAccumuloTableName()); + } + + final RecordReader>> recordReader = accumuloInputFormat + .getRecordReader(rangeSplit, jobConf, reporter); + + return new HiveAccumuloRecordReader(recordReader, iterators.size()); + } catch (SerDeException e) { + throw new IOException(StringUtils.stringifyException(e)); + } + } + + protected ColumnMapper getColumnMapper(Configuration conf) throws IOException, + TooManyAccumuloColumnsException { + final String defaultStorageType = conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE); + + String[] columnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS); + if (null == columnNamesArr) { + throw new IOException( + "Hive column names must be provided to InputFormat in the Configuration"); + } + List columnNames = Arrays.asList(columnNamesArr); + + String serializedTypes = conf.get(serdeConstants.LIST_COLUMN_TYPES); + if (null == serializedTypes) { + throw new IOException( + "Hive column types must be provided to InputFormat in the Configuration"); + } + ArrayList columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(serializedTypes); + + return new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), defaultStorageType, + columnNames, columnTypes); + } + + /** + * Configure the underlying AccumuloInputFormat + * + * @param conf + * Job configuration + * @param instance + * Accumulo instance + * @param connector + * Accumulo connector + * @param accumuloParams + * Connection information to the Accumulo instance + * @param columnMapper + * Configuration of Hive to Accumulo columns + * @param iterators + * Any iterators to be configured server-side + * @param ranges + * Accumulo ranges on for the query + * @throws AccumuloSecurityException + * @throws AccumuloException + * @throws SerDeException + */ + protected void configure(JobConf conf, Instance instance, Connector connector, + AccumuloConnectionParameters accumuloParams, ColumnMapper columnMapper, + List iterators, Collection ranges) throws AccumuloSecurityException, + AccumuloException, SerDeException { + + // Handle implementation of Instance and invoke appropriate InputFormat method + if (instance instanceof MockInstance) { + setMockInstance(conf, instance.getInstanceName()); + } else { + setZooKeeperInstance(conf, instance.getInstanceName(), instance.getZooKeepers()); + } + + // Set the username/passwd for the Accumulo connection + setConnectorInfo(conf, accumuloParams.getAccumuloUserName(), + new PasswordToken(accumuloParams.getAccumuloPassword())); + + // Read from the given Accumulo table + setInputTableName(conf, accumuloParams.getAccumuloTableName()); + + // Check Configuration for any user-provided Authorization definition + Authorizations auths = AccumuloSerDeParameters.getAuthorizationsFromConf(conf); + + if (null == auths) { + // Default to all of user's authorizations when no configuration is provided + auths = connector.securityOperations().getUserAuthorizations( + accumuloParams.getAccumuloUserName()); + } + + // Implicitly handles users providing invalid authorizations + setScanAuthorizations(conf, auths); + + // restrict with any filters found from WHERE predicates. + addIterators(conf, iterators); + + // restrict with any ranges found from WHERE predicates. 
+ // not setting ranges scans the entire table + if (null != ranges) { + log.info("Setting ranges: " + ranges); + setRanges(conf, ranges); + } + + // Restrict the set of columns that we want to read from the Accumulo table + HashSet> pairs = getPairCollection(columnMapper.getColumnMappings()); + if (null != pairs && !pairs.isEmpty()) { + fetchColumns(conf, pairs); + } + } + + // Wrap the static AccumuloInputFormat methods with methods that we can + // verify were correctly called via Mockito + + protected void setMockInstance(JobConf conf, String instanceName) { + try { + AccumuloInputFormat.setMockInstance(conf, instanceName); + } catch (IllegalStateException e) { + // AccumuloInputFormat complains if you re-set an already set value. We just don't care. + log.debug("Ignoring exception setting mock instance of " + instanceName, e); + } + } + + @SuppressWarnings("deprecation") + protected void setZooKeeperInstance(JobConf conf, String instanceName, String zkHosts) { + // To support builds against 1.5, we can't use the new 1.6 setZooKeeperInstance which + // takes a ClientConfiguration class that only exists in 1.6 + try { + AccumuloInputFormat.setZooKeeperInstance(conf, instanceName, zkHosts); + } catch (IllegalStateException ise) { + // AccumuloInputFormat complains if you re-set an already set value. We just don't care. + log.debug("Ignoring exception setting ZooKeeper instance of " + instanceName + " at " + + zkHosts, ise); + } + } + + protected void setConnectorInfo(JobConf conf, String user, AuthenticationToken token) + throws AccumuloSecurityException { + try { + AccumuloInputFormat.setConnectorInfo(conf, user, token); + } catch (IllegalStateException e) { + // AccumuloInputFormat complains if you re-set an already set value. We just don't care. + log.debug("Ignoring exception setting Accumulo Connector instance for user " + user, e); + } + } + + protected void setInputTableName(JobConf conf, String tableName) { + AccumuloInputFormat.setInputTableName(conf, tableName); + } + + protected void setScanAuthorizations(JobConf conf, Authorizations auths) { + AccumuloInputFormat.setScanAuthorizations(conf, auths); + } + + protected void addIterators(JobConf conf, List iterators) { + for (IteratorSetting is : iterators) { + AccumuloInputFormat.addIterator(conf, is); + } + } + + protected void setRanges(JobConf conf, Collection ranges) { + AccumuloInputFormat.setRanges(conf, ranges); + } + + protected void fetchColumns(JobConf conf, Set> cfCqPairs) { + AccumuloInputFormat.fetchColumns(conf, cfCqPairs); + } + + /** + * Create col fam/qual pairs from pipe separated values, usually from config object. Ignores + * rowID. 
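 * (Editor's illustrative sketch; the mapping strings are assumptions.) For mappings "person:name"
 * and "tags:*" (a map prefix), the returned set would contain the pairs (person, name) and
 * (tags, null); a null column qualifier asks Accumulo to fetch the entire column family.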
+ * + * @param columnMappings + * The list of ColumnMappings for the given query + * @return a Set of Pairs of colfams and colquals + */ + protected HashSet> getPairCollection(List columnMappings) { + final HashSet> pairs = new HashSet>(); + + for (ColumnMapping columnMapping : columnMappings) { + if (columnMapping instanceof HiveAccumuloColumnMapping) { + HiveAccumuloColumnMapping accumuloColumnMapping = (HiveAccumuloColumnMapping) columnMapping; + + Text cf = new Text(accumuloColumnMapping.getColumnFamily()); + Text cq = null; + + // A null cq implies an empty column qualifier + if (null != accumuloColumnMapping.getColumnQualifier()) { + cq = new Text(accumuloColumnMapping.getColumnQualifier()); + } + + pairs.add(new Pair(cf, cq)); + } else if (columnMapping instanceof HiveAccumuloMapColumnMapping) { + HiveAccumuloMapColumnMapping mapMapping = (HiveAccumuloMapColumnMapping) columnMapping; + + // Can't fetch prefix on colqual, must pull the entire qualifier + // TODO use an iterator to do the filter, server-side. + pairs.add(new Pair(new Text(mapMapping.getColumnFamily()), null)); + } + } + + log.info("Computed columns to fetch (" + pairs + ") from " + columnMappings); + + return pairs; + } + + /** + * Reflection to work around Accumulo 1.5 and 1.6 incompatibilities. Throws an {@link IOException} + * for any reflection related exceptions + * + * @param split + * A RangeInputSplit + * @return The name of the table from the split + * @throws IOException + */ + protected String getTableName(RangeInputSplit split) throws IOException { + // ACCUMULO-3017 shenanigans with method names changing without deprecation + Method getTableName = null; + try { + getTableName = RangeInputSplit.class.getMethod("getTableName"); + } catch (SecurityException e) { + log.debug("Could not get getTableName method from RangeInputSplit", e); + } catch (NoSuchMethodException e) { + log.debug("Could not get getTableName method from RangeInputSplit", e); + } + + if (null != getTableName) { + try { + return (String) getTableName.invoke(split); + } catch (IllegalArgumentException e) { + log.debug("Could not invoke getTableName method from RangeInputSplit", e); + } catch (IllegalAccessException e) { + log.debug("Could not invoke getTableName method from RangeInputSplit", e); + } catch (InvocationTargetException e) { + log.debug("Could not invoke getTableName method from RangeInputSplit", e); + } + } + + Method getTable; + try { + getTable = RangeInputSplit.class.getMethod("getTable"); + } catch (SecurityException e) { + throw new IOException("Could not get table name from RangeInputSplit", e); + } catch (NoSuchMethodException e) { + throw new IOException("Could not get table name from RangeInputSplit", e); + } + + try { + return (String) getTable.invoke(split); + } catch (IllegalArgumentException e) { + throw new IOException("Could not get table name from RangeInputSplit", e); + } catch (IllegalAccessException e) { + throw new IOException("Could not get table name from RangeInputSplit", e); + } catch (InvocationTargetException e) { + throw new IOException("Could not get table name from RangeInputSplit", e); + } + } + + /** + * Sets the table name on a RangeInputSplit, accounting for change in method name. 
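 * (Editor's note: one RangeInputSplit API generation exposes setTableName(String) while another
 * only has setTable(String); this helper tries the former via reflection and falls back to the
 * latter.)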
Any reflection + * related exception is wrapped in an {@link IOException} + * + * @param split + * The RangeInputSplit to operate on + * @param tableName + * The name of the table to set + * @throws IOException + */ + protected void setTableName(RangeInputSplit split, String tableName) throws IOException { + // ACCUMULO-3017 shenanigans with method names changing without deprecation + Method setTableName = null; + try { + setTableName = RangeInputSplit.class.getMethod("setTableName", String.class); + } catch (SecurityException e) { + log.debug("Could not get getTableName method from RangeInputSplit", e); + } catch (NoSuchMethodException e) { + log.debug("Could not get getTableName method from RangeInputSplit", e); + } + + if (null != setTableName) { + try { + setTableName.invoke(split, tableName); + return; + } catch (IllegalArgumentException e) { + log.debug("Could not invoke getTableName method from RangeInputSplit", e); + } catch (IllegalAccessException e) { + log.debug("Could not invoke getTableName method from RangeInputSplit", e); + } catch (InvocationTargetException e) { + log.debug("Could not invoke getTableName method from RangeInputSplit", e); + } + } + + Method setTable; + try { + setTable = RangeInputSplit.class.getMethod("setTable", String.class); + } catch (SecurityException e) { + throw new IOException("Could not set table name from RangeInputSplit", e); + } catch (NoSuchMethodException e) { + throw new IOException("Could not set table name from RangeInputSplit", e); + } + + try { + setTable.invoke(split, tableName); + } catch (IllegalArgumentException e) { + throw new IOException("Could not set table name from RangeInputSplit", e); + } catch (IllegalAccessException e) { + throw new IOException("Could not set table name from RangeInputSplit", e); + } catch (InvocationTargetException e) { + throw new IOException("Could not set table name from RangeInputSplit", e); + } + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableOutputFormat.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableOutputFormat.java new file mode 100644 index 0000000..5cf008e --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/mr/HiveAccumuloTableOutputFormat.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.mr; + +import java.io.IOException; + +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.mapred.AccumuloOutputFormat; +import org.apache.accumulo.core.client.security.tokens.AuthenticationToken; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.mapred.JobConf; + +import com.google.common.base.Preconditions; + +/** + * + */ +public class HiveAccumuloTableOutputFormat extends AccumuloOutputFormat { + + @Override + public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { + configureAccumuloOutputFormat(job); + + super.checkOutputSpecs(ignored, job); + } + + protected void configureAccumuloOutputFormat(JobConf job) throws IOException { + AccumuloConnectionParameters cnxnParams = new AccumuloConnectionParameters(job); + + final String tableName = job.get(AccumuloSerDeParameters.TABLE_NAME); + + // Make sure we actually go the table name + Preconditions.checkNotNull(tableName, + "Expected Accumulo table name to be provided in job configuration"); + + // Set the necessary Accumulo information + try { + // Username/passwd for Accumulo + setAccumuloConnectorInfo(job, cnxnParams.getAccumuloUserName(), + new PasswordToken(cnxnParams.getAccumuloPassword())); + + if (cnxnParams.useMockInstance()) { + setAccumuloMockInstance(job, cnxnParams.getAccumuloInstanceName()); + } else { + // Accumulo instance name with ZK quorum + setAccumuloZooKeeperInstance(job, cnxnParams.getAccumuloInstanceName(), + cnxnParams.getZooKeepers()); + } + + // Set the table where we're writing this data + setDefaultAccumuloTableName(job, tableName); + } catch (AccumuloSecurityException e) { + log.error("Could not connect to Accumulo with provided credentials", e); + throw new IOException(e); + } + } + + // Non-static methods to wrap the static AccumuloOutputFormat methods to enable testing + + protected void setAccumuloConnectorInfo(JobConf conf, String username, AuthenticationToken token) + throws AccumuloSecurityException { + AccumuloOutputFormat.setConnectorInfo(conf, username, token); + } + + @SuppressWarnings("deprecation") + protected void setAccumuloZooKeeperInstance(JobConf conf, String instanceName, String zookeepers) { + AccumuloOutputFormat.setZooKeeperInstance(conf, instanceName, zookeepers); + } + + protected void setAccumuloMockInstance(JobConf conf, String instanceName) { + AccumuloOutputFormat.setMockInstance(conf, instanceName); + } + + protected void setDefaultAccumuloTableName(JobConf conf, String tableName) { + AccumuloOutputFormat.setDefaultTableName(conf, tableName); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/package-info.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/package-info.java new file mode 100644 index 0000000..4fd6ba7 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/package-info.java @@ -0,0 +1,4 @@ +/** + * Serde and InputFormat support for connecting Hive to Accumulo tables. 
+ */ +package org.apache.hadoop.hive.accumulo; \ No newline at end of file diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/AccumuloPredicateHandler.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/AccumuloPredicateHandler.java new file mode 100644 index 0000000..5edc9b5 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/AccumuloPredicateHandler.java @@ -0,0 +1,408 @@ +package org.apache.hadoop.hive.accumulo.predicate; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.accumulo.core.client.IteratorSetting; +import org.apache.accumulo.core.data.Range; +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapper; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping; +import org.apache.hadoop.hive.accumulo.predicate.compare.CompareOp; +import org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.Equal; +import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThan; +import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual; +import org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.LessThan; +import org.apache.hadoop.hive.accumulo.predicate.compare.LessThanOrEqual; +import org.apache.hadoop.hive.accumulo.predicate.compare.Like; +import org.apache.hadoop.hive.accumulo.predicate.compare.LongCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.NotEqual; +import org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison; +import org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer; +import org.apache.hadoop.hive.ql.index.IndexSearchCondition; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.udf.UDFLike; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.log4j.Logger; + +import com.google.common.collect.Lists; +import 
com.google.common.collect.Maps; + +/** + * + * Supporting operations dealing with Hive Predicate pushdown to iterators and ranges. + * + * See {@link PrimitiveComparisonFilter} + * + */ +public class AccumuloPredicateHandler { + private static final List TOTAL_RANGE = Collections.singletonList(new Range()); + + private static AccumuloPredicateHandler handler = new AccumuloPredicateHandler(); + private static Map> compareOps = Maps.newHashMap(); + private static Map> pComparisons = Maps.newHashMap(); + + // Want to start sufficiently "high" enough in the iterator stack + private static int iteratorCount = 50; + + private static final Logger log = Logger.getLogger(AccumuloPredicateHandler.class); + static { + compareOps.put(GenericUDFOPEqual.class.getName(), Equal.class); + compareOps.put(GenericUDFOPNotEqual.class.getName(), NotEqual.class); + compareOps.put(GenericUDFOPGreaterThan.class.getName(), GreaterThan.class); + compareOps.put(GenericUDFOPEqualOrGreaterThan.class.getName(), GreaterThanOrEqual.class); + compareOps.put(GenericUDFOPEqualOrLessThan.class.getName(), LessThanOrEqual.class); + compareOps.put(GenericUDFOPLessThan.class.getName(), LessThan.class); + compareOps.put(UDFLike.class.getName(), Like.class); + + pComparisons.put("bigint", LongCompare.class); + pComparisons.put("int", IntCompare.class); + pComparisons.put("double", DoubleCompare.class); + pComparisons.put("string", StringCompare.class); + } + + public static AccumuloPredicateHandler getInstance() { + return handler; + } + + /** + * + * @return set of all UDF class names with matching CompareOpt implementations. + */ + public Set cOpKeyset() { + return compareOps.keySet(); + } + + /** + * + * @return set of all hive data types with matching PrimitiveCompare implementations. + */ + public Set pComparisonKeyset() { + return pComparisons.keySet(); + } + + /** + * + * @param udfType + * GenericUDF classname to lookup matching CompareOpt + * @return Class + */ + public Class getCompareOpClass(String udfType) + throws NoSuchCompareOpException { + if (!compareOps.containsKey(udfType)) + throw new NoSuchCompareOpException("Null compare op for specified key: " + udfType); + return compareOps.get(udfType); + } + + public CompareOp getCompareOp(String udfType, IndexSearchCondition sc) + throws NoSuchCompareOpException, SerDeException { + Class clz = getCompareOpClass(udfType); + + try { + return clz.newInstance(); + } catch (ClassCastException e) { + throw new SerDeException("Column type mismatch in WHERE clause " + + sc.getComparisonExpr().getExprString() + " found type " + + sc.getConstantDesc().getTypeString() + " instead of " + + sc.getColumnDesc().getTypeString()); + } catch (IllegalAccessException e) { + throw new SerDeException("Could not instantiate class for WHERE clause", e); + } catch (InstantiationException e) { + throw new SerDeException("Could not instantiate class for WHERE clause", e); + } + } + + /** + * + * @param type + * String hive column lookup matching PrimitiveCompare + * @return Class + */ + public Class getPrimitiveComparisonClass(String type) + throws NoSuchPrimitiveComparisonException { + if (!pComparisons.containsKey(type)) + throw new NoSuchPrimitiveComparisonException("Null primitive comparison for specified key: " + + type); + return pComparisons.get(type); + } + + public PrimitiveComparison getPrimitiveComparison(String type, IndexSearchCondition sc) + throws NoSuchPrimitiveComparisonException, SerDeException { + Class clz = getPrimitiveComparisonClass(type); + + try { + return 
clz.newInstance(); + } catch (ClassCastException e) { + throw new SerDeException("Column type mismatch in WHERE clause " + + sc.getComparisonExpr().getExprString() + " found type " + + sc.getConstantDesc().getTypeString() + " instead of " + + sc.getColumnDesc().getTypeString()); + } catch (IllegalAccessException e) { + throw new SerDeException("Could not instantiate class for WHERE clause", e); + } catch (InstantiationException e) { + throw new SerDeException("Could not instantiate class for WHERE clause", e); + } + } + + private AccumuloPredicateHandler() {} + + /** + * Loop through search conditions and build ranges for predicates involving rowID column, if any. + */ + public List getRanges(Configuration conf, ColumnMapper columnMapper) throws SerDeException { + if (!columnMapper.hasRowIdMapping()) { + return TOTAL_RANGE; + } + + int rowIdOffset = columnMapper.getRowIdOffset(); + String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS); + + if (null == hiveColumnNamesArr) { + throw new IllegalArgumentException("Could not find Hive columns in configuration"); + } + + // Already verified that we should have the rowId mapping + String hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset]; + + ExprNodeDesc root = this.getExpression(conf); + + // No expression, therefore scan the whole table + if (null == root) { + return TOTAL_RANGE; + } + + Object result = generateRanges(columnMapper, hiveRowIdColumnName, root); + + if (null == result) { + log.info("Calculated null set of ranges, scanning full table"); + return TOTAL_RANGE; + } else if (result instanceof Range) { + log.info("Computed a single Range for the query: " + result); + return Collections.singletonList((Range) result); + } else if (result instanceof List) { + log.info("Computed a collection of Ranges for the query: " + result); + @SuppressWarnings("unchecked") + List ranges = (List) result; + return ranges; + } else { + throw new IllegalArgumentException("Unhandled return from Range generation: " + result); + } + } + + /** + * Encapsulates the traversal over some {@link ExprNodeDesc} tree for the generation of Accumuluo + * Ranges using expressions involving the Accumulo rowid-mapped Hive column + * + * @param columnMapper + * Mapping of Hive to Accumulo columns for the query + * @param hiveRowIdColumnName + * Name of the hive column mapped to the Accumulo rowid + * @param root + * Root of some ExprNodeDesc tree to traverse, the WHERE clause + * @return An object representing the result from the ExprNodeDesc tree traversal using the + * AccumuloRangeGenerator + */ + protected Object generateRanges(ColumnMapper columnMapper, String hiveRowIdColumnName, ExprNodeDesc root) { + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, + columnMapper.getRowIdMapping(), hiveRowIdColumnName); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections. emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList roots = new ArrayList(); + roots.add(root); + HashMap nodeOutput = new HashMap(); + + try { + ogw.startWalking(roots, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + return nodeOutput.get(root); + } + + /** + * Loop through search conditions and build iterator settings for predicates involving columns + * other than rowID, if any. 
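+ *
+ * An illustrative sketch of how a caller might consume the result (the conf, columnMapper
+ * and scanner objects are assumed to already be in scope; they are not defined here):
+ *
+ *   List<IteratorSetting> itrs = AccumuloPredicateHandler.getInstance().getIterators(conf, columnMapper);
+ *   for (IteratorSetting setting : itrs) {
+ *     scanner.addScanIterator(setting); // each setting wraps a PrimitiveComparisonFilter
+ *   }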
+ * + * @param conf + * Configuration + * @throws SerDeException + */ + public List getIterators(Configuration conf, ColumnMapper columnMapper) + throws SerDeException { + List itrs = Lists.newArrayList(); + boolean shouldPushdown = conf.getBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, + AccumuloSerDeParameters.ITERATOR_PUSHDOWN_DEFAULT); + if (!shouldPushdown) { + log.info("Iterator pushdown is disabled for this table"); + return itrs; + } + + int rowIdOffset = columnMapper.getRowIdOffset(); + String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS); + + if (null == hiveColumnNamesArr) { + throw new IllegalArgumentException("Could not find Hive columns in configuration"); + } + + String hiveRowIdColumnName = null; + + if (rowIdOffset >= 0 && rowIdOffset < hiveColumnNamesArr.length) { + hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset]; + } + + List hiveColumnNames = Arrays.asList(hiveColumnNamesArr); + + for (IndexSearchCondition sc : getSearchConditions(conf)) { + String col = sc.getColumnDesc().getColumn(); + if (hiveRowIdColumnName == null || !hiveRowIdColumnName.equals(col)) { + HiveAccumuloColumnMapping mapping = (HiveAccumuloColumnMapping) columnMapper + .getColumnMappingForHiveColumn(hiveColumnNames, col); + itrs.add(toSetting(mapping, sc)); + } + } + if (log.isInfoEnabled()) + log.info("num iterators = " + itrs.size()); + return itrs; + } + + /** + * Create an IteratorSetting for the right qualifier, constant, CompareOpt, and PrimitiveCompare + * type. + * + * @param accumuloColumnMapping + * ColumnMapping to filter + * @param sc + * IndexSearchCondition + * @return IteratorSetting + * @throws SerDeException + */ + public IteratorSetting toSetting(HiveAccumuloColumnMapping accumuloColumnMapping, + IndexSearchCondition sc) throws SerDeException { + iteratorCount++; + final IteratorSetting is = new IteratorSetting(iteratorCount, + PrimitiveComparisonFilter.FILTER_PREFIX + iteratorCount, PrimitiveComparisonFilter.class); + final String type = sc.getColumnDesc().getTypeString(); + final String comparisonOpStr = sc.getComparisonOp(); + + PushdownTuple tuple; + try { + tuple = new PushdownTuple(sc, getPrimitiveComparison(type, sc), getCompareOp(comparisonOpStr, + sc)); + } catch (NoSuchPrimitiveComparisonException e) { + throw new SerDeException("No configured PrimitiveComparison class for " + type, e); + } catch (NoSuchCompareOpException e) { + throw new SerDeException("No configured CompareOp class for " + comparisonOpStr, e); + } + + is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, tuple.getpCompare().getClass() + .getName()); + is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, tuple.getcOpt().getClass().getName()); + is.addOption(PrimitiveComparisonFilter.CONST_VAL, + new String(Base64.encodeBase64(tuple.getConstVal()))); + is.addOption(PrimitiveComparisonFilter.COLUMN, accumuloColumnMapping.serialize()); + + return is; + } + + public ExprNodeDesc getExpression(Configuration conf) { + String filteredExprSerialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR); + if (filteredExprSerialized == null) + return null; + + return Utilities.deserializeExpression(filteredExprSerialized); + } + + /** + * + * @param conf + * Configuration + * @return list of IndexSearchConditions from the filter expression. 
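+ *
+ * For example, in a test the filter could be handed to this method by serializing it into
+ * the configuration first, using the counterpart Utilities#serializeExpression (filterExpr
+ * here is a hypothetical ExprNodeGenericFuncDesc built elsewhere):
+ *
+ *   conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, Utilities.serializeExpression(filterExpr));
+ *   List<IndexSearchCondition> conditions = AccumuloPredicateHandler.getInstance().getSearchConditions(conf);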
+ */ + public List getSearchConditions(Configuration conf) { + final List sConditions = Lists.newArrayList(); + ExprNodeDesc filterExpr = getExpression(conf); + if (null == filterExpr) { + return sConditions; + } + IndexPredicateAnalyzer analyzer = newAnalyzer(conf); + ExprNodeDesc residual = analyzer.analyzePredicate(filterExpr, sConditions); + if (residual != null) + throw new RuntimeException("Unexpected residual predicate: " + residual.getExprString()); + return sConditions; + } + + /** + * + * @param conf + * Configuration + * @param desc + * predicate expression node. + * @return DecomposedPredicate containing translated search conditions the analyzer can support. + */ + public DecomposedPredicate decompose(Configuration conf, ExprNodeDesc desc) { + IndexPredicateAnalyzer analyzer = newAnalyzer(conf); + List sConditions = new ArrayList(); + ExprNodeDesc residualPredicate = analyzer.analyzePredicate(desc, sConditions); + + if (sConditions.size() == 0) { + if (log.isInfoEnabled()) + log.info("nothing to decompose. Returning"); + return null; + } + + DecomposedPredicate decomposedPredicate = new DecomposedPredicate(); + decomposedPredicate.pushedPredicate = analyzer.translateSearchConditions(sConditions); + decomposedPredicate.residualPredicate = (ExprNodeGenericFuncDesc) residualPredicate; + return decomposedPredicate; + } + + /** + * Build an analyzer that allows comparison opts from compareOpts map, and all columns from table + * definition. + */ + private IndexPredicateAnalyzer newAnalyzer(Configuration conf) { + IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer(); + analyzer.clearAllowedColumnNames(); + for (String op : cOpKeyset()) { + analyzer.addComparisonOp(op); + } + + String[] hiveColumnNames = conf.getStrings(serdeConstants.LIST_COLUMNS); + for (String col : hiveColumnNames) { + analyzer.allowColumnName(col); + } + + return analyzer; + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/AccumuloRangeGenerator.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/AccumuloRangeGenerator.java new file mode 100644 index 0000000..d794e94 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/AccumuloRangeGenerator.java @@ -0,0 +1,355 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.predicate; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Stack; + +import org.apache.accumulo.core.data.Range; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping; +import org.apache.hadoop.hive.accumulo.predicate.compare.CompareOp; +import org.apache.hadoop.hive.accumulo.predicate.compare.Equal; +import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThan; +import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual; +import org.apache.hadoop.hive.accumulo.predicate.compare.LessThan; +import org.apache.hadoop.hive.accumulo.predicate.compare.LessThanOrEqual; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantShortObjectInspector; +import org.apache.hadoop.io.Text; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * + */ +public class AccumuloRangeGenerator implements NodeProcessor { + private static final Logger log = LoggerFactory.getLogger(AccumuloRangeGenerator.class); + + private final AccumuloPredicateHandler predicateHandler; + private final HiveAccumuloRowIdColumnMapping rowIdMapping; + private final String hiveRowIdColumnName; + + public AccumuloRangeGenerator(AccumuloPredicateHandler predicateHandler, + HiveAccumuloRowIdColumnMapping rowIdMapping, String hiveRowIdColumnName) { + this.predicateHandler = predicateHandler; + this.rowIdMapping = rowIdMapping; + this.hiveRowIdColumnName = hiveRowIdColumnName; + } + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) + throws SemanticException { + // If it's not some operator, pass it back + if (!(nd instanceof ExprNodeGenericFuncDesc)) { + return nd; + } + + ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) nd; + + // 'and' nodes need to be intersected + if (FunctionRegistry.isOpAnd(func)) { + return processAndOpNode(nd, nodeOutputs); + // 'or' nodes need to be merged + } else if (FunctionRegistry.isOpOr(func)) { + return processOrOpNode(nd, nodeOutputs); + } else if (FunctionRegistry.isOpNot(func)) { + // TODO handle negations + throw new IllegalArgumentException("Negations not yet implemented"); + } else { + return processExpression(func, nodeOutputs); + } + } + + protected Object processAndOpNode(Node nd, Object[] nodeOutputs) { + // We might have multiple ranges coming from children + List andRanges = null; + + for (Object nodeOutput : nodeOutputs) { + // null signifies nodes that are irrelevant to the generation + // of Accumulo Ranges + if (null == nodeOutput) { + continue; + } + + // When an AND has no children (some conjunction over a field that isn't the column + // mapped to the Accumulo rowid) and when a conjunction generates Ranges which are empty + // (the children of the conjunction are disjoint), these two cases need to be kept separate. + // + // A null `andRanges` implies that ranges couldn't be computed, while an empty List + // of Ranges implies that there are no possible Ranges to lookup. + if (null == andRanges) { + andRanges = new ArrayList(); + } + + // The child is a single Range + if (nodeOutput instanceof Range) { + Range childRange = (Range) nodeOutput; + + // No existing ranges, just accept the current + if (andRanges.isEmpty()) { + andRanges.add(childRange); + } else { + // For each range we have, intersect them. 
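+ // (Range.clip(other, true) returns the overlapping portion of the two ranges,
+ // or null when they are disjoint.)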
If they don't overlap + // the range can be discarded + List newRanges = new ArrayList(); + for (Range andRange : andRanges) { + Range intersectedRange = andRange.clip(childRange, true); + if (null != intersectedRange) { + newRanges.add(intersectedRange); + } + } + + // Set the newly-constructed ranges as the current state + andRanges = newRanges; + } + } else if (nodeOutput instanceof List) { + @SuppressWarnings("unchecked") + List childRanges = (List) nodeOutput; + + // No ranges, use the ranges from the child + if (andRanges.isEmpty()) { + andRanges.addAll(childRanges); + } else { + List newRanges = new ArrayList(); + + // Cartesian product of our ranges, to the child ranges + for (Range andRange : andRanges) { + for (Range childRange : childRanges) { + Range intersectedRange = andRange.clip(childRange, true); + + // Retain only valid intersections (discard disjoint ranges) + if (null != intersectedRange) { + newRanges.add(intersectedRange); + } + } + } + + // Set the newly-constructed ranges as the current state + andRanges = newRanges; + } + } else { + log.error("Expected Range from {} but got {}", nd, nodeOutput); + throw new IllegalArgumentException("Expected Range but got " + + nodeOutput.getClass().getName()); + } + } + + return andRanges; + } + + protected Object processOrOpNode(Node nd, Object[] nodeOutputs) { + List orRanges = new ArrayList(nodeOutputs.length); + for (Object nodeOutput : nodeOutputs) { + if (nodeOutput instanceof Range) { + orRanges.add((Range) nodeOutput); + } else if (nodeOutput instanceof List) { + @SuppressWarnings("unchecked") + List childRanges = (List) nodeOutput; + orRanges.addAll(childRanges); + } else { + log.error("Expected Range from " + nd + " but got " + nodeOutput); + throw new IllegalArgumentException("Expected Range but got " + + nodeOutput.getClass().getName()); + } + } + + // Try to merge multiple ranges together + if (orRanges.size() > 1) { + return Range.mergeOverlapping(orRanges); + } else if (1 == orRanges.size()) { + // Return just the single Range + return orRanges.get(0); + } else { + // No ranges, just return the empty list + return orRanges; + } + } + + protected Object processExpression(ExprNodeGenericFuncDesc func, Object[] nodeOutputs) + throws SemanticException { + // a binary operator (gt, lt, ge, le, eq, ne) + GenericUDF genericUdf = func.getGenericUDF(); + + // Find the argument to the operator which is a constant + ExprNodeConstantDesc constantDesc = null; + ExprNodeColumnDesc columnDesc = null; + ExprNodeDesc leftHandNode = null; + for (Object nodeOutput : nodeOutputs) { + if (nodeOutput instanceof ExprNodeConstantDesc) { + // Ordering of constant and column in expression is important in correct range generation + if (null == leftHandNode) { + leftHandNode = (ExprNodeDesc) nodeOutput; + } + + constantDesc = (ExprNodeConstantDesc) nodeOutput; + } else if (nodeOutput instanceof ExprNodeColumnDesc) { + // Ordering of constant and column in expression is important in correct range generation + if (null == leftHandNode) { + leftHandNode = (ExprNodeDesc) nodeOutput; + } + + columnDesc = (ExprNodeColumnDesc) nodeOutput; + } + } + + // If it's constant = constant or column = column, we can't fetch any ranges + // TODO We can try to be smarter and push up the value to some node which + // we can generate ranges from e.g. 
rowid > (4 + 5) + if (null == constantDesc || null == columnDesc) { + return null; + } + + // Reject any clauses that are against a column that isn't the rowId mapping + if (!this.hiveRowIdColumnName.equals(columnDesc.getColumn())) { + return null; + } + + ConstantObjectInspector objInspector = constantDesc.getWritableObjectInspector(); + + Text constText; + switch (rowIdMapping.getEncoding()) { + case STRING: + constText = getUtf8Value(objInspector); + break; + case BINARY: + try { + constText = getBinaryValue(objInspector); + } catch (IOException e) { + throw new SemanticException(e); + } + break; + default: + throw new SemanticException("Unable to parse unknown encoding: " + + rowIdMapping.getEncoding()); + } + + Class opClz; + try { + opClz = predicateHandler.getCompareOpClass(genericUdf.getUdfName()); + } catch (NoSuchCompareOpException e) { + throw new IllegalArgumentException("Unhandled UDF class: " + genericUdf.getUdfName()); + } + + if (leftHandNode instanceof ExprNodeConstantDesc) { + return getConstantOpColumnRange(opClz, constText); + } else if (leftHandNode instanceof ExprNodeColumnDesc) { + return getColumnOpConstantRange(opClz, constText); + } else { + throw new IllegalStateException("Expected column or constant on LHS of expression"); + } + } + + protected Range getConstantOpColumnRange(Class opClz, Text constText) { + if (opClz.equals(Equal.class)) { + // 100 == x + return new Range(constText); // single row + } else if (opClz.equals(GreaterThanOrEqual.class)) { + // 100 >= x + return new Range(null, constText); // neg-infinity to end inclusive + } else if (opClz.equals(GreaterThan.class)) { + // 100 > x + return new Range(null, false, constText, false); // neg-infinity to end exclusive + } else if (opClz.equals(LessThanOrEqual.class)) { + // 100 <= x + return new Range(constText, true, null, false); // start inclusive to infinity + } else if (opClz.equals(LessThan.class)) { + // 100 < x + return new Range(constText, false, null, false); // start exclusive to infinity + } else { + throw new IllegalArgumentException("Could not process " + opClz); + } + } + + protected Range getColumnOpConstantRange(Class opClz, Text constText) { + if (opClz.equals(Equal.class)) { + return new Range(constText); // start inclusive to end inclusive + } else if (opClz.equals(GreaterThanOrEqual.class)) { + return new Range(constText, null); // start inclusive to infinity inclusive + } else if (opClz.equals(GreaterThan.class)) { + return new Range(constText, false, null, false); // start exclusive to infinity inclusive + } else if (opClz.equals(LessThanOrEqual.class)) { + return new Range(null, false, constText, true); // neg-infinity to start inclusive + } else if (opClz.equals(LessThan.class)) { + return new Range(null, false, constText, false); // neg-infinity to start exclusive + } else { + throw new IllegalArgumentException("Could not process " + opClz); + } + } + + protected Text getUtf8Value(ConstantObjectInspector objInspector) { + // TODO is there a more correct way to get the literal value for the Object? + return new Text(objInspector.getWritableConstantValue().toString()); + } + + /** + * Attempts to construct the binary value from the given inspector. Falls back to UTF8 encoding + * when the value cannot be coerced into binary. 
+ * + * @return Binary value when possible, utf8 otherwise + * @throws IOException + */ + protected Text getBinaryValue(ConstantObjectInspector objInspector) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + if (objInspector instanceof WritableConstantBooleanObjectInspector) { + LazyUtils.writePrimitive(out, objInspector.getWritableConstantValue(), + (WritableConstantBooleanObjectInspector) objInspector); + } else if (objInspector instanceof WritableConstantByteObjectInspector) { + LazyUtils.writePrimitive(out, objInspector.getWritableConstantValue(), + (WritableConstantByteObjectInspector) objInspector); + } else if (objInspector instanceof WritableConstantShortObjectInspector) { + LazyUtils.writePrimitive(out, objInspector.getWritableConstantValue(), + (WritableConstantShortObjectInspector) objInspector); + } else if (objInspector instanceof WritableConstantIntObjectInspector) { + LazyUtils.writePrimitive(out, objInspector.getWritableConstantValue(), + (WritableConstantIntObjectInspector) objInspector); + } else if (objInspector instanceof WritableConstantLongObjectInspector) { + LazyUtils.writePrimitive(out, objInspector.getWritableConstantValue(), + (WritableConstantLongObjectInspector) objInspector); + } else if (objInspector instanceof WritableConstantDoubleObjectInspector) { + LazyUtils.writePrimitive(out, objInspector.getWritableConstantValue(), + (WritableConstantDoubleObjectInspector) objInspector); + } else if (objInspector instanceof WritableConstantFloatObjectInspector) { + LazyUtils.writePrimitive(out, objInspector.getWritableConstantValue(), + (WritableConstantDoubleObjectInspector) objInspector); + } else { + return getUtf8Value(objInspector); + } + + out.close(); + return new Text(out.toByteArray()); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/NoSuchCompareOpException.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/NoSuchCompareOpException.java new file mode 100644 index 0000000..962185c --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/NoSuchCompareOpException.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.predicate; + +/** + * + */ +public class NoSuchCompareOpException extends Exception { + + private static final long serialVersionUID = 1L; + + public NoSuchCompareOpException() { + super(); + } + + public NoSuchCompareOpException(String msg) { + super(msg); + } + + public NoSuchCompareOpException(String msg, Throwable cause) { + super(msg, cause); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/NoSuchPrimitiveComparisonException.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/NoSuchPrimitiveComparisonException.java new file mode 100644 index 0000000..c305a9e --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/NoSuchPrimitiveComparisonException.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.predicate; + +import org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison; + +/** + * Used when a {@link PrimitiveComparison} was specified but one with that name cannot be found + */ +public class NoSuchPrimitiveComparisonException extends Exception { + + private static final long serialVersionUID = 1L; + + public NoSuchPrimitiveComparisonException() { + super(); + } + + public NoSuchPrimitiveComparisonException(String msg) { + super(msg); + } + + public NoSuchPrimitiveComparisonException(String msg, Throwable cause) { + super(msg, cause); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/PrimitiveComparisonFilter.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/PrimitiveComparisonFilter.java new file mode 100644 index 0000000..c303d49 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/PrimitiveComparisonFilter.java @@ -0,0 +1,123 @@ +package org.apache.hadoop.hive.accumulo.predicate; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.SortedMap; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.iterators.IteratorEnvironment; +import org.apache.accumulo.core.iterators.SortedKeyValueIterator; +import org.apache.accumulo.core.iterators.user.WholeRowIterator; +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.ColumnMappingFactory; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping; +import org.apache.hadoop.hive.accumulo.predicate.compare.CompareOp; +import 
org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.apache.log4j.Logger; + +import com.google.common.collect.Lists; + +/** + * Operates over a single qualifier. + * + * Delegates to PrimitiveCompare and CompareOpt instances for value acceptance. + * + * The PrimitiveCompare strategy assumes a consistent value type for the same column family and + * qualifier. + */ +public class PrimitiveComparisonFilter extends WholeRowIterator { + @SuppressWarnings("unused") + private static final Logger log = Logger.getLogger(PrimitiveComparisonFilter.class); + + public static final String FILTER_PREFIX = "accumulo.filter.compare.iterator."; + public static final String P_COMPARE_CLASS = "accumulo.filter.iterator.p.compare.class"; + public static final String COMPARE_OPT_CLASS = "accumulo.filter.iterator.compare.opt.class"; + public static final String CONST_VAL = "accumulo.filter.iterator.const.val"; + public static final String COLUMN = "accumulo.filter.iterator.qual"; + + private Text cfHolder, cqHolder, columnMappingFamily, columnMappingQualifier; + private HiveAccumuloColumnMapping columnMapping; + private CompareOp compOpt; + + @Override + protected boolean filter(Text currentRow, List keys, List values) { + SortedMap items; + boolean allow; + try { // if key doesn't contain CF, it's an encoded value from a previous iterator. + while (keys.get(0).getColumnFamily().getBytes().length == 0) { + items = decodeRow(keys.get(0), values.get(0)); + keys = Lists.newArrayList(items.keySet()); + values = Lists.newArrayList(items.values()); + } + allow = accept(keys, values); + } catch (IOException e) { + throw new RuntimeException(e); + } + return allow; + } + + private boolean accept(Collection keys, Collection values) { + Iterator kIter = keys.iterator(); + Iterator vIter = values.iterator(); + while (kIter.hasNext()) { + Key k = kIter.next(); + Value v = vIter.next(); + if (matchQualAndFam(k)) { + return compOpt.accept(v.get()); + } + } + return false; + } + + private boolean matchQualAndFam(Key k) { + k.getColumnFamily(cfHolder); + k.getColumnQualifier(cqHolder); + return cfHolder.equals(columnMappingFamily) && cqHolder.equals(columnMappingQualifier); + } + + @Override + public void init(SortedKeyValueIterator source, Map options, + IteratorEnvironment env) throws IOException { + super.init(source, options, env); + String serializedColumnMapping = options.get(COLUMN); + Entry pair = ColumnMappingFactory.parseMapping(serializedColumnMapping); + + // The ColumnEncoding, column name and type are all irrelevant at this point, just need the + // cf:[cq] + columnMapping = new HiveAccumuloColumnMapping(pair.getKey(), pair.getValue(), + ColumnEncoding.STRING, "column", "string"); + columnMappingFamily = new Text(columnMapping.getColumnFamily()); + columnMappingQualifier = new Text(columnMapping.getColumnQualifier()); + cfHolder = new Text(); + cqHolder = new Text(); + + try { + Class pClass = Class.forName(options.get(P_COMPARE_CLASS)); + Class cClazz = Class.forName(options.get(COMPARE_OPT_CLASS)); + PrimitiveComparison pCompare = pClass.asSubclass(PrimitiveComparison.class).newInstance(); + compOpt = cClazz.asSubclass(CompareOp.class).newInstance(); + byte[] constant = getConstant(options); + pCompare.init(constant); + compOpt.setPrimitiveCompare(pCompare); + } catch (ClassNotFoundException e) { + throw new IOException(e); + } catch (InstantiationException e) { + throw new 
IOException(e); + } catch (IllegalAccessException e) { + throw new IOException(e); + } + } + + protected byte[] getConstant(Map options) { + String b64Const = options.get(CONST_VAL); + return Base64.decodeBase64(b64Const.getBytes()); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/PushdownTuple.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/PushdownTuple.java new file mode 100644 index 0000000..32d143a --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/PushdownTuple.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.predicate; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hive.accumulo.predicate.compare.CompareOp; +import org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.LongCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison; +import org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare; +import org.apache.hadoop.hive.ql.exec.ExprNodeConstantEvaluator; +import org.apache.hadoop.hive.ql.index.IndexSearchCondition; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Logger; + +/** + * For use in IteratorSetting construction. + * + * encapsulates a constant byte [], PrimitiveCompare instance, and CompareOp instance. 
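+ *
+ * A minimal construction sketch (sc is a hypothetical IndexSearchCondition over an int column):
+ *
+ *   PushdownTuple tuple = new PushdownTuple(sc, new IntCompare(), new GreaterThan());
+ *   byte[] constant = tuple.getConstVal(); // 4-byte big-endian encoding of the int literal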
+ */ +public class PushdownTuple { + private static final Logger log = Logger.getLogger(PushdownTuple.class); + + private byte[] constVal; + private PrimitiveComparison pCompare; + private CompareOp cOpt; + + public PushdownTuple(IndexSearchCondition sc, PrimitiveComparison pCompare, CompareOp cOpt) + throws SerDeException { + ExprNodeConstantEvaluator eval = new ExprNodeConstantEvaluator(sc.getConstantDesc()); + + try { + this.pCompare = pCompare; + this.cOpt = cOpt; + Writable writable = (Writable) eval.evaluate(null); + constVal = getConstantAsBytes(writable); + } catch (ClassCastException cce) { + log.info(StringUtils.stringifyException(cce)); + throw new SerDeException(" Column type mismatch in where clause " + + sc.getComparisonExpr().getExprString() + " found type " + + sc.getConstantDesc().getTypeString() + " instead of " + + sc.getColumnDesc().getTypeString()); + } catch (HiveException e) { + throw new SerDeException(e); + } + } + + public byte[] getConstVal() { + return constVal; + } + + public PrimitiveComparison getpCompare() { + return pCompare; + } + + public CompareOp getcOpt() { + return cOpt; + } + + /** + * + * @return byte [] value from writable. + * @throws SerDeException + */ + public byte[] getConstantAsBytes(Writable writable) throws SerDeException { + if (pCompare instanceof StringCompare) { + return writable.toString().getBytes(); + } else if (pCompare instanceof DoubleCompare) { + byte[] bts = new byte[8]; + double val = ((DoubleWritable) writable).get(); + ByteBuffer.wrap(bts).putDouble(val); + return bts; + } else if (pCompare instanceof IntCompare) { + byte[] bts = new byte[4]; + int val = ((IntWritable) writable).get(); + ByteBuffer.wrap(bts).putInt(val); + return bts; + } else if (pCompare instanceof LongCompare) { + byte[] bts = new byte[8]; + long val = ((LongWritable) writable).get(); + ByteBuffer.wrap(bts).putLong(val); + return bts; + } else { + throw new SerDeException("Unsupported primitive category: " + pCompare.getClass().getName()); + } + } + +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/CompareOp.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/CompareOp.java new file mode 100644 index 0000000..0585333 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/CompareOp.java @@ -0,0 +1,26 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +/** + * Handles different types of comparisons in hive predicates. Filter iterator delegates value + * acceptance to the CompareOpt. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter}. 
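+ *
+ * An illustrative wiring of the two interfaces (the value bytes come from the Accumulo Value):
+ *
+ *   PrimitiveComparison strCompare = new StringCompare();
+ *   strCompare.init("widget".getBytes());
+ *   CompareOp op = new Equal();
+ *   op.setPrimitiveCompare(strCompare);
+ *   boolean matches = op.accept(value.get());
+ *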
Works with + * {@link PrimitiveComparison} + */ +public interface CompareOp { + /** + * Sets the PrimitiveComparison for this CompareOp + */ + public void setPrimitiveCompare(PrimitiveComparison comp); + + /** + * @return The PrimitiveComparison this CompareOp is a part of + */ + public PrimitiveComparison getPrimitiveCompare(); + + /** + * @param val The bytes from the Accumulo Value + * @return true if the value is accepted by this CompareOp + */ + public boolean accept(byte[] val); +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/DoubleCompare.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/DoubleCompare.java new file mode 100644 index 0000000..210ad72 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/DoubleCompare.java @@ -0,0 +1,90 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; + +/** + * Set of comparison operations over a double constant. Used for Hive predicates involving double + * comparison. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class DoubleCompare implements PrimitiveComparison { + + private BigDecimal constant; + + /** + * + */ + public void init(byte[] constant) { + this.constant = serialize(constant); + } + + /** + * @return BigDecimal holding double byte [] value + */ + public BigDecimal serialize(byte[] value) { + try { + return new BigDecimal(ByteBuffer.wrap(value).asDoubleBuffer().get()); + } catch (Exception e) { + throw new RuntimeException(e.toString() + " occurred trying to build double value. " + + "Make sure the value type for the byte[] is double."); + } + } + + /** + * @return true if double value is equal to constant, false otherwise. + */ + @Override + public boolean isEqual(byte[] value) { + return serialize(value).compareTo(constant) == 0; + } + + /** + * @return true if double value not equal to constant, false otherwise. + */ + @Override + public boolean isNotEqual(byte[] value) { + return serialize(value).compareTo(constant) != 0; + } + + /** + * @return true if value greater than or equal to constant, false otherwise. + */ + @Override + public boolean greaterThanOrEqual(byte[] value) { + return serialize(value).compareTo(constant) >= 0; + } + + /** + * @return true if value greater than constant, false otherwise. + */ + @Override + public boolean greaterThan(byte[] value) { + return serialize(value).compareTo(constant) > 0; + } + + /** + * @return true if value less than or equal than constant, false otherwise. + */ + @Override + public boolean lessThanOrEqual(byte[] value) { + return serialize(value).compareTo(constant) <= 0; + } + + /** + * @return true if value less than constant, false otherwise. + */ + @Override + public boolean lessThan(byte[] value) { + return serialize(value).compareTo(constant) < 0; + } + + /** + * not supported for this PrimitiveCompare implementation. 
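+ * (LIKE is only meaningful for string data; among the comparisons here, StringCompare is the
+ * only one that supports it, so this method simply throws UnsupportedOperationException.)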
+ */ + @Override + public boolean like(byte[] value) { + throw new UnsupportedOperationException("Like not supported for " + getClass().getName()); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/Equal.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/Equal.java new file mode 100644 index 0000000..3a34f12 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/Equal.java @@ -0,0 +1,32 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +/** + * Wraps call to isEqual() over PrimitiveCompare instance. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class Equal implements CompareOp { + + private PrimitiveComparison comp; + + public Equal() {} + + public Equal(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public void setPrimitiveCompare(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public PrimitiveComparison getPrimitiveCompare() { + return comp; + } + + @Override + public boolean accept(byte[] val) { + return comp.isEqual(val); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/GreaterThan.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/GreaterThan.java new file mode 100644 index 0000000..a47b2a3 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/GreaterThan.java @@ -0,0 +1,32 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +/** + * Wraps call to greaterThan over {@link PrimitiveComparison} instance. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class GreaterThan implements CompareOp { + + private PrimitiveComparison comp; + + public GreaterThan() {} + + public GreaterThan(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public void setPrimitiveCompare(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public PrimitiveComparison getPrimitiveCompare() { + return this.comp; + } + + @Override + public boolean accept(byte[] val) { + return comp.greaterThan(val); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/GreaterThanOrEqual.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/GreaterThanOrEqual.java new file mode 100644 index 0000000..c502a45 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/GreaterThanOrEqual.java @@ -0,0 +1,32 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +/** + * Wraps call to greaterThanOrEqual over {@link PrimitiveComparison} instance. 
+ * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class GreaterThanOrEqual implements CompareOp { + + private PrimitiveComparison comp; + + public GreaterThanOrEqual() {} + + public GreaterThanOrEqual(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public void setPrimitiveCompare(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public PrimitiveComparison getPrimitiveCompare() { + return comp; + } + + @Override + public boolean accept(byte[] val) { + return comp.greaterThanOrEqual(val); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/IntCompare.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/IntCompare.java new file mode 100644 index 0000000..d7de1ff --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/IntCompare.java @@ -0,0 +1,63 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +import java.nio.ByteBuffer; + +/** + * Set of comparison operations over a integer constant. Used for Hive predicates involving int + * comparison. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class IntCompare implements PrimitiveComparison { + + private int constant; + + @Override + public void init(byte[] constant) { + this.constant = serialize(constant); + } + + @Override + public boolean isEqual(byte[] value) { + return serialize(value) == constant; + } + + @Override + public boolean isNotEqual(byte[] value) { + return serialize(value) != constant; + } + + @Override + public boolean greaterThanOrEqual(byte[] value) { + return serialize(value) >= constant; + } + + @Override + public boolean greaterThan(byte[] value) { + return serialize(value) > constant; + } + + @Override + public boolean lessThanOrEqual(byte[] value) { + return serialize(value) <= constant; + } + + @Override + public boolean lessThan(byte[] value) { + return serialize(value) < constant; + } + + @Override + public boolean like(byte[] value) { + throw new UnsupportedOperationException("Like not supported for " + getClass().getName()); + } + + public Integer serialize(byte[] value) { + try { + return ByteBuffer.wrap(value).asIntBuffer().get(); + } catch (Exception e) { + throw new RuntimeException(e.toString() + " occurred trying to build int value. " + + "Make sure the value type for the byte[] is int "); + } + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/LessThan.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/LessThan.java new file mode 100644 index 0000000..2933131 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/LessThan.java @@ -0,0 +1,32 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +/** + * Wraps call to lessThan over {@link PrimitiveComparison} instance. 
+ * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class LessThan implements CompareOp { + + private PrimitiveComparison comp; + + public LessThan() {} + + public LessThan(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public void setPrimitiveCompare(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public PrimitiveComparison getPrimitiveCompare() { + return comp; + } + + @Override + public boolean accept(byte[] val) { + return comp.lessThan(val); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/LessThanOrEqual.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/LessThanOrEqual.java new file mode 100644 index 0000000..86acb73 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/LessThanOrEqual.java @@ -0,0 +1,32 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +/** + * Wraps call to lessThanOrEqual over {@link PrimitiveComparison} instance. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class LessThanOrEqual implements CompareOp { + + private PrimitiveComparison comp; + + public LessThanOrEqual() {} + + public LessThanOrEqual(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public void setPrimitiveCompare(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public PrimitiveComparison getPrimitiveCompare() { + return comp; + } + + @Override + public boolean accept(byte[] val) { + return comp.lessThanOrEqual(val); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/Like.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/Like.java new file mode 100644 index 0000000..612641d --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/Like.java @@ -0,0 +1,33 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +/** + * Wraps call to like over {@link PrimitiveComparison} instance. Currently only supported by + * StringCompare. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class Like implements CompareOp { + + PrimitiveComparison comp; + + public Like() {} + + public Like(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public void setPrimitiveCompare(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public PrimitiveComparison getPrimitiveCompare() { + return comp; + } + + @Override + public boolean accept(byte[] val) { + return comp.like(val); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/LongCompare.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/LongCompare.java new file mode 100644 index 0000000..b32874f --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/LongCompare.java @@ -0,0 +1,64 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +import java.nio.ByteBuffer; + +/** + * Set of comparison operations over a long constant. Used for Hive predicates involving long + * comparison. 
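+ *
+ * An illustrative check (both the constant and the values are 8-byte big-endian encodings,
+ * matching what PushdownTuple#getConstantAsBytes produces):
+ *
+ *   LongCompare cmp = new LongCompare();
+ *   cmp.init(ByteBuffer.allocate(8).putLong(100L).array());
+ *   boolean hit = cmp.lessThan(ByteBuffer.allocate(8).putLong(99L).array()); // true: 99 < 100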
+ * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class LongCompare implements PrimitiveComparison { + + private long constant; + + @Override + public void init(byte[] constant) { + this.constant = serialize(constant); + } + + @Override + public boolean isEqual(byte[] value) { + long lonVal = serialize(value); + return lonVal == constant; + } + + @Override + public boolean isNotEqual(byte[] value) { + return serialize(value) != constant; + } + + @Override + public boolean greaterThanOrEqual(byte[] value) { + return serialize(value) >= constant; + } + + @Override + public boolean greaterThan(byte[] value) { + return serialize(value) > constant; + } + + @Override + public boolean lessThanOrEqual(byte[] value) { + return serialize(value) <= constant; + } + + @Override + public boolean lessThan(byte[] value) { + return serialize(value) < constant; + } + + @Override + public boolean like(byte[] value) { + throw new UnsupportedOperationException("Like not supported for " + getClass().getName()); + } + + public Long serialize(byte[] value) { + try { + return ByteBuffer.wrap(value).asLongBuffer().get(); + } catch (Exception e) { + throw new RuntimeException(e.toString() + " occurred trying to build long value. " + + "Make sure the value type for the byte[] is long "); + } + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/NotEqual.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/NotEqual.java new file mode 100644 index 0000000..22b84ba --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/NotEqual.java @@ -0,0 +1,32 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +/** + * Wraps call to isEqual over {@link PrimitiveComparison} instance and returns the negation. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class NotEqual implements CompareOp { + + private PrimitiveComparison comp; + + public NotEqual() {} + + public NotEqual(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public void setPrimitiveCompare(PrimitiveComparison comp) { + this.comp = comp; + } + + @Override + public PrimitiveComparison getPrimitiveCompare() { + return comp; + } + + @Override + public boolean accept(byte[] val) { + return !comp.isEqual(val); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/PrimitiveComparison.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/PrimitiveComparison.java new file mode 100644 index 0000000..26e194f --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/PrimitiveComparison.java @@ -0,0 +1,32 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +/** + * Wraps type-specific comparison operations over a constant value. Methods take raw byte from + * incoming Accumulo values. + * + * The CompareOpt instance in the iterator uses one or more methods from a PrimitiveCompare + * implementation to perform type-specific comparisons and determine acceptances. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter}. 
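+ *
+ * A minimal sketch of the contract: init(byte[]) receives the predicate constant once,
+ * serialize(byte[]) decodes an incoming value, and the boolean methods compare the two.
+ * For example:
+ *
+ *   PrimitiveComparison p = new IntCompare();
+ *   p.init(ByteBuffer.allocate(4).putInt(10).array()); // constant 10
+ *   boolean hit = p.greaterThan(ByteBuffer.allocate(4).putInt(11).array()); // true: 11 > 10
+ *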
Works with + * {@link CompareOp} + */ +public interface PrimitiveComparison { + + public boolean isEqual(byte[] value); + + public boolean isNotEqual(byte[] value); + + public boolean greaterThanOrEqual(byte[] value); + + public boolean greaterThan(byte[] value); + + public boolean lessThanOrEqual(byte[] value); + + public boolean lessThan(byte[] value); + + public boolean like(byte[] value); + + public Object serialize(byte[] value); + + public void init(byte[] constant); +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/StringCompare.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/StringCompare.java new file mode 100644 index 0000000..b71b8a8 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/StringCompare.java @@ -0,0 +1,65 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +import java.util.regex.Pattern; + +import org.apache.log4j.Logger; + +/** + * Set of comparison operations over a string constant. Used for Hive predicates involving string + * comparison. + * + * Used by {@link org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter} + */ +public class StringCompare implements PrimitiveComparison { + @SuppressWarnings("unused") + private static final Logger log = Logger.getLogger(StringCompare.class); + + private String constant; + + @Override + public void init(byte[] constant) { + this.constant = serialize(constant); + } + + @Override + public boolean isEqual(byte[] value) { + return serialize(value).equals(constant); + } + + @Override + public boolean isNotEqual(byte[] value) { + return !isEqual(value); + } + + @Override + public boolean greaterThanOrEqual(byte[] value) { + return serialize(value).compareTo(constant) >= 0; + } + + @Override + public boolean greaterThan(byte[] value) { + return serialize(value).compareTo(constant) > 0; + } + + @Override + public boolean lessThanOrEqual(byte[] value) { + return serialize(value).compareTo(constant) <= 0; + } + + @Override + public boolean lessThan(byte[] value) { + return serialize(value).compareTo(constant) < 0; + } + + @Override + public boolean like(byte[] value) { + String temp = new String(value).replaceAll("%", "[\\\\\\w]+?"); + Pattern pattern = Pattern.compile(temp); + boolean match = pattern.matcher(constant).matches(); + return match; + } + + public String serialize(byte[] value) { + return new String(value); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/package-info.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/package-info.java new file mode 100644 index 0000000..875fad2 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/compare/package-info.java @@ -0,0 +1,4 @@ +/** + * PrimitiveCompare and CompareOpt implementations for use in PrimitiveComparisonFilter iterator + */ +package org.apache.hadoop.hive.accumulo.predicate.compare; \ No newline at end of file diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/package-info.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/package-info.java new file mode 100644 index 0000000..419ce01 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/predicate/package-info.java @@ -0,0 +1,4 @@ +/** + * Predicate pushdown to Accumulo filter iterators. 
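+ *
+ * At a high level (conf and columnMapper are assumed to be supplied by the storage handler;
+ * this is an overview sketch, not executable as-is):
+ *
+ *   AccumuloPredicateHandler handler = AccumuloPredicateHandler.getInstance();
+ *   List<Range> ranges = handler.getRanges(conf, columnMapper);                // rowid predicates
+ *   List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper); // other columns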
+ */ +package org.apache.hadoop.hive.accumulo.predicate; \ No newline at end of file diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloCompositeRowId.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloCompositeRowId.java new file mode 100644 index 0000000..f3ebbd1 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloCompositeRowId.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.accumulo.serde; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyObject; +import org.apache.hadoop.hive.serde2.lazy.LazyStruct; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; + +/** + * AccumuloCompositeKey extension of LazyStruct. All complex composite keys should extend this class + * and override the {@link LazyStruct#getField(int)} method where fieldID corresponds to the ID of a + * key in the composite key. + *

+ * For example, for a composite key "/part1/part2/part3", part1 will have an id + * 0, part2 will have an id 1 and part3 will have an id 2. Custom + * implementations of getField(fieldID) should return the value corresponding to that fieldID. So, + * for the above example, the value returned for getField(0) should be part1, + * getField(1) should be part2 and getField(2) should be part3. + *
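To make the getField(fieldID) contract concrete, here is a hypothetical subclass (illustrative only, not part of this patch) that splits a rowId of the form "part1/part2" into two fields. It assumes the raw rowId is reachable through the bytes/start/length fields that LazyStruct inherits from LazyNonPrimitive, and it declares the three-argument constructor required just below.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.accumulo.serde.AccumuloCompositeRowId;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;

public class SlashSeparatedRowId extends AccumuloCompositeRowId {

  public SlashSeparatedRowId(LazySimpleStructObjectInspector oi, Properties tbl, Configuration conf) {
    super(oi);
  }

  @Override
  public Object getField(int fieldID) {
    // Assumed: bytes/start/length (inherited from LazyNonPrimitive) hold the raw Accumulo rowId
    String rowId = new String(bytes.getData(), start, length);
    String[] parts = rowId.split("/");
    // Hand the requested component back as a lazily-initialized object
    return toLazyObject(fieldID, parts[fieldID].getBytes());
  }
}

Such a class would be named through the accumulo.composite.rowid table property, which CompositeAccumuloRowIdFactory (later in this patch) uses to instantiate it reflectively via exactly this constructor.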

 + *
 + * All custom implementations are expected to have a constructor of the form:
 + *
 + * MyCustomCompositeKey(LazySimpleStructObjectInspector oi, Properties tbl, Configuration conf)
+ * + */ +public class AccumuloCompositeRowId extends LazyStruct { + + public AccumuloCompositeRowId(LazySimpleStructObjectInspector oi) { + super(oi); + } + + @Override + public ArrayList getFieldsAsList() { + ArrayList allFields = new ArrayList(); + + List fields = oi.getAllStructFieldRefs(); + + for (int i = 0; i < fields.size(); i++) { + allFields.add(getField(i)); + } + + return allFields; + } + + /** + * Create an initialize a {@link LazyObject} with the given bytes for the given fieldID. + * + * @param fieldID + * field for which the object is to be created + * @param bytes + * value with which the object is to be initialized with + * @return initialized {@link LazyObject} + * */ + public LazyObject toLazyObject(int fieldID, byte[] bytes) { + ObjectInspector fieldOI = oi.getAllStructFieldRefs().get(fieldID).getFieldObjectInspector(); + + LazyObject lazyObject = LazyFactory.createLazyObject(fieldOI); + + ByteArrayRef ref = new ByteArrayRef(); + + ref.setData(bytes); + + // initialize the lazy object + lazyObject.init(ref, 0, ref.getData().length); + + return lazyObject; + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowIdFactory.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowIdFactory.java new file mode 100644 index 0000000..d82a392 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowIdFactory.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.serde; + +import java.io.IOException; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +/** + * Interface for providing custom Accumulo RowID generation/parsing + */ +public interface AccumuloRowIdFactory { + + /** + * initialize factory with properties + */ + public void init(AccumuloSerDeParameters serDeParams, Properties properties) + throws SerDeException; + + /** + * create custom object inspector for accumulo rowId + * + * @param type + * type information + */ + public ObjectInspector createRowIdObjectInspector(TypeInfo type) throws SerDeException; + + /** + * create custom object for accumulo + * + * @param inspector + * OI create by {@link AccumuloRowIdFactory#createRowIdObjectInspector} + */ + public LazyObjectBase createRowId(ObjectInspector inspector) throws SerDeException; + + /** + * serialize hive object in internal format of custom key + */ + public byte[] serializeRowId(Object object, StructField field, ByteStream.Output output) + throws IOException; + + /** + * Add this implementation to the classpath for the Job + */ + public void addDependencyJars(Configuration conf) throws IOException; +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java new file mode 100644 index 0000000..d168012 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java @@ -0,0 +1,383 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.serde; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping; +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.log4j.Logger; + +import com.google.common.base.Charsets; +import com.google.common.base.Preconditions; + +/** + * Serializes a Struct to an Accumulo row as per the definition provided by the + * {@link ColumnMapping}s + */ +public class AccumuloRowSerializer { + private static final Logger log = Logger.getLogger(AccumuloRowSerializer.class); + + private final int rowIdOffset; + private final ByteStream.Output output; + private final SerDeParameters serDeParams; + private final List mappings; + private final ColumnVisibility visibility; + private final AccumuloRowIdFactory rowIdFactory; + + public AccumuloRowSerializer(int primaryKeyOffset, SerDeParameters serDeParams, + List mappings, ColumnVisibility visibility, AccumuloRowIdFactory rowIdFactory) { + Preconditions.checkArgument(primaryKeyOffset >= 0, + "A valid offset to the mapping for the Accumulo RowID is required, received " + + primaryKeyOffset); + this.rowIdOffset = primaryKeyOffset; + this.output = new ByteStream.Output(); + this.serDeParams = serDeParams; + this.mappings = mappings; + this.visibility = visibility; + this.rowIdFactory = rowIdFactory; + } + + public Mutation serialize(Object obj, ObjectInspector objInspector) throws SerDeException, + IOException { + if (objInspector.getCategory() != ObjectInspector.Category.STRUCT) { + throw new SerDeException(getClass().toString() + + " can only serialize struct types, but we got: " + objInspector.getTypeName()); + } + + // Prepare the field ObjectInspectors + StructObjectInspector soi = (StructObjectInspector) objInspector; + List fields = soi.getAllStructFieldRefs(); + List columnValues = soi.getStructFieldsDataAsList(obj); + + // Fail if we try to access an offset out of bounds + if (rowIdOffset >= fields.size()) { + throw new IllegalStateException( + "Attempted to access field outside of definition for struct. 
Have " + fields.size() + + " fields and tried to access offset " + rowIdOffset); + } + + StructField field = fields.get(rowIdOffset); + Object value = columnValues.get(rowIdOffset); + + // The ObjectInspector for the row ID + ObjectInspector fieldObjectInspector = field.getFieldObjectInspector(); + + log.info("Serializing rowId with " + value + " in " + field + " using " + + rowIdFactory.getClass()); + + // Serialize the row component using the RowIdFactory. In the normal case, this will just + // delegate back to the "local" serializeRowId method + byte[] data = rowIdFactory.serializeRowId(value, field, output); + + // Set that as the row id in the mutation + Mutation mutation = new Mutation(data); + + // Each column in the row + for (int i = 0; i < fields.size(); i++) { + if (rowIdOffset == i) { + continue; + } + + // Get the relevant information for this column + field = fields.get(i); + value = columnValues.get(i); + + // Despite having a fixed schema from Hive, we have sparse columns in Accumulo + if (null == value) { + continue; + } + + // The ObjectInspector for the current column + fieldObjectInspector = field.getFieldObjectInspector(); + + // Make sure we got the right implementation of a ColumnMapping + ColumnMapping mapping = mappings.get(i); + if (mapping instanceof HiveAccumuloColumnMapping) { + serializeColumnMapping((HiveAccumuloColumnMapping) mapping, fieldObjectInspector, value, + mutation); + } else if (mapping instanceof HiveAccumuloMapColumnMapping) { + serializeColumnMapping((HiveAccumuloMapColumnMapping) mapping, fieldObjectInspector, value, + mutation); + } else { + throw new IllegalArgumentException("Mapping for " + field.getFieldName() + + " was not a HiveColumnMapping, but was " + mapping.getClass()); + } + + } + + return mutation; + } + + protected void serializeColumnMapping(HiveAccumuloColumnMapping columnMapping, + ObjectInspector fieldObjectInspector, Object value, Mutation mutation) throws IOException { + // Get the serialized value for the column + byte[] serializedValue = getSerializedValue(fieldObjectInspector, value, output, columnMapping); + + // Put it all in the Mutation + mutation.put(columnMapping.getColumnFamilyBytes(), columnMapping.getColumnQualifierBytes(), + visibility, serializedValue); + } + + /** + * Serialize the Hive Map into an Accumulo row + */ + protected void serializeColumnMapping(HiveAccumuloMapColumnMapping columnMapping, + ObjectInspector fieldObjectInspector, Object value, Mutation mutation) throws IOException { + MapObjectInspector mapObjectInspector = (MapObjectInspector) fieldObjectInspector; + + Map map = mapObjectInspector.getMap(value); + if (map == null) { + return; + } + + ObjectInspector keyObjectInspector = mapObjectInspector.getMapKeyObjectInspector(), valueObjectInspector = mapObjectInspector + .getMapValueObjectInspector(); + + byte[] cfBytes = columnMapping.getColumnFamily().getBytes(Charsets.UTF_8), cqPrefixBytes = columnMapping + .getColumnQualifierPrefix().getBytes(Charsets.UTF_8); + byte[] cqBytes, valueBytes; + for (Entry entry : map.entrySet()) { + output.reset(); + + // If the cq prefix is non-empty, add it to the CQ before we set the mutation + if (0 < cqPrefixBytes.length) { + output.write(cqPrefixBytes, 0, cqPrefixBytes.length); + } + + // Write the "suffix" of the cq + writeWithLevel(keyObjectInspector, entry.getKey(), output, columnMapping, 3); + cqBytes = output.toByteArray(); + + output.reset(); + + // Write the value + writeWithLevel(valueObjectInspector, entry.getValue(), output, columnMapping, 3); + 
valueBytes = output.toByteArray(); + + mutation.put(cfBytes, cqBytes, visibility, valueBytes); + } + } + + /** + * Serialize an Accumulo rowid + */ + protected byte[] serializeRowId(Object rowId, StructField rowIdField, ColumnMapping rowIdMapping) + throws IOException { + if (rowId == null) { + throw new IOException("Accumulo rowId cannot be NULL"); + } + // Reset the buffer we're going to use + output.reset(); + ObjectInspector rowIdFieldOI = rowIdField.getFieldObjectInspector(); + String rowIdMappingType = rowIdMapping.getColumnType(); + TypeInfo rowIdTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(rowIdMappingType); + + if (!rowIdFieldOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE) + && rowIdTypeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) { + // we always serialize the String type using the escaped algorithm for LazyString + writeString(output, SerDeUtils.getJSONString(rowId, rowIdFieldOI), + PrimitiveObjectInspectorFactory.javaStringObjectInspector); + return output.toByteArray(); + } + + // use the serialization option switch to write primitive values as either a variable + // length UTF8 string or a fixed width bytes if serializing in binary format + getSerializedValue(rowIdFieldOI, rowId, output, rowIdMapping); + return output.toByteArray(); + } + + /** + * Compute the serialized value from the given element and object inspectors. Based on the Hive + * types, represented through the ObjectInspectors for the whole object and column within the + * object, serialize the object appropriately. + * + * @param fieldObjectInspector + * ObjectInspector for the column value being serialized + * @param value + * The Object itself being serialized + * @param output + * A temporary buffer to reduce object creation + * @return The serialized bytes from the provided value. + * @throws IOException + * An error occurred when performing IO to serialize the data + */ + protected byte[] getSerializedValue(ObjectInspector fieldObjectInspector, Object value, + ByteStream.Output output, ColumnMapping mapping) throws IOException { + // Reset the buffer we're going to use + output.reset(); + + // Start by only serializing primitives as-is + if (fieldObjectInspector.getCategory() == ObjectInspector.Category.PRIMITIVE) { + writeSerializedPrimitive((PrimitiveObjectInspector) fieldObjectInspector, output, value, + mapping.getEncoding()); + } else { + // We only accept a struct, which means that we're already nested one level deep + writeWithLevel(fieldObjectInspector, value, output, mapping, 2); + } + + return output.toByteArray(); + } + + /** + * Recursively serialize an Object using its {@link ObjectInspector}, respecting the + * separators defined by the {@link SerDeParameters}. 
+ * @param oi ObjectInspector for the current object + * @param value The current object + * @param output A buffer output is written to + * @param mapping The mapping for this Hive column + * @param level The current level/offset for the SerDe separator + * @throws IOException + */ + protected void writeWithLevel(ObjectInspector oi, Object value, ByteStream.Output output, + ColumnMapping mapping, int level) throws IOException { + switch (oi.getCategory()) { + case PRIMITIVE: + if (mapping.getEncoding() == ColumnEncoding.BINARY) { + this.writeBinary(output, value, (PrimitiveObjectInspector) oi); + } else { + this.writeString(output, value, (PrimitiveObjectInspector) oi); + } + return; + case LIST: + char separator = (char) serDeParams.getSeparators()[level]; + ListObjectInspector loi = (ListObjectInspector) oi; + List list = loi.getList(value); + ObjectInspector eoi = loi.getListElementObjectInspector(); + if (list == null) { + log.debug("No objects found when serializing list"); + return; + } else { + for (int i = 0; i < list.size(); i++) { + if (i > 0) { + output.write(separator); + } + writeWithLevel(eoi, list.get(i), output, mapping, level + 1); + } + } + return; + case MAP: + char sep = (char) serDeParams.getSeparators()[level]; + char keyValueSeparator = (char) serDeParams.getSeparators()[level + 1]; + MapObjectInspector moi = (MapObjectInspector) oi; + ObjectInspector koi = moi.getMapKeyObjectInspector(); + ObjectInspector voi = moi.getMapValueObjectInspector(); + + Map map = moi.getMap(value); + if (map == null) { + log.debug("No object found when serializing map"); + return; + } else { + boolean first = true; + for (Map.Entry entry : map.entrySet()) { + if (first) { + first = false; + } else { + output.write(sep); + } + writeWithLevel(koi, entry.getKey(), output, mapping, level + 2); + output.write(keyValueSeparator); + writeWithLevel(voi, entry.getValue(), output, mapping, level + 2); + } + } + return; + case STRUCT: + sep = (char) serDeParams.getSeparators()[level]; + StructObjectInspector soi = (StructObjectInspector) oi; + List fields = soi.getAllStructFieldRefs(); + list = soi.getStructFieldsDataAsList(value); + if (list == null) { + log.debug("No object found when serializing struct"); + return; + } else { + for (int i = 0; i < list.size(); i++) { + if (i > 0) { + output.write(sep); + } + + writeWithLevel(fields.get(i).getFieldObjectInspector(), list.get(i), output, mapping, + level + 1); + } + } + + return; + default: + throw new RuntimeException("Unknown category type: " + oi.getCategory()); + } + } + + /** + * Serialize the given primitive to the given output buffer, using the provided encoding + * mechanism. 
+ * + * @param objectInspector + * The PrimitiveObjectInspector for this Object + * @param output + * A buffer to write the serialized value to + * @param value + * The Object being serialized + * @param encoding + * The means in which the Object should be serialized + * @throws IOException + */ + protected void writeSerializedPrimitive(PrimitiveObjectInspector objectInspector, + ByteStream.Output output, Object value, ColumnEncoding encoding) throws IOException { + // Despite STRING being a primitive, it can't be serialized as binary + if (objectInspector.getPrimitiveCategory() != PrimitiveCategory.STRING && ColumnEncoding.BINARY == encoding) { + writeBinary(output, value, objectInspector); + } else { + writeString(output, value, objectInspector); + } + } + + protected void writeBinary(ByteStream.Output output, Object value, + PrimitiveObjectInspector inspector) throws IOException { + LazyUtils.writePrimitive(output, value, inspector); + } + + protected void writeString(ByteStream.Output output, Object value, + PrimitiveObjectInspector inspector) throws IOException { + LazyUtils.writePrimitiveUTF8(output, value, inspector, serDeParams.isEscaped(), + serDeParams.getEscapeChar(), serDeParams.getNeedsEscape()); + } + + protected ColumnVisibility getVisibility() { + return visibility; + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java new file mode 100644 index 0000000..240521f --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java @@ -0,0 +1,140 @@ +package org.apache.hadoop.hive.accumulo.serde; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.accumulo.core.data.Mutation; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.AccumuloHiveRow; +import org.apache.hadoop.hive.accumulo.LazyAccumuloRow; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.Writable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Deserialization from Accumulo to LazyAccumuloRow for Hive. 
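A rough end-to-end sketch of the SerDe (illustrative only, not part of this patch): initialize it from table properties, then deserialize an AccumuloHiveRow as the input format would hand it over. The ":rowID" token for the rowId column mapping and the concrete property values are assumptions made for the example.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.accumulo.AccumuloHiveRow;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters;
import org.apache.hadoop.hive.serde.serdeConstants;

public class AccumuloSerDeSketch {
  public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "rowid,name");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");
    // First Hive column maps to the Accumulo rowId, second to cf:name
    // (":rowID" is assumed to be the rowId token in the column mapping syntax)
    props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:name");

    AccumuloSerDe serde = new AccumuloSerDe();
    serde.initialize(new Configuration(), props);

    // The input format assembles an AccumuloHiveRow from the matching Key/Values
    AccumuloHiveRow row = new AccumuloHiveRow("row1");
    row.add("cf", "name", "alice".getBytes());

    // deserialize() wraps the row in the cached LazyAccumuloRow
    Object lazyRow = serde.deserialize(row);
    System.out.println(serde.getObjectInspector().getTypeName() + " <- " + lazyRow.getClass().getName());
  }
}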
+ * + */ +public class AccumuloSerDe implements SerDe { + + private AccumuloSerDeParameters accumuloSerDeParameters; + private LazyAccumuloRow cachedRow; + private ObjectInspector cachedObjectInspector; + private AccumuloRowSerializer serializer; + + private static final Logger log = LoggerFactory.getLogger(AccumuloSerDe.class); + + public void initialize(Configuration conf, Properties properties) throws SerDeException { + accumuloSerDeParameters = new AccumuloSerDeParameters(conf, properties, getClass().getName()); + + final SerDeParameters serDeParams = accumuloSerDeParameters.getSerDeParameters(); + final List mappings = accumuloSerDeParameters.getColumnMappings(); + final List columnTypes = accumuloSerDeParameters.getHiveColumnTypes(); + final AccumuloRowIdFactory factory = accumuloSerDeParameters.getRowIdFactory(); + + ArrayList columnObjectInspectors = getColumnObjectInspectors(columnTypes, serDeParams, mappings, factory); + + cachedObjectInspector = LazyObjectInspectorFactory.getLazySimpleStructObjectInspector( + serDeParams.getColumnNames(), columnObjectInspectors, serDeParams.getSeparators()[0], + serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), + serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + cachedRow = new LazyAccumuloRow((LazySimpleStructObjectInspector) cachedObjectInspector); + + serializer = new AccumuloRowSerializer(accumuloSerDeParameters.getRowIdOffset(), + accumuloSerDeParameters.getSerDeParameters(), accumuloSerDeParameters.getColumnMappings(), + accumuloSerDeParameters.getTableVisibilityLabel(), + accumuloSerDeParameters.getRowIdFactory()); + + if (log.isInfoEnabled()) { + log.info("Initialized with {} type: {}", accumuloSerDeParameters.getSerDeParameters() + .getColumnNames(), accumuloSerDeParameters.getSerDeParameters().getColumnTypes()); + } + } + + protected ArrayList getColumnObjectInspectors(List columnTypes, + SerDeParameters serDeParams, List mappings, AccumuloRowIdFactory factory) + throws SerDeException { + ArrayList columnObjectInspectors = new ArrayList( + columnTypes.size()); + for (int i = 0; i < columnTypes.size(); i++) { + TypeInfo type = columnTypes.get(i); + ColumnMapping mapping = mappings.get(i); + if (mapping instanceof HiveAccumuloRowIdColumnMapping) { + columnObjectInspectors.add(factory.createRowIdObjectInspector(type)); + } else { + columnObjectInspectors.add(LazyFactory.createLazyObjectInspector(type, + serDeParams.getSeparators(), 1, serDeParams.getNullSequence(), serDeParams.isEscaped(), + serDeParams.getEscapeChar())); + } + } + + return columnObjectInspectors; + } + + /*** + * For testing purposes. + */ + public LazyAccumuloRow getCachedRow() { + return cachedRow; + } + + public Class getSerializedClass() { + return Mutation.class; + } + + @Override + public Writable serialize(Object o, ObjectInspector objectInspector) throws SerDeException { + try { + return serializer.serialize(o, objectInspector); + } catch (IOException e) { + throw new SerDeException(e); + } + } + + @Override + public Object deserialize(Writable writable) throws SerDeException { + if (!(writable instanceof AccumuloHiveRow)) { + throw new SerDeException(getClass().getName() + " : " + "Expected AccumuloHiveRow. 
Got " + + writable.getClass().getName()); + } + + cachedRow.init((AccumuloHiveRow) writable, accumuloSerDeParameters.getColumnMappings(), + accumuloSerDeParameters.getRowIdFactory()); + + return cachedRow; + } + + public ObjectInspector getObjectInspector() throws SerDeException { + return cachedObjectInspector; + } + + public SerDeStats getSerDeStats() { + throw new UnsupportedOperationException("SerdeStats not supported."); + } + + public AccumuloSerDeParameters getParams() { + return accumuloSerDeParameters; + } + + public boolean getIteratorPushdown() { + return accumuloSerDeParameters.getIteratorPushdown(); + } + + protected AccumuloRowSerializer getSerializer() { + return serializer; + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java new file mode 100644 index 0000000..ef77697 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.serde; + +import java.util.Collections; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Properties; + +import org.apache.accumulo.core.security.Authorizations; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapper; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.log4j.Logger; + +import com.google.common.base.Preconditions; + +/** + * + */ +public class AccumuloSerDeParameters extends AccumuloConnectionParameters { + private static final Logger log = Logger.getLogger(AccumuloSerDeParameters.class); + + public static final String COLUMN_MAPPINGS = "accumulo.columns.mapping"; + public static final String ITERATOR_PUSHDOWN_KEY = "accumulo.iterator.pushdown"; + public static final boolean ITERATOR_PUSHDOWN_DEFAULT = true; + + public static final String DEFAULT_STORAGE_TYPE = "accumulo.default.storage"; + + public static final String VISIBILITY_LABEL_KEY = "accumulo.visibility.label"; + public static final ColumnVisibility DEFAULT_VISIBILITY_LABEL = new ColumnVisibility(); + + public static final String AUTHORIZATIONS_KEY = "accumulo.authorizations"; + + public static final String COMPOSITE_ROWID_FACTORY = "accumulo.composite.rowid.factory"; + public static final String COMPOSITE_ROWID_CLASS = "accumulo.composite.rowid"; + + protected final ColumnMapper columnMapper; + + private Properties tableProperties; + private String serdeName; + private SerDeParameters lazySerDeParameters; + private AccumuloRowIdFactory rowIdFactory; + + public AccumuloSerDeParameters(Configuration conf, Properties tableProperties, String serdeName) + throws SerDeException { + super(conf); + this.tableProperties = tableProperties; + this.serdeName = serdeName; + + lazySerDeParameters = LazySimpleSerDe.initSerdeParams(conf, tableProperties, serdeName); + + // The default encoding for this table when not otherwise specified + String defaultStorage = tableProperties.getProperty(DEFAULT_STORAGE_TYPE); + + columnMapper = new ColumnMapper(getColumnMappingValue(), defaultStorage, + lazySerDeParameters.getColumnNames(), lazySerDeParameters.getColumnTypes()); + + log.info("Constructed column mapping " + columnMapper); + + // Generate types for column mapping + if (null == getColumnTypeValue()) { + tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnMapper.getTypesString()); + } + + if (columnMapper.size() < lazySerDeParameters.getColumnNames().size()) { + throw new TooManyHiveColumnsException("You have more " + COLUMN_MAPPINGS + + " fields than hive columns"); + } else if (columnMapper.size() > lazySerDeParameters.getColumnNames().size()) { + throw new TooManyAccumuloColumnsException( + "You have more hive columns than fields mapped with " + COLUMN_MAPPINGS); + } + + this.rowIdFactory = initRowIdFactory(conf, tableProperties); + } + + protected AccumuloRowIdFactory initRowIdFactory(Configuration conf, 
Properties tbl) + throws SerDeException { + try { + AccumuloRowIdFactory keyFactory = createRowIdFactory(conf, tbl); + if (keyFactory != null) { + keyFactory.init(this, tbl); + } + return keyFactory; + } catch (Exception e) { + throw new SerDeException(e); + } + } + + @SuppressWarnings({"rawtypes", "unchecked"}) + protected AccumuloRowIdFactory createRowIdFactory(Configuration job, Properties tbl) + throws Exception { + // Try to load the composite factory if one was provided + String factoryClassName = tbl.getProperty(COMPOSITE_ROWID_FACTORY); + if (factoryClassName != null) { + log.info("Loading CompositeRowIdFactory class " + factoryClassName); + Class factoryClazz = Class.forName(factoryClassName); + return (AccumuloRowIdFactory) ReflectionUtils.newInstance(factoryClazz, job); + } + + // See if a custom CompositeKey class was provided + String keyClassName = tbl.getProperty(COMPOSITE_ROWID_CLASS); + if (keyClassName != null) { + log.info("Loading CompositeRowId class " + keyClassName); + Class keyClass = Class.forName(keyClassName); + Class compositeRowIdClass = keyClass + .asSubclass(AccumuloCompositeRowId.class); + return new CompositeAccumuloRowIdFactory(compositeRowIdClass); + } + + return new DefaultAccumuloRowIdFactory(); + } + + public SerDeParameters getSerDeParameters() { + return lazySerDeParameters; + } + + public Properties getTableProperties() { + return tableProperties; + } + + public String getColumnTypeValue() { + return tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES); + } + + public String getSerDeName() { + return serdeName; + } + + public String getColumnMappingValue() { + return tableProperties.getProperty(COLUMN_MAPPINGS); + } + + public HiveAccumuloRowIdColumnMapping getRowIdColumnMapping() { + return columnMapper.getRowIdMapping(); + } + + public boolean getIteratorPushdown() { + return conf.getBoolean(ITERATOR_PUSHDOWN_KEY, ITERATOR_PUSHDOWN_DEFAULT); + } + + public List getHiveColumnNames() { + return Collections.unmodifiableList(lazySerDeParameters.getColumnNames()); + } + + public List getHiveColumnTypes() { + return Collections.unmodifiableList(lazySerDeParameters.getColumnTypes()); + } + + public ColumnMapper getColumnMapper() { + return columnMapper; + } + + public int getRowIdOffset() { + return columnMapper.getRowIdOffset(); + } + + public List getColumnMappings() { + return columnMapper.getColumnMappings(); + } + + public AccumuloRowIdFactory getRowIdFactory() { + return rowIdFactory; + } + + public String getRowIdHiveColumnName() { + int rowIdOffset = columnMapper.getRowIdOffset(); + if (-1 == rowIdOffset) { + return null; + } + + List hiveColumnNames = lazySerDeParameters.getColumnNames(); + if (0 > rowIdOffset || hiveColumnNames.size() <= rowIdOffset) { + throw new IllegalStateException("Tried to find rowID offset at position " + rowIdOffset + + " from Hive columns " + hiveColumnNames); + } + + return hiveColumnNames.get(rowIdOffset); + } + + public ColumnMapping getColumnMappingForHiveColumn(String hiveColumn) { + List hiveColumnNames = lazySerDeParameters.getColumnNames(); + + for (int offset = 0; offset < hiveColumnNames.size() && offset < columnMapper.size(); offset++) { + String hiveColumnName = hiveColumnNames.get(offset); + if (hiveColumn.equals(hiveColumnName)) { + return columnMapper.get(offset); + } + } + + throw new NoSuchElementException("Could not find column mapping for Hive column " + hiveColumn); + } + + public TypeInfo getTypeForHiveColumn(String hiveColumn) { + List hiveColumnNames = 
lazySerDeParameters.getColumnNames(); + List hiveColumnTypes = lazySerDeParameters.getColumnTypes(); + + for (int i = 0; i < hiveColumnNames.size() && i < hiveColumnTypes.size(); i++) { + String columnName = hiveColumnNames.get(i); + if (hiveColumn.equals(columnName)) { + return hiveColumnTypes.get(i); + } + } + + throw new NoSuchElementException("Could not find Hive column type for " + hiveColumn); + } + + /** + * Extracts the table property to allow a custom ColumnVisibility label to be set on updates to be + * written to an Accumulo table. The value in the table property must be a properly formatted + * {@link ColumnVisibility}. If not value is present in the table properties, an empty + * ColumnVisibility is returned. + * + * @return The ColumnVisibility to be applied to all updates sent to Accumulo + */ + public ColumnVisibility getTableVisibilityLabel() { + String visibilityLabel = tableProperties.getProperty(VISIBILITY_LABEL_KEY, null); + if (null == visibilityLabel || visibilityLabel.isEmpty()) { + return DEFAULT_VISIBILITY_LABEL; + } + + return new ColumnVisibility(visibilityLabel); + } + + /** + * Extracts the table property to allow dynamic Accumulo Authorizations to be used when reading + * data from an Accumulo table. If no Authorizations are provided in the table properties, null is + * returned to preserve the functionality to read all data that the current user has access to. + * + * @return The Authorizations that should be used to read data from Accumulo, null if no + * configuration is supplied. + */ + public Authorizations getAuthorizations() { + String authorizationStr = tableProperties.getProperty(AUTHORIZATIONS_KEY, null); + + return getAuthorizationsFromValue(authorizationStr); + } + + /** + * Create an Authorizations object when the provided value is not null. Will return null, + * otherwise. + * + * @param authorizationStr + * Configuration value to parse + * @return Authorization object or null + */ + protected static Authorizations getAuthorizationsFromValue(String authorizationStr) { + if (null == authorizationStr) { + return null; + } + + return new Authorizations(authorizationStr); + } + + /** + * Extract any configuration on Authorizations to be used from the provided Configuration. If a + * non-null value is not present in the configuration, a null object is returned + * + * @return Authorization built from configuration value, null if no value is present in conf + */ + public static Authorizations getAuthorizationsFromConf(Configuration conf) { + Preconditions.checkNotNull(conf); + + String authorizationStr = conf.get(AUTHORIZATIONS_KEY, null); + + return getAuthorizationsFromValue(authorizationStr); + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/CompositeAccumuloRowIdFactory.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/CompositeAccumuloRowIdFactory.java new file mode 100644 index 0000000..574a8aa --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/CompositeAccumuloRowIdFactory.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.accumulo.serde; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.Utils; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.log4j.Logger; + +/** + * {@link AccumuloRowIdFactory} designed for injection of the {@link AccumuloCompositeRowId} to be + * used to generate the Accumulo rowId. Allows for custom {@link AccumuloCompositeRowId}s to be + * specified without overriding the entire ObjectInspector for the Hive row. + * + * @param + */ +public class CompositeAccumuloRowIdFactory extends + DefaultAccumuloRowIdFactory { + + public static final Logger log = Logger.getLogger(CompositeAccumuloRowIdFactory.class); + + private final Class keyClass; + private final Constructor constructor; + + public CompositeAccumuloRowIdFactory(Class keyClass) throws SecurityException, + NoSuchMethodException { + // see javadoc of AccumuloCompositeRowId + this.keyClass = keyClass; + this.constructor = keyClass.getDeclaredConstructor(LazySimpleStructObjectInspector.class, + Properties.class, Configuration.class); + } + + @Override + public void addDependencyJars(Configuration jobConf) throws IOException { + // Make sure the jar containing the custom CompositeRowId is included + // in the mapreduce job's classpath (libjars) + Utils.addDependencyJars(jobConf, keyClass); + } + + @Override + public T createRowId(ObjectInspector inspector) throws SerDeException { + try { + return (T) constructor.newInstance(inspector, this.properties, + this.accumuloSerDeParams.getConf()); + } catch (Exception e) { + throw new SerDeException(e); + } + } +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/DefaultAccumuloRowIdFactory.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/DefaultAccumuloRowIdFactory.java new file mode 100644 index 0000000..1180679 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/DefaultAccumuloRowIdFactory.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.accumulo.serde; + +import java.io.IOException; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.Utils; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping; +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +/** + * Default implementation of the AccumuloRowIdFactory which uses the normal + * {@link AccumuloRowSerializer} methods to serialize the field for storage into Accumulo. + */ +public class DefaultAccumuloRowIdFactory implements AccumuloRowIdFactory { + + protected AccumuloSerDeParameters accumuloSerDeParams; + protected LazySimpleSerDe.SerDeParameters serdeParams; + protected Properties properties; + protected HiveAccumuloRowIdColumnMapping rowIdMapping; + protected AccumuloRowSerializer serializer; + + @Override + public void init(AccumuloSerDeParameters accumuloSerDeParams, Properties properties) + throws SerDeException { + this.accumuloSerDeParams = accumuloSerDeParams; + this.serdeParams = accumuloSerDeParams.getSerDeParameters(); + this.properties = properties; + this.serializer = new AccumuloRowSerializer(accumuloSerDeParams.getRowIdOffset(), serdeParams, + accumuloSerDeParams.getColumnMappings(), accumuloSerDeParams.getTableVisibilityLabel(), + this); + this.rowIdMapping = accumuloSerDeParams.getRowIdColumnMapping(); + } + + @Override + public void addDependencyJars(Configuration conf) throws IOException { + Utils.addDependencyJars(conf, getClass()); + } + + @Override + public ObjectInspector createRowIdObjectInspector(TypeInfo type) throws SerDeException { + return LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), 1, + serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar()); + } + + @Override + public LazyObjectBase createRowId(ObjectInspector inspector) throws SerDeException { + // LazyObject can only be binary when it's not a string as well +// return LazyFactory.createLazyObject(inspector, +// ColumnEncoding.BINARY == rowIdMapping.getEncoding()); + return LazyFactory.createLazyObject(inspector, + inspector.getTypeName() != TypeInfoFactory.stringTypeInfo.getTypeName() + && ColumnEncoding.BINARY == rowIdMapping.getEncoding()); + } + + @Override + public byte[] serializeRowId(Object object, StructField field, ByteStream.Output output) + throws IOException { + return serializer.serializeRowId(object, field, rowIdMapping); + } + +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/TooManyAccumuloColumnsException.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/TooManyAccumuloColumnsException.java new file mode 100644 index 0000000..7a84b7d --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/TooManyAccumuloColumnsException.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.serde; + +import org.apache.hadoop.hive.serde2.SerDeException; + +/** + * + */ +public class TooManyAccumuloColumnsException extends SerDeException { + + private static final long serialVersionUID = 1L; + + public TooManyAccumuloColumnsException() { + super(); + } + + public TooManyAccumuloColumnsException(String message, Throwable cause) { + super(message, cause); + } + + public TooManyAccumuloColumnsException(String message) { + super(message); + } + + public TooManyAccumuloColumnsException(Throwable cause) { + super(cause); + } + +} diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/TooManyHiveColumnsException.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/TooManyHiveColumnsException.java new file mode 100644 index 0000000..848d7a4 --- /dev/null +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/TooManyHiveColumnsException.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.serde; + +import org.apache.hadoop.hive.serde2.SerDeException; + +/** + * + */ +public class TooManyHiveColumnsException extends SerDeException { + + private static final long serialVersionUID = 1L; + + public TooManyHiveColumnsException() { + super(); + } + + public TooManyHiveColumnsException(String message, Throwable cause) { + super(message, cause); + } + + public TooManyHiveColumnsException(String message) { + super(message); + } + + public TooManyHiveColumnsException(Throwable cause) { + super(cause); + } + +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloConnectionParameters.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloConnectionParameters.java new file mode 100644 index 0000000..8b4c9ff --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloConnectionParameters.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Instance; +import org.apache.hadoop.conf.Configuration; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +/** + * + */ +public class TestAccumuloConnectionParameters { + + @Test + public void testInstantiatesWithNullConfiguration() { + // TableDesc#getDeserializer() passes a null Configuration into the SerDe. + // We shouldn't fail immediately in this case + AccumuloConnectionParameters cnxnParams = new AccumuloConnectionParameters(null); + + // We should fail if we try to get info out of the params + try { + cnxnParams.getAccumuloInstanceName(); + Assert.fail("Should have gotten an NPE"); + } catch (NullPointerException e) {} + } + + @Test(expected = IllegalArgumentException.class) + public void testMissingInstanceName() { + Configuration conf = new Configuration(false); + conf.set(AccumuloConnectionParameters.ZOOKEEPERS, "localhost:2181"); + conf.set(AccumuloConnectionParameters.USER_NAME, "user"); + conf.set(AccumuloConnectionParameters.USER_PASS, "password"); + + AccumuloConnectionParameters cnxnParams = new AccumuloConnectionParameters(conf); + cnxnParams.getInstance(); + } + + @Test(expected = IllegalArgumentException.class) + public void testMissingZooKeepers() { + Configuration conf = new Configuration(false); + conf.set(AccumuloConnectionParameters.INSTANCE_NAME, "accumulo"); + conf.set(AccumuloConnectionParameters.USER_NAME, "user"); + conf.set(AccumuloConnectionParameters.USER_PASS, "password"); + + AccumuloConnectionParameters cnxnParams = new AccumuloConnectionParameters(conf); + cnxnParams.getInstance(); + } + + @Test(expected = IllegalArgumentException.class) + public void testMissingUserName() throws AccumuloException, AccumuloSecurityException { + Configuration conf = new Configuration(false); + conf.set(AccumuloConnectionParameters.INSTANCE_NAME, "accumulo"); + conf.set(AccumuloConnectionParameters.ZOOKEEPERS, "localhost:2181"); + conf.set(AccumuloConnectionParameters.USER_PASS, "password"); + + Instance instance = Mockito.mock(Instance.class); + + AccumuloConnectionParameters cnxnParams = new AccumuloConnectionParameters(conf); + + // Provide an instance of the code doesn't try to make a real Instance + // We just want to test that we fail before trying to make a connector + // with null username + cnxnParams.getConnector(instance); + } + + @Test(expected = IllegalArgumentException.class) + public void testMissingPassword() throws AccumuloException, AccumuloSecurityException { + Configuration conf = new Configuration(false); + conf.set(AccumuloConnectionParameters.INSTANCE_NAME, "accumulo"); + conf.set(AccumuloConnectionParameters.ZOOKEEPERS, 
"localhost:2181"); + conf.set(AccumuloConnectionParameters.USER_NAME, "user"); + + Instance instance = Mockito.mock(Instance.class); + + AccumuloConnectionParameters cnxnParams = new AccumuloConnectionParameters(conf); + + // Provide an instance of the code doesn't try to make a real Instance + // We just want to test that we fail before trying to make a connector + // with null password + cnxnParams.getConnector(instance); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloHiveRow.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloHiveRow.java new file mode 100644 index 0000000..fc90e36 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloHiveRow.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + +import org.apache.hadoop.io.Text; +import org.junit.Test; + +/** + * Test basic operations on AccumuloHiveRow + */ +public class TestAccumuloHiveRow { + + @Test + public void testHasFamilyAndQualifier() { + AccumuloHiveRow row = new AccumuloHiveRow("row1"); + + // Add some columns + for (int i = 1; i <= 5; i++) { + row.add("cf1", "cq" + i, Integer.toString(i).getBytes()); + } + + // Check that we don't find unexpected columns + assertFalse(row.hasFamAndQual(new Text(""), new Text(""))); + assertFalse(row.hasFamAndQual(new Text("cf0"), new Text("cq1"))); + assertFalse(row.hasFamAndQual(new Text("cf1"), new Text("cq0"))); + + // Check that we do find all expected columns + for (int i = 1; i <= 5; i++) { + assertTrue(row.hasFamAndQual(new Text("cf1"), new Text("cq" + i))); + } + } + + @Test + public void testGetValueFromColumn() { + AccumuloHiveRow row = new AccumuloHiveRow("row1"); + + // Should return null when there is no column + assertNull(row.getValue(new Text(""), new Text(""))); + + for (int i = 1; i <= 5; i++) { + row.add("cf", "cq" + i, Integer.toString(i).getBytes()); + } + + assertNull(row.getValue(new Text("cf"), new Text("cq0"))); + + for (int i = 1; i <= 5; i++) { + assertArrayEquals(Integer.toString(i).getBytes(), + row.getValue(new Text("cf"), new Text("cq" + i))); + } + } + + @Test + public void testWritableEmptyRow() throws IOException { + AccumuloHiveRow emptyRow = new AccumuloHiveRow(); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream 
out = new DataOutputStream(baos); + emptyRow.write(out); + out.close(); + + AccumuloHiveRow emptyCopy = new AccumuloHiveRow(); + + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + DataInputStream in = new DataInputStream(bais); + emptyCopy.readFields(in); + + assertEquals(emptyRow, emptyCopy); + } + + @Test + public void testWritableWithColumns() throws IOException { + AccumuloHiveRow rowWithColumns = new AccumuloHiveRow("row"); + rowWithColumns.add("cf", "cq1", "1".getBytes()); + rowWithColumns.add("cf", "cq2", "2".getBytes()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos); + rowWithColumns.write(out); + out.close(); + + AccumuloHiveRow copy = new AccumuloHiveRow(); + + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + DataInputStream in = new DataInputStream(bais); + copy.readFields(in); + + assertEquals(rowWithColumns, copy); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloStorageHandler.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloStorageHandler.java new file mode 100644 index 0000000..0aaa782 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestAccumuloStorageHandler.java @@ -0,0 +1,536 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.mockito.Mockito; + +/** + * + */ +public class TestAccumuloStorageHandler { + + protected AccumuloStorageHandler storageHandler; + + @Rule + public TestName test = new TestName(); + + @Before + public void setup() { + storageHandler = new AccumuloStorageHandler(); + } + + @Test + public void testTablePropertiesPassedToOutputJobProperties() { + TableDesc tableDesc = Mockito.mock(TableDesc.class); + Properties props = new Properties(); + Map jobProperties = new HashMap(); + + props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq1,cf:cq2,cf:cq3"); + props.setProperty(AccumuloSerDeParameters.TABLE_NAME, "table"); + props.setProperty(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY, "foo"); + + Mockito.when(tableDesc.getProperties()).thenReturn(props); + + storageHandler.configureOutputJobProperties(tableDesc, jobProperties); + + Assert.assertEquals(3, jobProperties.size()); + Assert.assertTrue("Job properties did not contain column mappings", + jobProperties.containsKey(AccumuloSerDeParameters.COLUMN_MAPPINGS)); + Assert.assertEquals(props.getProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS), + jobProperties.get(AccumuloSerDeParameters.COLUMN_MAPPINGS)); + + Assert.assertTrue("Job properties did not contain accumulo table name", + jobProperties.containsKey(AccumuloSerDeParameters.TABLE_NAME)); + Assert.assertEquals(props.getProperty(AccumuloSerDeParameters.TABLE_NAME), + jobProperties.get(AccumuloSerDeParameters.TABLE_NAME)); + + Assert.assertTrue("Job properties did not contain visibility label", + jobProperties.containsKey(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY)); + Assert.assertEquals(props.getProperty(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY), + jobProperties.get(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY)); + } + + @Test + public void testTablePropertiesPassedToInputJobProperties() { + TableDesc tableDesc = Mockito.mock(TableDesc.class); + Properties props = new Properties(); + Map jobProperties = new HashMap(); + + props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq1,cf:cq2,cf:cq3"); + props.setProperty(AccumuloSerDeParameters.TABLE_NAME, "table"); + props.setProperty(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, "true"); + props + .setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName()); + props.setProperty(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo,bar"); + + Mockito.when(tableDesc.getProperties()).thenReturn(props); + + storageHandler.configureInputJobProperties(tableDesc, jobProperties); + + Assert.assertEquals(5, jobProperties.size()); + + Assert.assertTrue(jobProperties.containsKey(AccumuloSerDeParameters.COLUMN_MAPPINGS)); + 
Assert.assertEquals(props.getProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS), + jobProperties.get(AccumuloSerDeParameters.COLUMN_MAPPINGS)); + + Assert.assertTrue(jobProperties.containsKey(AccumuloSerDeParameters.TABLE_NAME)); + Assert.assertEquals(props.getProperty(AccumuloSerDeParameters.TABLE_NAME), + jobProperties.get(AccumuloSerDeParameters.TABLE_NAME)); + + Assert.assertTrue(jobProperties.containsKey(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY)); + Assert.assertEquals(props.getProperty(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY), + jobProperties.get(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY)); + + Assert.assertTrue(jobProperties.containsKey(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE)); + Assert.assertEquals(props.getProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), + jobProperties.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE)); + + Assert.assertTrue(jobProperties.containsKey(AccumuloSerDeParameters.AUTHORIZATIONS_KEY)); + Assert.assertEquals(props.getProperty(AccumuloSerDeParameters.AUTHORIZATIONS_KEY), + jobProperties.get(AccumuloSerDeParameters.AUTHORIZATIONS_KEY)); + } + + @Test(expected = IllegalArgumentException.class) + public void testNonBooleanIteratorPushdownValue() { + TableDesc tableDesc = Mockito.mock(TableDesc.class); + Properties props = new Properties(); + Map jobProperties = new HashMap(); + + props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq1,cf:cq2,cf:cq3"); + props.setProperty(AccumuloSerDeParameters.TABLE_NAME, "table"); + props.setProperty(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, "foo"); + + Mockito.when(tableDesc.getProperties()).thenReturn(props); + + storageHandler.configureInputJobProperties(tableDesc, jobProperties); + } + + @Test(expected = IllegalArgumentException.class) + public void testEmptyIteratorPushdownValue() { + TableDesc tableDesc = Mockito.mock(TableDesc.class); + Properties props = new Properties(); + Map jobProperties = new HashMap(); + + props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq1,cf:cq2,cf:cq3"); + props.setProperty(AccumuloSerDeParameters.TABLE_NAME, "table"); + props.setProperty(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, ""); + + Mockito.when(tableDesc.getProperties()).thenReturn(props); + + storageHandler.configureInputJobProperties(tableDesc, jobProperties); + } + + @Test + public void testTableJobPropertiesCallsInputAndOutputMethods() { + AccumuloStorageHandler mockStorageHandler = Mockito.mock(AccumuloStorageHandler.class); + TableDesc tableDesc = Mockito.mock(TableDesc.class); + Map jobProperties = new HashMap(); + + Mockito.doCallRealMethod().when(mockStorageHandler) + .configureTableJobProperties(tableDesc, jobProperties); + + // configureTableJobProperties shouldn't be getting called by Hive, but, if it somehow does, + // we should just set all of the configurations for input and output. 
+ mockStorageHandler.configureTableJobProperties(tableDesc, jobProperties); + + Mockito.verify(mockStorageHandler).configureInputJobProperties(tableDesc, jobProperties); + Mockito.verify(mockStorageHandler).configureOutputJobProperties(tableDesc, jobProperties); + } + + @Test + public void testPreCreateTable() throws Exception { + MockInstance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + String tableName = "table"; + + // Define the SerDe Parameters + Map params = new HashMap(); + params.put(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq"); + + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + StorageDescriptor sd = Mockito.mock(StorageDescriptor.class); + Table table = Mockito.mock(Table.class); + SerDeInfo serDeInfo = Mockito.mock(SerDeInfo.class); + + // Call the real preCreateTable method + Mockito.doCallRealMethod().when(storageHandler).preCreateTable(table); + + // Return our known table name + Mockito.when(storageHandler.getTableName(table)).thenReturn(tableName); + + // Not an EXTERNAL table + Mockito.when(storageHandler.isExternalTable(table)).thenReturn(false); + + // Return the mocked StorageDescriptor + Mockito.when(table.getSd()).thenReturn(sd); + + // No location expected with AccumuloStorageHandler + Mockito.when(sd.getLocation()).thenReturn(null); + + // Return mocked SerDeInfo + Mockito.when(sd.getSerdeInfo()).thenReturn(serDeInfo); + + // Custom parameters + Mockito.when(serDeInfo.getParameters()).thenReturn(params); + + // Return the MockInstance's Connector + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + + storageHandler.connectionParams = connectionParams; + + storageHandler.preCreateTable(table); + + Assert.assertTrue("Table does not exist when we expect it to", + conn.tableOperations().exists(tableName)); + } + + @Test(expected = MetaException.class) + public void testMissingColumnMappingFails() throws Exception { + MockInstance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + String tableName = "table"; + + // Empty parameters are sent, no COLUMN_MAPPING + Map params = new HashMap(); + + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + StorageDescriptor sd = Mockito.mock(StorageDescriptor.class); + Table table = Mockito.mock(Table.class); + SerDeInfo serDeInfo = Mockito.mock(SerDeInfo.class); + + // Call the real preCreateTable method + Mockito.doCallRealMethod().when(storageHandler).preCreateTable(table); + + // Return our known table name + Mockito.when(storageHandler.getTableName(table)).thenReturn(tableName); + + // Not an EXTERNAL table + Mockito.when(storageHandler.isExternalTable(table)).thenReturn(false); + + // Return the mocked StorageDescriptor + Mockito.when(table.getSd()).thenReturn(sd); + + // No location expected with AccumuloStorageHandler + Mockito.when(sd.getLocation()).thenReturn(null); + + // Return mocked SerDeInfo + Mockito.when(sd.getSerdeInfo()).thenReturn(serDeInfo); + + // Custom parameters + Mockito.when(serDeInfo.getParameters()).thenReturn(params); + + // Return the MockInstance's Connector + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + + storageHandler.connectionParams = 
connectionParams; + + storageHandler.preCreateTable(table); + } + + @Test(expected = MetaException.class) + public void testNonNullLocation() throws Exception { + MockInstance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + String tableName = "table"; + + // Empty parameters are sent, no COLUMN_MAPPING + Map params = new HashMap(); + params.put(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq"); + + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + StorageDescriptor sd = Mockito.mock(StorageDescriptor.class); + Table table = Mockito.mock(Table.class); + SerDeInfo serDeInfo = Mockito.mock(SerDeInfo.class); + + // Call the real preCreateTable method + Mockito.doCallRealMethod().when(storageHandler).preCreateTable(table); + + // Return our known table name + Mockito.when(storageHandler.getTableName(table)).thenReturn(tableName); + + // Not an EXTERNAL table + Mockito.when(storageHandler.isExternalTable(table)).thenReturn(false); + + // Return the mocked StorageDescriptor + Mockito.when(table.getSd()).thenReturn(sd); + + // No location expected with AccumuloStorageHandler + Mockito.when(sd.getLocation()).thenReturn("foobar"); + + // Return mocked SerDeInfo + Mockito.when(sd.getSerdeInfo()).thenReturn(serDeInfo); + + // Custom parameters + Mockito.when(serDeInfo.getParameters()).thenReturn(params); + + // Return the MockInstance's Connector + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + + storageHandler.connectionParams = connectionParams; + + storageHandler.preCreateTable(table); + } + + @Test(expected = MetaException.class) + public void testExternalNonExistentTableFails() throws Exception { + MockInstance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + String tableName = "table"; + + // Define the SerDe Parameters + Map params = new HashMap(); + params.put(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq"); + + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + StorageDescriptor sd = Mockito.mock(StorageDescriptor.class); + Table table = Mockito.mock(Table.class); + SerDeInfo serDeInfo = Mockito.mock(SerDeInfo.class); + + // Call the real preCreateTable method + Mockito.doCallRealMethod().when(storageHandler).preCreateTable(table); + + // Return our known table name + Mockito.when(storageHandler.getTableName(table)).thenReturn(tableName); + + // Is an EXTERNAL table + Mockito.when(storageHandler.isExternalTable(table)).thenReturn(true); + + // Return the mocked StorageDescriptor + Mockito.when(table.getSd()).thenReturn(sd); + + // No location expected with AccumuloStorageHandler + Mockito.when(sd.getLocation()).thenReturn(null); + + // Return mocked SerDeInfo + Mockito.when(sd.getSerdeInfo()).thenReturn(serDeInfo); + + // Custom parameters + Mockito.when(serDeInfo.getParameters()).thenReturn(params); + + // Return the MockInstance's Connector + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + + storageHandler.connectionParams = connectionParams; + + storageHandler.preCreateTable(table); + } + + @Test(expected = MetaException.class) + public void testNonExternalExistentTable() throws Exception { + MockInstance inst = new 
MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + String tableName = "table"; + + // Create the table + conn.tableOperations().create(tableName); + + // Define the SerDe Parameters + Map params = new HashMap(); + params.put(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:cq"); + + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + StorageDescriptor sd = Mockito.mock(StorageDescriptor.class); + Table table = Mockito.mock(Table.class); + SerDeInfo serDeInfo = Mockito.mock(SerDeInfo.class); + + // Call the real preCreateTable method + Mockito.doCallRealMethod().when(storageHandler).preCreateTable(table); + + // Return our known table name + Mockito.when(storageHandler.getTableName(table)).thenReturn(tableName); + + // Is not an EXTERNAL table + Mockito.when(storageHandler.isExternalTable(table)).thenReturn(false); + + // Return the mocked StorageDescriptor + Mockito.when(table.getSd()).thenReturn(sd); + + // No location expected with AccumuloStorageHandler + Mockito.when(sd.getLocation()).thenReturn(null); + + // Return mocked SerDeInfo + Mockito.when(sd.getSerdeInfo()).thenReturn(serDeInfo); + + // Custom parameters + Mockito.when(serDeInfo.getParameters()).thenReturn(params); + + // Return the MockInstance's Connector + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + + storageHandler.connectionParams = connectionParams; + + storageHandler.preCreateTable(table); + } + + @Test() + public void testRollbackCreateTableOnNonExistentTable() throws Exception { + MockInstance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + String tableName = "table"; + + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + Table table = Mockito.mock(Table.class); + + // Call the real preCreateTable method + Mockito.doCallRealMethod().when(storageHandler).rollbackCreateTable(table); + + // Return our known table name + Mockito.when(storageHandler.getTableName(table)).thenReturn(tableName); + + // Is not an EXTERNAL table + Mockito.when(storageHandler.isExternalTable(table)).thenReturn(false); + + // Return the MockInstance's Connector + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + + storageHandler.connectionParams = connectionParams; + + storageHandler.rollbackCreateTable(table); + } + + @Test() + public void testRollbackCreateTableDeletesExistentTable() throws Exception { + MockInstance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + String tableName = "table"; + + // Create the table + conn.tableOperations().create(tableName); + + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + Table table = Mockito.mock(Table.class); + + // Call the real preCreateTable method + Mockito.doCallRealMethod().when(storageHandler).rollbackCreateTable(table); + Mockito.doCallRealMethod().when(storageHandler).commitDropTable(table, true); + + // Return our known table name + Mockito.when(storageHandler.getTableName(table)).thenReturn(tableName); + + // Is not an EXTERNAL table + 
Mockito.when(storageHandler.isExternalTable(table)).thenReturn(false); + + // Return the MockInstance's Connector + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + + storageHandler.connectionParams = connectionParams; + + storageHandler.rollbackCreateTable(table); + + Assert.assertFalse(conn.tableOperations().exists(tableName)); + } + + @Test() + public void testRollbackCreateTableDoesntDeleteExternalExistentTable() throws Exception { + MockInstance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + String tableName = "table"; + + // Create the table + conn.tableOperations().create(tableName); + + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + Table table = Mockito.mock(Table.class); + + // Call the real rollbackCreateTable and commitDropTable methods + Mockito.doCallRealMethod().when(storageHandler).rollbackCreateTable(table); + Mockito.doCallRealMethod().when(storageHandler).commitDropTable(table, true); + + // Return our known table name + Mockito.when(storageHandler.getTableName(table)).thenReturn(tableName); + + // Is an EXTERNAL table + Mockito.when(storageHandler.isExternalTable(table)).thenReturn(true); + + // Return the MockInstance's Connector + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + + storageHandler.connectionParams = connectionParams; + + storageHandler.rollbackCreateTable(table); + + Assert.assertTrue(conn.tableOperations().exists(tableName)); + } + + @Test + public void testDropTableWithoutDeleteLeavesTableIntact() throws Exception { + MockInstance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + AccumuloStorageHandler storageHandler = Mockito.mock(AccumuloStorageHandler.class); + String tableName = "table"; + + // Create the table + conn.tableOperations().create(tableName); + + AccumuloConnectionParameters connectionParams = Mockito + .mock(AccumuloConnectionParameters.class); + Table table = Mockito.mock(Table.class); + + // Call the real commitDropTable method + Mockito.doCallRealMethod().when(storageHandler).commitDropTable(table, false); + + // Return our known table name + Mockito.when(storageHandler.getTableName(table)).thenReturn(tableName); + + // Is not an EXTERNAL table + Mockito.when(storageHandler.isExternalTable(table)).thenReturn(false); + + // Return the MockInstance's Connector + Mockito.when(connectionParams.getConnector()).thenReturn(conn); + + storageHandler.connectionParams = connectionParams; + + // Dropping the table without deleting data should leave the Accumulo table in place + storageHandler.commitDropTable(table, false); + + Assert.assertTrue(conn.tableOperations().exists(tableName)); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloMap.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloMap.java new file mode 100644 index 0000000..2479fb4 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloMap.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo; + +import java.io.DataOutputStream; +import java.io.IOException; + +import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyInteger; +import org.apache.hadoop.hive.serde2.lazy.LazyString; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + +/** + * + */ +public class TestLazyAccumuloMap { + + protected byte[] toBytes(int i) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos); + out.writeInt(i); + out.close(); + return baos.toByteArray(); + } + + @Test + public void testStringMapWithProjection() throws SerDeException { + AccumuloHiveRow row = new AccumuloHiveRow("row"); + + row.add("cf1", "foo", "bar".getBytes()); + row.add("cf1", "bar", "foo".getBytes()); + + row.add("cf2", "foo1", "bar1".getBytes()); + row.add("cf3", "bar1", "foo1".getBytes()); + + HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, + ColumnEncoding.STRING, ColumnEncoding.STRING, "column", TypeInfoFactory.getMapTypeInfo( + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo).toString()); + + // Map of String to String + Text nullSequence = new Text("\\N"); + ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils + .getTypeInfosFromTypeString("map<string,string>").get(0), new byte[] {(byte) 1, (byte) 2}, + 0, nullSequence, false, (byte) 0); + + LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi); + map.init(row, mapping); + + Assert.assertEquals(2, map.getMapSize()); + + Object o = map.getMapValueElement(new Text("foo")); + Assert.assertNotNull(o); + Assert.assertEquals(new Text("bar"), ((LazyString) o).getWritableObject()); + + o = map.getMapValueElement(new Text("bar")); + Assert.assertNotNull(o); + Assert.assertEquals(new Text("foo"), ((LazyString) o).getWritableObject()); + } + + @Test + public void testIntMap() throws SerDeException, IOException { + AccumuloHiveRow row = new AccumuloHiveRow("row"); + + row.add(new Text("cf1"), new Text("1"), "2".getBytes()); + row.add(new Text("cf1"), new Text("2"), "4".getBytes()); + row.add(new Text("cf1"), new Text("3"), "6".getBytes()); + + HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, + ColumnEncoding.STRING, ColumnEncoding.STRING, "column", TypeInfoFactory.getMapTypeInfo( + TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo).toString()); + + // Map of Integer to Integer + Text nullSequence = new Text("\\N"); + ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils + .getTypeInfosFromTypeString("map<int,int>").get(0), new byte[] {(byte) 1, (byte) 2}, 0, + nullSequence, false, (byte) 0); + + LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi); + map.init(row, mapping); + + Assert.assertEquals(3, map.getMapSize()); + + Object o = map.getMapValueElement(new IntWritable(1)); + Assert.assertNotNull(o); + Assert.assertEquals(new IntWritable(2), ((LazyInteger) o).getWritableObject()); + + o = map.getMapValueElement(new IntWritable(2)); + Assert.assertNotNull(o); + Assert.assertEquals(new IntWritable(4), ((LazyInteger) o).getWritableObject()); + + o = map.getMapValueElement(new IntWritable(3)); + Assert.assertNotNull(o); + Assert.assertEquals(new IntWritable(6), ((LazyInteger) o).getWritableObject()); + } + + @Test + public void testBinaryIntMap() throws SerDeException, IOException { + AccumuloHiveRow row = new AccumuloHiveRow("row"); + + row.add(new Text("cf1"), new Text(toBytes(1)), toBytes(2)); + row.add(new Text("cf1"), new Text(toBytes(2)), toBytes(4)); + row.add(new Text("cf1"), new Text(toBytes(3)), toBytes(6)); + + HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, + ColumnEncoding.BINARY, ColumnEncoding.BINARY, "column", TypeInfoFactory.getMapTypeInfo( + TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo).toString()); + + // Map of Integer to Integer + Text nullSequence = new Text("\\N"); + ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils + .getTypeInfosFromTypeString("map<int,int>").get(0), new byte[] {(byte) 1, (byte) 2}, 0, + nullSequence, false, (byte) 0); + + LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi); + map.init(row, mapping); + + Assert.assertEquals(3, map.getMapSize()); + + Object o = map.getMapValueElement(new IntWritable(1)); + Assert.assertNotNull(o); + Assert.assertEquals(new IntWritable(2), ((LazyInteger) o).getWritableObject()); + + o = map.getMapValueElement(new IntWritable(2)); + Assert.assertNotNull(o); + Assert.assertEquals(new IntWritable(4), ((LazyInteger) o).getWritableObject()); + + o = map.getMapValueElement(new IntWritable(3)); + Assert.assertNotNull(o); + Assert.assertEquals(new IntWritable(6), ((LazyInteger) o).getWritableObject()); + } + + @Test + public void testMixedSerializationMap() throws SerDeException, IOException { + AccumuloHiveRow row = new AccumuloHiveRow("row"); + + row.add(new Text("cf1"), new Text(toBytes(1)), "2".getBytes()); + row.add(new Text("cf1"), new Text(toBytes(2)), "4".getBytes()); + row.add(new Text("cf1"), new Text(toBytes(3)), "6".getBytes()); + + HiveAccumuloMapColumnMapping mapping = new HiveAccumuloMapColumnMapping("cf1", null, + ColumnEncoding.BINARY, ColumnEncoding.STRING, "column", TypeInfoFactory.getMapTypeInfo( + TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo).toString()); + + // Map of Integer to Integer, with binary keys and string values + Text nullSequence = new Text("\\N"); + ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils + .getTypeInfosFromTypeString("map<int,int>").get(0), new byte[] {(byte) 1, (byte) 2}, 0, + nullSequence, false, (byte) 0); + + LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi); + map.init(row, mapping); + + Assert.assertEquals(3, map.getMapSize()); + + Object o = map.getMapValueElement(new IntWritable(1)); + Assert.assertNotNull(o); + Assert.assertEquals(new IntWritable(2), ((LazyInteger) o).getWritableObject()); + + o = map.getMapValueElement(new
IntWritable(2)); + Assert.assertNotNull(o); + Assert.assertEquals(new IntWritable(4), ((LazyInteger) o).getWritableObject()); + + o = map.getMapValueElement(new IntWritable(3)); + Assert.assertNotNull(o); + Assert.assertEquals(new IntWritable(6), ((LazyInteger) o).getWritableObject()); + } + +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloRow.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloRow.java new file mode 100644 index 0000000..e0b51cb --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloRow.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapper; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyInteger; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.lazy.LazyString; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.lazydio.LazyDioInteger; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.base.Joiner; + +/** + * + */ +public class TestLazyAccumuloRow { + + @Test + public void testExpectedDeserializationOfColumns() throws Exception { + List columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height"); + List types = Arrays. 
asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo); + + LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory + .createLazyStructInspector(columns, types, LazySimpleSerDe.DefaultSeparators, new Text( + "\\N"), false, false, (byte) '\\'); + + DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory(); + + Properties props = new Properties(); + props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, + ":rowid,personal:given_name,personal:surname,personal:age,personal:weight,personal:height"); + props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns)); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types)); + + AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, + AccumuloSerDe.class.getName()); + + rowIdFactory.init(params, props); + + LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector); + AccumuloHiveRow hiveRow = new AccumuloHiveRow("1"); + hiveRow.add("personal", "given_name", "Bob".getBytes()); + hiveRow.add("personal", "surname", "Stevens".getBytes()); + hiveRow.add("personal", "age", "30".getBytes()); + hiveRow.add("personal", "weight", "200".getBytes()); + hiveRow.add("personal", "height", "72".getBytes()); + + ColumnMapper columnMapper = params.getColumnMapper(); + + lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory); + + Object o = lazyRow.getField(0); + Assert.assertEquals(LazyString.class, o.getClass()); + Assert.assertEquals("1", ((LazyString) o).toString()); + + o = lazyRow.getField(1); + Assert.assertEquals(LazyString.class, o.getClass()); + Assert.assertEquals("Bob", ((LazyString) o).toString()); + + o = lazyRow.getField(2); + Assert.assertEquals(LazyString.class, o.getClass()); + Assert.assertEquals("Stevens", ((LazyString) o).toString()); + + o = lazyRow.getField(3); + Assert.assertEquals(LazyInteger.class, o.getClass()); + Assert.assertEquals("30", ((LazyInteger) o).toString()); + + o = lazyRow.getField(4); + Assert.assertEquals(LazyInteger.class, o.getClass()); + Assert.assertEquals("200", ((LazyInteger) o).toString()); + + o = lazyRow.getField(5); + Assert.assertEquals(LazyInteger.class, o.getClass()); + Assert.assertEquals("72", ((LazyInteger) o).toString()); + } + + @Test + public void testDeserializationOfBinaryEncoding() throws Exception { + List columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height"); + List types = Arrays. 
asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo); + + LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory + .createLazyStructInspector(columns, types, LazySimpleSerDe.DefaultSeparators, new Text( + "\\N"), false, false, (byte) '\\'); + + DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory(); + + Properties props = new Properties(); + props + .setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, + ":rowid#s,personal:given_name#s,personal:surname#s,personal:age,personal:weight,personal:height"); + props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns)); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types)); + props + .setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName()); + + AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, + AccumuloSerDe.class.getName()); + + rowIdFactory.init(params, props); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos); + + LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector); + AccumuloHiveRow hiveRow = new AccumuloHiveRow("1"); + hiveRow.add("personal", "given_name", "Bob".getBytes()); + hiveRow.add("personal", "surname", "Stevens".getBytes()); + + out.writeInt(30); + hiveRow.add("personal", "age", baos.toByteArray()); + + baos.reset(); + out.writeInt(200); + hiveRow.add("personal", "weight", baos.toByteArray()); + + baos.reset(); + out.writeInt(72); + hiveRow.add("personal", "height", baos.toByteArray()); + + ColumnMapper columnMapper = params.getColumnMapper(); + + lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory); + + Object o = lazyRow.getField(0); + Assert.assertNotNull(o); + Assert.assertEquals(LazyString.class, o.getClass()); + Assert.assertEquals("1", ((LazyString) o).toString()); + + o = lazyRow.getField(1); + Assert.assertNotNull(o); + Assert.assertEquals(LazyString.class, o.getClass()); + Assert.assertEquals("Bob", ((LazyString) o).toString()); + + o = lazyRow.getField(2); + Assert.assertNotNull(o); + Assert.assertEquals(LazyString.class, o.getClass()); + Assert.assertEquals("Stevens", ((LazyString) o).toString()); + + o = lazyRow.getField(3); + Assert.assertNotNull(o); + Assert.assertEquals(LazyDioInteger.class, o.getClass()); + Assert.assertEquals("30", ((LazyDioInteger) o).toString()); + + o = lazyRow.getField(4); + Assert.assertNotNull(o); + Assert.assertEquals(LazyDioInteger.class, o.getClass()); + Assert.assertEquals("200", ((LazyDioInteger) o).toString()); + + o = lazyRow.getField(5); + Assert.assertNotNull(o); + Assert.assertEquals(LazyDioInteger.class, o.getClass()); + Assert.assertEquals("72", ((LazyDioInteger) o).toString()); + } + + @Test + public void testNullInit() throws SerDeException { + List columns = Arrays.asList("row", "1", "2", "3"); + List types = Arrays. 
asList( + TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME), + TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME), + TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME), + TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME)); + + LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory + .createLazyStructInspector(columns, types, LazySimpleSerDe.DefaultSeparators, new Text( + "\\N"), false, false, (byte) '\\'); + + DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory(); + + Properties props = new Properties(); + props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1,cf:cq2,cf:cq3"); + props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns)); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types)); + + AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, + AccumuloSerDe.class.getName()); + + rowIdFactory.init(params, props); + + ColumnMapper columnMapper = params.getColumnMapper(); + + LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector); + AccumuloHiveRow hiveRow = new AccumuloHiveRow("1"); + hiveRow.add("cf", "cq1", "foo".getBytes()); + hiveRow.add("cf", "cq3", "bar".getBytes()); + + lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory); + + // Noticed that we also suffer from the same issue as HIVE-3179 + // Only want to call a field init'ed when it's non-NULL + // Check it twice, make sure we get null both times + Assert.assertEquals("{'row':'1','1':'foo','2':null,'3':'bar'}".replace('\'', '"'), + SerDeUtils.getJSONString(lazyRow, objectInspector)); + Assert.assertEquals("{'row':'1','1':'foo','2':null,'3':'bar'}".replace('\'', '"'), + SerDeUtils.getJSONString(lazyRow, objectInspector)); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestColumnEncoding.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestColumnEncoding.java new file mode 100644 index 0000000..8183181 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestColumnEncoding.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.columns; + +import java.util.Map.Entry; + +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.collect.Maps; + +/** + * + */ +public class TestColumnEncoding { + + @Test(expected = IllegalArgumentException.class) + public void testInvalidCodeThrowsException() { + ColumnEncoding.fromCode("foo"); + } + + @Test + public void testStringEncoding() { + Assert.assertEquals(ColumnEncoding.STRING, ColumnEncoding.fromCode("s")); + } + + @Test + public void testBinaryEncoding() { + Assert.assertEquals(ColumnEncoding.BINARY, ColumnEncoding.fromCode("b")); + } + + @Test + public void testMissingColumnEncoding() { + Assert.assertFalse(ColumnEncoding.hasColumnEncoding("foo:bar")); + } + + @Test + public void testColumnEncodingSpecified() { + Assert.assertTrue(ColumnEncoding.hasColumnEncoding("foo:bar#s")); + } + + @Test + public void testEscapedPoundIsNoEncodingSpecified() { + Assert.assertFalse(ColumnEncoding.hasColumnEncoding("foo:b\\#ar")); + } + + @Test + public void testEscapedPoundWithRealPound() { + Assert.assertTrue(ColumnEncoding.hasColumnEncoding("foo:b\\#ar#b")); + } + + @Test + public void testParse() { + Assert.assertEquals(ColumnEncoding.STRING, ColumnEncoding.getFromMapping("foo:bar#s")); + } + + @Test + public void testParseWithEscapedPound() { + Assert.assertEquals(ColumnEncoding.BINARY, ColumnEncoding.getFromMapping("fo\\#o:bar#b")); + } + + @Test(expected = IllegalArgumentException.class) + public void testMissingEncodingOnParse() { + ColumnEncoding.getFromMapping("foo:bar"); + } + + @Test + public void testStripCode() { + String mapping = "foo:bar"; + Assert.assertEquals( + mapping, + ColumnEncoding.stripCode(mapping + AccumuloHiveConstants.POUND + + ColumnEncoding.BINARY.getCode())); + } + + @Test(expected = IllegalArgumentException.class) + public void testStripNonExistentCodeFails() { + ColumnEncoding.stripCode("foo:bar"); + } + + @Test + public void testStripCodeWithEscapedPound() { + String mapping = "foo:ba\\#r"; + + Assert.assertEquals( + mapping, + ColumnEncoding.stripCode(mapping + AccumuloHiveConstants.POUND + + ColumnEncoding.BINARY.getCode())); + } + + @Test + public void testMapEncoding() { + Assert.assertFalse(ColumnEncoding.isMapEncoding("s")); + Assert.assertFalse(ColumnEncoding.isMapEncoding("string")); + Assert.assertFalse(ColumnEncoding.isMapEncoding("binary")); + + Assert.assertTrue(ColumnEncoding.isMapEncoding("s:s")); + Assert.assertTrue(ColumnEncoding.isMapEncoding("s:string")); + Assert.assertTrue(ColumnEncoding.isMapEncoding("string:s")); + Assert.assertTrue(ColumnEncoding.isMapEncoding("string:string")); + } + + @Test + public void testMapEncodingParsing() { + Entry stringString = Maps.immutableEntry(ColumnEncoding.STRING, + ColumnEncoding.STRING), stringBinary = Maps.immutableEntry(ColumnEncoding.STRING, + ColumnEncoding.BINARY), binaryBinary = Maps.immutableEntry(ColumnEncoding.BINARY, + ColumnEncoding.BINARY), binaryString = Maps.immutableEntry(ColumnEncoding.BINARY, + ColumnEncoding.STRING); + + Assert.assertEquals(stringString, ColumnEncoding.getMapEncoding("s:s")); + Assert.assertEquals(stringString, ColumnEncoding.getMapEncoding("s:string")); + Assert.assertEquals(stringString, ColumnEncoding.getMapEncoding("string:s")); + Assert.assertEquals(stringString, ColumnEncoding.getMapEncoding("string:string")); + + Assert.assertEquals(stringBinary, ColumnEncoding.getMapEncoding("s:b")); + Assert.assertEquals(stringBinary, 
ColumnEncoding.getMapEncoding("string:b")); + Assert.assertEquals(stringBinary, ColumnEncoding.getMapEncoding("s:binary")); + Assert.assertEquals(stringBinary, ColumnEncoding.getMapEncoding("string:binary")); + + Assert.assertEquals(binaryString, ColumnEncoding.getMapEncoding("b:s")); + Assert.assertEquals(binaryString, ColumnEncoding.getMapEncoding("b:string")); + Assert.assertEquals(binaryString, ColumnEncoding.getMapEncoding("binary:s")); + Assert.assertEquals(binaryString, ColumnEncoding.getMapEncoding("binary:string")); + + Assert.assertEquals(binaryBinary, ColumnEncoding.getMapEncoding("b:b")); + Assert.assertEquals(binaryBinary, ColumnEncoding.getMapEncoding("binary:b")); + Assert.assertEquals(binaryBinary, ColumnEncoding.getMapEncoding("b:binary")); + Assert.assertEquals(binaryBinary, ColumnEncoding.getMapEncoding("binary:binary")); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestColumnMapper.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestColumnMapper.java new file mode 100644 index 0000000..e5c1e61 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestColumnMapper.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.columns; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.util.StringUtils; +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.base.Joiner; + +/** + * + */ +public class TestColumnMapper { + + @Test + public void testNormalMapping() throws TooManyAccumuloColumnsException { + List rawMappings = Arrays.asList(AccumuloHiveConstants.ROWID, "cf:cq", "cf:_", + "cf:qual"); + List columnNames = Arrays.asList("row", "col1", "col2", "col3"); + List columnTypes = Arrays. 
asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + ColumnMapper mapper = new ColumnMapper( + Joiner.on(AccumuloHiveConstants.COMMA).join(rawMappings), ColumnEncoding.STRING.getName(), + columnNames, columnTypes); + + List mappings = mapper.getColumnMappings(); + + Assert.assertEquals(rawMappings.size(), mappings.size()); + Assert.assertEquals(mappings.size(), mapper.size()); + + // Compare the Mapper get at offset method to the list of mappings + Iterator rawIter = rawMappings.iterator(); + Iterator iter = mappings.iterator(); + for (int i = 0; i < mappings.size() && iter.hasNext(); i++) { + String rawMapping = rawIter.next(); + ColumnMapping mapping = iter.next(); + ColumnMapping mappingByOffset = mapper.get(i); + + Assert.assertEquals(mapping, mappingByOffset); + + // Ensure that we get the right concrete ColumnMapping + if (AccumuloHiveConstants.ROWID.equals(rawMapping)) { + Assert.assertEquals(HiveAccumuloRowIdColumnMapping.class, mapping.getClass()); + } else { + Assert.assertEquals(HiveAccumuloColumnMapping.class, mapping.getClass()); + } + } + + Assert.assertEquals(0, mapper.getRowIdOffset()); + Assert.assertTrue(mapper.hasRowIdMapping()); + } + + @Test(expected = IllegalArgumentException.class) + public void testMultipleRowIDsFails() throws TooManyAccumuloColumnsException { + new ColumnMapper(AccumuloHiveConstants.ROWID + AccumuloHiveConstants.COMMA + + AccumuloHiveConstants.ROWID, null, Arrays.asList("row", "row2"), + Arrays. asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo)); + } + + @Test + public void testGetMappingFromHiveColumn() throws TooManyAccumuloColumnsException { + List hiveColumns = Arrays.asList("rowid", "col1", "col2", "col3"); + List columnTypes = Arrays. asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + List rawMappings = Arrays.asList(AccumuloHiveConstants.ROWID, "cf:cq", "cf:_", + "cf:qual"); + ColumnMapper mapper = new ColumnMapper( + Joiner.on(AccumuloHiveConstants.COMMA).join(rawMappings), null, hiveColumns, columnTypes); + + for (int i = 0; i < hiveColumns.size(); i++) { + String hiveColumn = hiveColumns.get(i), accumuloMapping = rawMappings.get(i); + ColumnMapping mapping = mapper.getColumnMappingForHiveColumn(hiveColumns, hiveColumn); + + Assert.assertEquals(accumuloMapping, mapping.getMappingSpec()); + } + } + + @Test + public void testGetTypesString() throws TooManyAccumuloColumnsException { + List hiveColumns = Arrays.asList("rowid", "col1", "col2", "col3"); + List rawMappings = Arrays.asList(AccumuloHiveConstants.ROWID, "cf:cq", "cf:_", + "cf:qual"); + List columnTypes = Arrays. 
asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + ColumnMapper mapper = new ColumnMapper( + Joiner.on(AccumuloHiveConstants.COMMA).join(rawMappings), null, hiveColumns, columnTypes); + + String typeString = mapper.getTypesString(); + String[] types = StringUtils.split(typeString, AccumuloHiveConstants.COLON); + Assert.assertEquals(rawMappings.size(), types.length); + for (String type : types) { + Assert.assertEquals(serdeConstants.STRING_TYPE_NAME, type); + } + } + + @Test + public void testDefaultBinary() throws TooManyAccumuloColumnsException { + List hiveColumns = Arrays.asList("rowid", "col1", "col2", "col3", "col4"); + List rawMappings = Arrays.asList(AccumuloHiveConstants.ROWID, "cf:cq", "cf:_#s", + "cf:qual#s", "cf:qual2"); + List columnTypes = Arrays. asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + ColumnMapper mapper = new ColumnMapper( + Joiner.on(AccumuloHiveConstants.COMMA).join(rawMappings), ColumnEncoding.BINARY.getName(), + hiveColumns, columnTypes); + + List mappings = mapper.getColumnMappings(); + Assert.assertEquals(5, mappings.size()); + + Assert.assertEquals(ColumnEncoding.BINARY, mappings.get(0).getEncoding()); + Assert.assertEquals(columnTypes.get(0).toString(), mappings.get(0).getColumnType()); + + Assert.assertEquals(ColumnEncoding.BINARY, mappings.get(1).getEncoding()); + Assert.assertEquals(columnTypes.get(1).toString(), mappings.get(1).getColumnType()); + + Assert.assertEquals(ColumnEncoding.STRING, mappings.get(2).getEncoding()); + Assert.assertEquals(columnTypes.get(2).toString(), mappings.get(2).getColumnType()); + + Assert.assertEquals(ColumnEncoding.STRING, mappings.get(3).getEncoding()); + Assert.assertEquals(columnTypes.get(3).toString(), mappings.get(3).getColumnType()); + + Assert.assertEquals(ColumnEncoding.BINARY, mappings.get(4).getEncoding()); + Assert.assertEquals(columnTypes.get(4).toString(), mappings.get(4).getColumnType()); + + } + + @Test + public void testMap() throws TooManyAccumuloColumnsException { + List hiveColumns = Arrays.asList("rowid", "col1", "col2", "col3"); + List columnTypes = Arrays. 
asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo), TypeInfoFactory.getMapTypeInfo( + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo), + TypeInfoFactory.stringTypeInfo); + List rawMappings = Arrays.asList(AccumuloHiveConstants.ROWID, "cf1:*", "cf2:2*", + "cq3:bar\\*"); + ColumnMapper mapper = new ColumnMapper( + Joiner.on(AccumuloHiveConstants.COMMA).join(rawMappings), ColumnEncoding.BINARY.getName(), + hiveColumns, columnTypes); + + List mappings = mapper.getColumnMappings(); + Assert.assertEquals(4, mappings.size()); + + Assert.assertEquals(HiveAccumuloRowIdColumnMapping.class, mappings.get(0).getClass()); + Assert.assertEquals(HiveAccumuloMapColumnMapping.class, mappings.get(1).getClass()); + Assert.assertEquals(HiveAccumuloMapColumnMapping.class, mappings.get(2).getClass()); + Assert.assertEquals(HiveAccumuloColumnMapping.class, mappings.get(3).getClass()); + + HiveAccumuloRowIdColumnMapping row = (HiveAccumuloRowIdColumnMapping) mappings.get(0); + Assert.assertEquals(ColumnEncoding.BINARY, row.getEncoding()); + Assert.assertEquals(hiveColumns.get(0), row.getColumnName()); + Assert.assertEquals(columnTypes.get(0).toString(), row.getColumnType()); + + HiveAccumuloMapColumnMapping map = (HiveAccumuloMapColumnMapping) mappings.get(1); + Assert.assertEquals("cf1", map.getColumnFamily()); + Assert.assertEquals("", map.getColumnQualifierPrefix()); + Assert.assertEquals(ColumnEncoding.BINARY, map.getEncoding()); + Assert.assertEquals(hiveColumns.get(1), map.getColumnName()); + Assert.assertEquals(columnTypes.get(1).toString(), map.getColumnType()); + + map = (HiveAccumuloMapColumnMapping) mappings.get(2); + Assert.assertEquals("cf2", map.getColumnFamily()); + Assert.assertEquals("2", map.getColumnQualifierPrefix()); + Assert.assertEquals(ColumnEncoding.BINARY, map.getEncoding()); + Assert.assertEquals(hiveColumns.get(2), map.getColumnName()); + Assert.assertEquals(columnTypes.get(2).toString(), map.getColumnType()); + + HiveAccumuloColumnMapping column = (HiveAccumuloColumnMapping) mappings.get(3); + Assert.assertEquals("cq3", column.getColumnFamily()); + Assert.assertEquals("bar*", column.getColumnQualifier()); + Assert.assertEquals(ColumnEncoding.BINARY, column.getEncoding()); + Assert.assertEquals(hiveColumns.get(3), column.getColumnName()); + Assert.assertEquals(columnTypes.get(3).toString(), column.getColumnType()); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestColumnMappingFactory.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestColumnMappingFactory.java new file mode 100644 index 0000000..7e7ee4c --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestColumnMappingFactory.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.columns; + +import java.util.Map.Entry; + +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Assert; +import org.junit.Test; + +/** + * + */ +public class TestColumnMappingFactory { + + @Test(expected = NullPointerException.class) + public void testNullArgumentsFailFast() { + ColumnMappingFactory.get(null, null, null, null); + } + + @Test + public void testRowIdCreatesRowIdMapping() { + ColumnMapping mapping = ColumnMappingFactory.get(AccumuloHiveConstants.ROWID, + ColumnEncoding.STRING, "row", TypeInfoFactory.stringTypeInfo); + + Assert.assertEquals(HiveAccumuloRowIdColumnMapping.class, mapping.getClass()); + Assert.assertEquals("row", mapping.getColumnName()); + Assert.assertEquals(TypeInfoFactory.stringTypeInfo.toString(), mapping.getColumnType()); + } + + @Test + public void testColumnMappingCreatesAccumuloColumnMapping() { + ColumnMapping mapping = ColumnMappingFactory.get("cf:cq", ColumnEncoding.STRING, "col", + TypeInfoFactory.stringTypeInfo); + + Assert.assertEquals(HiveAccumuloColumnMapping.class, mapping.getClass()); + Assert.assertEquals("col", mapping.getColumnName()); + Assert.assertEquals(TypeInfoFactory.stringTypeInfo.toString(), mapping.getColumnType()); + } + + @Test(expected = InvalidColumnMappingException.class) + public void testColumnMappingRequiresCfAndCq() { + ColumnMappingFactory.parseMapping("cf"); + } + + @Test + public void testColumnMappingWithMultipleColons() { + // A column qualifier with a colon + String cf = "cf", cq = "cq1:cq2"; + Entry pair = ColumnMappingFactory.parseMapping(cf + ":" + cq); + + Assert.assertEquals(cf, pair.getKey()); + Assert.assertEquals(cq, pair.getValue()); + } + + @Test + public void testEscapedColumnFamily() { + String cf = "c" + '\\' + ":f", cq = "cq1:cq2"; + Entry pair = ColumnMappingFactory.parseMapping(cf + ":" + cq); + + // The getter should remove the escape character for us + Assert.assertEquals("c:f", pair.getKey()); + Assert.assertEquals(cq, pair.getValue()); + } + + @Test + public void testEscapedColumnFamilyAndQualifier() { + String cf = "c" + '\\' + ":f", cq = "cq1\\:cq2"; + Entry pair = ColumnMappingFactory.parseMapping(cf + ":" + cq); + + // The getter should remove the escape character for us + Assert.assertEquals("c:f", pair.getKey()); + Assert.assertEquals("cq1:cq2", pair.getValue()); + } + + @Test + public void testGetMap() { + String mappingStr = "cf:*"; + ColumnMapping mapping = ColumnMappingFactory.get(mappingStr, ColumnEncoding.getDefault(), + "col", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo)); + + Assert.assertEquals(HiveAccumuloMapColumnMapping.class, mapping.getClass()); + HiveAccumuloMapColumnMapping mapMapping = (HiveAccumuloMapColumnMapping) mapping; + + Assert.assertEquals("cf", mapMapping.getColumnFamily()); + Assert.assertEquals("", mapMapping.getColumnQualifierPrefix()); + Assert.assertEquals(ColumnEncoding.getDefault(), mapMapping.getKeyEncoding()); + Assert.assertEquals(ColumnEncoding.getDefault(), mapMapping.getValueEncoding()); + } + + @Test + public void testGetMapWithPrefix() { + String mappingStr = "cf:foo*"; + ColumnMapping mapping = ColumnMappingFactory.get(mappingStr, ColumnEncoding.getDefault(), + "col", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + 
TypeInfoFactory.stringTypeInfo)); + + Assert.assertEquals(HiveAccumuloMapColumnMapping.class, mapping.getClass()); + HiveAccumuloMapColumnMapping mapMapping = (HiveAccumuloMapColumnMapping) mapping; + + Assert.assertEquals("cf", mapMapping.getColumnFamily()); + Assert.assertEquals("foo", mapMapping.getColumnQualifierPrefix()); + Assert.assertEquals(ColumnEncoding.getDefault(), mapMapping.getKeyEncoding()); + Assert.assertEquals(ColumnEncoding.getDefault(), mapMapping.getValueEncoding()); + } + + @Test + public void testEscapedAsterisk() { + String mappingStr = "cf:\\*"; + ColumnMapping mapping = ColumnMappingFactory.get(mappingStr, ColumnEncoding.getDefault(), + "col", TypeInfoFactory.stringTypeInfo); + + Assert.assertEquals(HiveAccumuloColumnMapping.class, mapping.getClass()); + HiveAccumuloColumnMapping colMapping = (HiveAccumuloColumnMapping) mapping; + + Assert.assertEquals("cf", colMapping.getColumnFamily()); + Assert.assertEquals("*", colMapping.getColumnQualifier()); + Assert.assertEquals(ColumnEncoding.getDefault(), colMapping.getEncoding()); + } + + @Test + public void testPrefixWithEscape() { + String mappingStr = "cf:foo\\*bar*"; + ColumnMapping mapping = ColumnMappingFactory.get(mappingStr, ColumnEncoding.getDefault(), + "col", TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo)); + + Assert.assertEquals(HiveAccumuloMapColumnMapping.class, mapping.getClass()); + HiveAccumuloMapColumnMapping mapMapping = (HiveAccumuloMapColumnMapping) mapping; + + Assert.assertEquals("cf", mapMapping.getColumnFamily()); + Assert.assertEquals("foo*bar", mapMapping.getColumnQualifierPrefix()); + Assert.assertEquals(ColumnEncoding.getDefault(), mapMapping.getKeyEncoding()); + Assert.assertEquals(ColumnEncoding.getDefault(), mapMapping.getValueEncoding()); + } + + @Test + public void testInlineEncodingOverridesDefault() { + String mappingStr = "cf:foo#s"; + ColumnMapping mapping = ColumnMappingFactory.get(mappingStr, ColumnEncoding.BINARY, "col", + TypeInfoFactory.stringTypeInfo); + + Assert.assertEquals(HiveAccumuloColumnMapping.class, mapping.getClass()); + HiveAccumuloColumnMapping colMapping = (HiveAccumuloColumnMapping) mapping; + + Assert.assertEquals("cf", colMapping.getColumnFamily()); + Assert.assertEquals("foo", colMapping.getColumnQualifier()); + Assert.assertEquals(ColumnEncoding.STRING, colMapping.getEncoding()); + } + + @Test + public void testCaseInsensitiveRowId() { + String mappingStr = ":rowid"; + ColumnMapping mapping = ColumnMappingFactory.get(mappingStr, ColumnEncoding.getDefault(), + "col", TypeInfoFactory.stringTypeInfo); + + Assert.assertEquals(HiveAccumuloRowIdColumnMapping.class, mapping.getClass()); + + mappingStr = ":rowid#b"; + mapping = ColumnMappingFactory.get(mappingStr, ColumnEncoding.getDefault(), "col", + TypeInfoFactory.stringTypeInfo); + + Assert.assertEquals(HiveAccumuloRowIdColumnMapping.class, mapping.getClass()); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestHiveAccumuloColumnMapping.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestHiveAccumuloColumnMapping.java new file mode 100644 index 0000000..240560d --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestHiveAccumuloColumnMapping.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.columns; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Assert; +import org.junit.Test; + +/** + * + */ +public class TestHiveAccumuloColumnMapping { + + @Test + public void testColumnMappingWithMultipleColons() { + // A column qualifier with a colon + String cf = "cf", cq = "cq1:cq2"; + HiveAccumuloColumnMapping mapping = new HiveAccumuloColumnMapping(cf, cq, + ColumnEncoding.STRING, "col", TypeInfoFactory.stringTypeInfo.toString()); + + Assert.assertEquals(cf, mapping.getColumnFamily()); + Assert.assertEquals(cq, mapping.getColumnQualifier()); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestHiveRowIdColumnMapping.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestHiveRowIdColumnMapping.java new file mode 100644 index 0000000..468c59b --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/columns/TestHiveRowIdColumnMapping.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.columns; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Test; + +/** + * + */ +public class TestHiveRowIdColumnMapping { + + @Test(expected = IllegalArgumentException.class) + public void testNonRowIdMappingFails() { + new HiveAccumuloRowIdColumnMapping("foo", ColumnEncoding.STRING, "col", + TypeInfoFactory.stringTypeInfo.toString()); + } + +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableInputFormat.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableInputFormat.java new file mode 100644 index 0000000..e2ad8ef --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableInputFormat.java @@ -0,0 +1,743 @@ +package org.apache.hadoop.hive.accumulo.mr; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; + +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.BatchWriterConfig; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.Instance; +import org.apache.accumulo.core.client.IteratorSetting; +import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.client.ZooKeeperInstance; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Range; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.accumulo.core.util.Pair; +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters; +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.apache.hadoop.hive.accumulo.AccumuloHiveRow; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapper; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping; +import org.apache.hadoop.hive.accumulo.predicate.AccumuloPredicateHandler; +import org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter; +import org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.Equal; +import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThan; +import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual; +import org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.LessThan; +import org.apache.hadoop.hive.accumulo.predicate.compare.LongCompare; +import 
org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.mockito.Mockito; + +import com.google.common.collect.Sets; + +public class TestHiveAccumuloTableInputFormat { + public static final String USER = "user"; + public static final String PASS = "password"; + public static final String TEST_TABLE = "table1"; + public static final Text COLUMN_FAMILY = new Text("cf"); + + private static final Text NAME = new Text("name"); + private static final Text SID = new Text("sid"); + private static final Text DEGREES = new Text("dgrs"); + private static final Text MILLIS = new Text("mills"); + + private Instance mockInstance; + private Connector con; + private HiveAccumuloTableInputFormat inputformat; + private JobConf conf; + private List columnNames; + private List columnTypes; + + @Rule + public TestName test = new TestName(); + + @Before + public void createMockKeyValues() throws Exception { + // Make a MockInstance here, by setting the instance name to be the same as this mock instance + // we can "trick" the InputFormat into using a MockInstance + mockInstance = new MockInstance(test.getMethodName()); + inputformat = new HiveAccumuloTableInputFormat(); + conf = new JobConf(); + conf.set(AccumuloSerDeParameters.TABLE_NAME, TEST_TABLE); + conf.set(AccumuloSerDeParameters.USE_MOCK_INSTANCE, "true"); + conf.set(AccumuloSerDeParameters.INSTANCE_NAME, test.getMethodName()); + conf.set(AccumuloSerDeParameters.USER_NAME, USER); + conf.set(AccumuloSerDeParameters.USER_PASS, PASS); + conf.set(AccumuloSerDeParameters.ZOOKEEPERS, "localhost:2181"); // not used for mock, but + // required by input format. + + columnNames = Arrays.asList("name", "sid", "dgrs", "mills"); + columnTypes = Arrays. 
asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.intTypeInfo, TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.longTypeInfo); + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:name,cf:sid,cf:dgrs,cf:mills"); + conf.set(serdeConstants.LIST_COLUMNS, "name,sid,dgrs,mills"); + conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,double,bigint"); + + con = mockInstance.getConnector(USER, new PasswordToken(PASS.getBytes())); + con.tableOperations().create(TEST_TABLE); + con.securityOperations().changeUserAuthorizations(USER, new Authorizations("blah")); + BatchWriterConfig writerConf = new BatchWriterConfig(); + BatchWriter writer = con.createBatchWriter(TEST_TABLE, writerConf); + + Mutation m1 = new Mutation(new Text("r1")); + m1.put(COLUMN_FAMILY, NAME, new Value("brian".getBytes())); + m1.put(COLUMN_FAMILY, SID, new Value(parseIntBytes("1"))); + m1.put(COLUMN_FAMILY, DEGREES, new Value(parseDoubleBytes("44.5"))); + m1.put(COLUMN_FAMILY, MILLIS, new Value(parseLongBytes("555"))); + + Mutation m2 = new Mutation(new Text("r2")); + m2.put(COLUMN_FAMILY, NAME, new Value("mark".getBytes())); + m2.put(COLUMN_FAMILY, SID, new Value(parseIntBytes("2"))); + m2.put(COLUMN_FAMILY, DEGREES, new Value(parseDoubleBytes("55.5"))); + m2.put(COLUMN_FAMILY, MILLIS, new Value(parseLongBytes("666"))); + + Mutation m3 = new Mutation(new Text("r3")); + m3.put(COLUMN_FAMILY, NAME, new Value("dennis".getBytes())); + m3.put(COLUMN_FAMILY, SID, new Value(parseIntBytes("3"))); + m3.put(COLUMN_FAMILY, DEGREES, new Value(parseDoubleBytes("65.5"))); + m3.put(COLUMN_FAMILY, MILLIS, new Value(parseLongBytes("777"))); + + writer.addMutation(m1); + writer.addMutation(m2); + writer.addMutation(m3); + + writer.close(); + } + + private byte[] parseIntBytes(String s) throws IOException { + int val = Integer.parseInt(s); + ByteArrayOutputStream baos = new ByteArrayOutputStream(4); + DataOutputStream out = new DataOutputStream(baos); + out.writeInt(val); + out.close(); + return baos.toByteArray(); + } + + private byte[] parseLongBytes(String s) throws IOException { + long val = Long.parseLong(s); + ByteArrayOutputStream baos = new ByteArrayOutputStream(8); + DataOutputStream out = new DataOutputStream(baos); + out.writeLong(val); + out.close(); + return baos.toByteArray(); + } + + private byte[] parseDoubleBytes(String s) throws IOException { + double val = Double.parseDouble(s); + ByteArrayOutputStream baos = new ByteArrayOutputStream(8); + DataOutputStream out = new DataOutputStream(baos); + out.writeDouble(val); + out.close(); + return baos.toByteArray(); + } + + @Test + public void testHiveAccumuloRecord() throws Exception { + FileInputFormat.addInputPath(conf, new Path("unused")); + InputSplit[] splits = inputformat.getSplits(conf, 0); + assertEquals(splits.length, 1); + RecordReader reader = inputformat.getRecordReader(splits[0], conf, null); + Text rowId = new Text("r1"); + AccumuloHiveRow row = new AccumuloHiveRow(); + row.add(COLUMN_FAMILY.toString(), NAME.toString(), "brian".getBytes()); + row.add(COLUMN_FAMILY.toString(), SID.toString(), parseIntBytes("1")); + row.add(COLUMN_FAMILY.toString(), DEGREES.toString(), parseDoubleBytes("44.5")); + row.add(COLUMN_FAMILY.toString(), MILLIS.toString(), parseLongBytes("555")); + assertTrue(reader.next(rowId, row)); + assertEquals(rowId.toString(), row.getRowId()); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME)); + assertArrayEquals("brian".getBytes(), row.getValue(COLUMN_FAMILY, NAME)); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, SID)); + 
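+ // sid, dgrs and mills were stored as raw DataOutputStream encodings (writeInt/writeDouble/writeLong),
+ // so the expected byte arrays below are built with the same parse*Bytes helpers used in setup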
assertArrayEquals(parseIntBytes("1"), row.getValue(COLUMN_FAMILY, SID)); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, DEGREES)); + assertArrayEquals(parseDoubleBytes("44.5"), row.getValue(COLUMN_FAMILY, DEGREES)); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, MILLIS)); + assertArrayEquals(parseLongBytes("555"), row.getValue(COLUMN_FAMILY, MILLIS)); + } + + @Test + public void testGetOnlyName() throws Exception { + FileInputFormat.addInputPath(conf, new Path("unused")); + + InputSplit[] splits = inputformat.getSplits(conf, 0); + assertEquals(splits.length, 1); + RecordReader reader = inputformat.getRecordReader(splits[0], conf, null); + Text rowId = new Text("r1"); + AccumuloHiveRow row = new AccumuloHiveRow(); + assertTrue(reader.next(rowId, row)); + assertEquals(row.getRowId(), rowId.toString()); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME)); + assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "brian".getBytes()); + + rowId = new Text("r2"); + assertTrue(reader.next(rowId, row)); + assertEquals(row.getRowId(), rowId.toString()); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME)); + assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "mark".getBytes()); + + rowId = new Text("r3"); + assertTrue(reader.next(rowId, row)); + assertEquals(row.getRowId(), rowId.toString()); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME)); + assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "dennis".getBytes()); + + assertFalse(reader.next(rowId, row)); + } + + @Test + public void testDegreesAndMillis() throws Exception { + Connector con = mockInstance.getConnector(USER, new PasswordToken(PASS.getBytes())); + Scanner scan = con.createScanner(TEST_TABLE, new Authorizations("blah")); + IteratorSetting is = new IteratorSetting(1, PrimitiveComparisonFilter.FILTER_PREFIX + 1, + PrimitiveComparisonFilter.class); + + is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, DoubleCompare.class.getName()); + is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, GreaterThanOrEqual.class.getName()); + is.addOption(PrimitiveComparisonFilter.CONST_VAL, + new String(Base64.encodeBase64(parseDoubleBytes("55.6")))); + is.addOption(PrimitiveComparisonFilter.COLUMN, "cf:dgrs"); + scan.addScanIterator(is); + + IteratorSetting is2 = new IteratorSetting(2, PrimitiveComparisonFilter.FILTER_PREFIX + 2, + PrimitiveComparisonFilter.class); + + is2.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, LongCompare.class.getName()); + is2.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, LessThan.class.getName()); + is2.addOption(PrimitiveComparisonFilter.CONST_VAL, + new String(Base64.encodeBase64(parseLongBytes("778")))); + is2.addOption(PrimitiveComparisonFilter.COLUMN, "cf:mills"); + + scan.addScanIterator(is2); + + boolean foundDennis = false; + int totalCount = 0; + for (Map.Entry kv : scan) { + boolean foundName = false; + boolean foundSid = false; + boolean foundDegrees = false; + boolean foundMillis = false; + SortedMap items = PrimitiveComparisonFilter.decodeRow(kv.getKey(), kv.getValue()); + for (Map.Entry item : items.entrySet()) { + SortedMap nestedItems = PrimitiveComparisonFilter.decodeRow(item.getKey(), + item.getValue()); + for (Map.Entry nested : nestedItems.entrySet()) { + if (nested.getKey().getRow().toString().equals("r3")) { + foundDennis = true; + } + if (nested.getKey().getColumnQualifier().equals(NAME)) { + foundName = true; + } else if (nested.getKey().getColumnQualifier().equals(SID)) { + foundSid = true; + } else if (nested.getKey().getColumnQualifier().equals(DEGREES)) { + 
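+ // mark the dgrs qualifier as seen in the decoded whole-row entry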
foundDegrees = true; + } else if (nested.getKey().getColumnQualifier().equals(MILLIS)) { + foundMillis = true; + } + } + } + totalCount++; + assertTrue(foundDegrees & foundMillis & foundName & foundSid); + } + assertTrue(foundDennis); + assertEquals(totalCount, 1); + } + + @Test + public void testGreaterThan1Sid() throws Exception { + Connector con = mockInstance.getConnector(USER, new PasswordToken(PASS.getBytes())); + Scanner scan = con.createScanner(TEST_TABLE, new Authorizations("blah")); + IteratorSetting is = new IteratorSetting(1, PrimitiveComparisonFilter.FILTER_PREFIX + 1, + PrimitiveComparisonFilter.class); + + is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName()); + is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, GreaterThan.class.getName()); + is.addOption(PrimitiveComparisonFilter.CONST_VAL, + new String(Base64.encodeBase64(parseIntBytes("1")))); + is.addOption(PrimitiveComparisonFilter.COLUMN, "cf:sid"); + scan.addScanIterator(is); + boolean foundMark = false; + boolean foundDennis = false; + int totalCount = 0; + for (Map.Entry kv : scan) { + boolean foundName = false; + boolean foundSid = false; + boolean foundDegrees = false; + boolean foundMillis = false; + SortedMap items = PrimitiveComparisonFilter.decodeRow(kv.getKey(), kv.getValue()); + for (Map.Entry item : items.entrySet()) { + if (item.getKey().getRow().toString().equals("r2")) { + foundMark = true; + } else if (item.getKey().getRow().toString().equals("r3")) { + foundDennis = true; + } + if (item.getKey().getColumnQualifier().equals(NAME)) { + foundName = true; + } else if (item.getKey().getColumnQualifier().equals(SID)) { + foundSid = true; + } else if (item.getKey().getColumnQualifier().equals(DEGREES)) { + foundDegrees = true; + } else if (item.getKey().getColumnQualifier().equals(MILLIS)) { + foundMillis = true; + } + } + totalCount++; + assertTrue(foundDegrees & foundMillis & foundName & foundSid); + } + assertTrue(foundDennis & foundMark); + assertEquals(totalCount, 2); + } + + @Test + public void testNameEqualBrian() throws Exception { + Connector con = mockInstance.getConnector(USER, new PasswordToken(PASS.getBytes())); + Scanner scan = con.createScanner(TEST_TABLE, new Authorizations("blah")); + IteratorSetting is = new IteratorSetting(1, PrimitiveComparisonFilter.FILTER_PREFIX + 1, + PrimitiveComparisonFilter.class); + + is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName()); + is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName()); + is.addOption(PrimitiveComparisonFilter.CONST_VAL, + new String(Base64.encodeBase64("brian".getBytes()))); + is.addOption(PrimitiveComparisonFilter.COLUMN, "cf:name"); + scan.addScanIterator(is); + boolean foundName = false; + boolean foundSid = false; + boolean foundDegrees = false; + boolean foundMillis = false; + for (Map.Entry kv : scan) { + SortedMap items = PrimitiveComparisonFilter.decodeRow(kv.getKey(), kv.getValue()); + for (Map.Entry item : items.entrySet()) { + assertEquals(item.getKey().getRow().toString(), "r1"); + if (item.getKey().getColumnQualifier().equals(NAME)) { + foundName = true; + assertArrayEquals(item.getValue().get(), "brian".getBytes()); + } else if (item.getKey().getColumnQualifier().equals(SID)) { + foundSid = true; + assertArrayEquals(item.getValue().get(), parseIntBytes("1")); + } else if (item.getKey().getColumnQualifier().equals(DEGREES)) { + foundDegrees = true; + assertArrayEquals(item.getValue().get(), parseDoubleBytes("44.5")); + } else if 
(item.getKey().getColumnQualifier().equals(MILLIS)) { + foundMillis = true; + assertArrayEquals(item.getValue().get(), parseLongBytes("555")); + } + } + } + assertTrue(foundDegrees & foundMillis & foundName & foundSid); + } + + @Test + public void testGetNone() throws Exception { + FileInputFormat.addInputPath(conf, new Path("unused")); + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:f1"); + InputSplit[] splits = inputformat.getSplits(conf, 0); + assertEquals(splits.length, 1); + RecordReader reader = inputformat.getRecordReader(splits[0], conf, null); + Text rowId = new Text("r1"); + AccumuloHiveRow row = new AccumuloHiveRow(); + row.setRowId("r1"); + assertFalse(reader.next(rowId, row)); + } + + @Test + public void testIteratorNotInSplitsCompensation() throws Exception { + FileInputFormat.addInputPath(conf, new Path("unused")); + InputSplit[] splits = inputformat.getSplits(conf, 0); + + assertEquals(1, splits.length); + InputSplit split = splits[0]; + + IteratorSetting is = new IteratorSetting(1, PrimitiveComparisonFilter.FILTER_PREFIX + 1, + PrimitiveComparisonFilter.class); + + is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName()); + is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName()); + is.addOption(PrimitiveComparisonFilter.CONST_VAL, + new String(Base64.encodeBase64(new byte[] {'0'}))); + is.addOption(PrimitiveComparisonFilter.COLUMN, "cf:cq"); + + // Mock out the predicate handler because it's just easier + AccumuloPredicateHandler predicateHandler = Mockito.mock(AccumuloPredicateHandler.class); + Mockito.when( + predicateHandler.getIterators(Mockito.any(JobConf.class), Mockito.any(ColumnMapper.class))) + .thenReturn(Arrays.asList(is)); + + // Set it on our inputformat + inputformat.predicateHandler = predicateHandler; + + inputformat.getRecordReader(split, conf, null); + + // The code should account for the bug and update the iterators on the split + List settingsOnSplit = ((HiveAccumuloSplit) split).getSplit().getIterators(); + assertEquals(1, settingsOnSplit.size()); + assertEquals(is, settingsOnSplit.get(0)); + } + + @Test + public void testColumnMappingsToPairs() { + List mappings = new ArrayList(); + Set> columns = new HashSet>(); + + // Row ID + mappings.add(new HiveAccumuloRowIdColumnMapping(AccumuloHiveConstants.ROWID, + ColumnEncoding.STRING, "row", TypeInfoFactory.stringTypeInfo.toString())); + + // Some cf:cq + mappings.add(new HiveAccumuloColumnMapping("person", "name", ColumnEncoding.STRING, "col1", + TypeInfoFactory.stringTypeInfo.toString())); + mappings.add(new HiveAccumuloColumnMapping("person", "age", ColumnEncoding.STRING, "col2", + TypeInfoFactory.stringTypeInfo.toString())); + mappings.add(new HiveAccumuloColumnMapping("person", "height", ColumnEncoding.STRING, "col3", + TypeInfoFactory.stringTypeInfo.toString())); + + // Bare cf + mappings.add(new HiveAccumuloColumnMapping("city", "name", ColumnEncoding.STRING, "col4", + TypeInfoFactory.stringTypeInfo.toString())); + + columns.add(new Pair(new Text("person"), new Text("name"))); + columns.add(new Pair(new Text("person"), new Text("age"))); + columns.add(new Pair(new Text("person"), new Text("height"))); + // Null qualifier would mean all qualifiers in that family, want an empty qualifier + columns.add(new Pair(new Text("city"), new Text("name"))); + + assertEquals(columns, inputformat.getPairCollection(mappings)); + } + + @Test + public void testConfigureMockAccumuloInputFormat() throws Exception { + AccumuloConnectionParameters 
accumuloParams = new AccumuloConnectionParameters(conf); + ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), + conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes); + Set> cfCqPairs = inputformat + .getPairCollection(columnMapper.getColumnMappings()); + List iterators = Collections.emptyList(); + Set ranges = Collections.singleton(new Range()); + + HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class); + + // Call out to the real configure method + Mockito.doCallRealMethod().when(mockInputFormat) + .configure(conf, mockInstance, con, accumuloParams, columnMapper, iterators, ranges); + + // Also compute the correct cf:cq pairs so we can assert the right argument was passed + Mockito.doCallRealMethod().when(mockInputFormat) + .getPairCollection(columnMapper.getColumnMappings()); + + mockInputFormat.configure(conf, mockInstance, con, accumuloParams, columnMapper, iterators, + ranges); + + // Verify that the correct methods are invoked on AccumuloInputFormat + Mockito.verify(mockInputFormat).setMockInstance(conf, mockInstance.getInstanceName()); + Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS)); + Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE); + Mockito.verify(mockInputFormat).setScanAuthorizations(conf, + con.securityOperations().getUserAuthorizations(USER)); + Mockito.verify(mockInputFormat).addIterators(conf, iterators); + Mockito.verify(mockInputFormat).setRanges(conf, ranges); + Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs); + } + + @Test + public void testConfigureAccumuloInputFormat() throws Exception { + AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf); + ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), + conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes); + Set> cfCqPairs = inputformat + .getPairCollection(columnMapper.getColumnMappings()); + List iterators = Collections.emptyList(); + Set ranges = Collections.singleton(new Range()); + String instanceName = "realInstance"; + String zookeepers = "host1:2181,host2:2181,host3:2181"; + + ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class); + HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class); + + // Stub out the ZKI mock + Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName); + Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers); + + // Call out to the real configure method + Mockito.doCallRealMethod().when(mockInputFormat) + .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges); + + // Also compute the correct cf:cq pairs so we can assert the right argument was passed + Mockito.doCallRealMethod().when(mockInputFormat) + .getPairCollection(columnMapper.getColumnMappings()); + + mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, + ranges); + + // Verify that the correct methods are invoked on AccumuloInputFormat + Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers); + Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS)); + Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE); + Mockito.verify(mockInputFormat).setScanAuthorizations(conf, + con.securityOperations().getUserAuthorizations(USER)); + 
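+ // the empty iterator list and the single infinite Range are expected to be passed straight through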
Mockito.verify(mockInputFormat).addIterators(conf, iterators); + Mockito.verify(mockInputFormat).setRanges(conf, ranges); + Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs); + } + + @Test + public void testConfigureAccumuloInputFormatWithAuthorizations() throws Exception { + AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf); + conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo,bar"); + ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), + conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes); + Set> cfCqPairs = inputformat + .getPairCollection(columnMapper.getColumnMappings()); + List iterators = Collections.emptyList(); + Set ranges = Collections.singleton(new Range()); + String instanceName = "realInstance"; + String zookeepers = "host1:2181,host2:2181,host3:2181"; + + ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class); + HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class); + + // Stub out the ZKI mock + Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName); + Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers); + + // Call out to the real configure method + Mockito.doCallRealMethod().when(mockInputFormat) + .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges); + + // Also compute the correct cf:cq pairs so we can assert the right argument was passed + Mockito.doCallRealMethod().when(mockInputFormat) + .getPairCollection(columnMapper.getColumnMappings()); + + mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, + ranges); + + // Verify that the correct methods are invoked on AccumuloInputFormat + Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers); + Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS)); + Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE); + Mockito.verify(mockInputFormat).setScanAuthorizations(conf, new Authorizations("foo,bar")); + Mockito.verify(mockInputFormat).addIterators(conf, iterators); + Mockito.verify(mockInputFormat).setRanges(conf, ranges); + Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs); + } + + @Test + public void testConfigureAccumuloInputFormatWithIterators() throws Exception { + AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf); + ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), + conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes); + Set> cfCqPairs = inputformat + .getPairCollection(columnMapper.getColumnMappings()); + List iterators = new ArrayList(); + Set ranges = Collections.singleton(new Range()); + String instanceName = "realInstance"; + String zookeepers = "host1:2181,host2:2181,host3:2181"; + + IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class); + cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName()); + cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName()); + cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave"); + cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name"); + iterators.add(cfg); + + cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class); + cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName()); + 
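+ // the second filter models person:age == 50 and reuses priority 50, mirroring the name filter above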
cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName()); + cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50"); + cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age"); + iterators.add(cfg); + + ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class); + HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class); + + // Stub out the ZKI mock + Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName); + Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers); + + // Call out to the real configure method + Mockito.doCallRealMethod().when(mockInputFormat) + .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges); + + // Also compute the correct cf:cq pairs so we can assert the right argument was passed + Mockito.doCallRealMethod().when(mockInputFormat) + .getPairCollection(columnMapper.getColumnMappings()); + + mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, + ranges); + + // Verify that the correct methods are invoked on AccumuloInputFormat + Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers); + Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS)); + Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE); + Mockito.verify(mockInputFormat).setScanAuthorizations(conf, + con.securityOperations().getUserAuthorizations(USER)); + Mockito.verify(mockInputFormat).addIterators(conf, iterators); + Mockito.verify(mockInputFormat).setRanges(conf, ranges); + Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs); + } + + @Test + public void testConfigureAccumuloInputFormatWithEmptyColumns() throws Exception { + AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf); + ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), + conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes); + HashSet> cfCqPairs = Sets.newHashSet(); + List iterators = new ArrayList(); + Set ranges = Collections.singleton(new Range()); + String instanceName = "realInstance"; + String zookeepers = "host1:2181,host2:2181,host3:2181"; + + IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class); + cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName()); + cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName()); + cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave"); + cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name"); + iterators.add(cfg); + + cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class); + cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName()); + cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName()); + cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50"); + cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age"); + iterators.add(cfg); + + ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class); + HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class); + + // Stub out the ZKI mock + Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName); + Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers); + Mockito.when(mockInputFormat.getPairCollection(columnMapper.getColumnMappings())).thenReturn( + cfCqPairs); + + // Call out to the 
real configure method + Mockito.doCallRealMethod().when(mockInputFormat) + .configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges); + + // Also compute the correct cf:cq pairs so we can assert the right argument was passed + Mockito.doCallRealMethod().when(mockInputFormat) + .getPairCollection(columnMapper.getColumnMappings()); + + mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, + ranges); + + // Verify that the correct methods are invoked on AccumuloInputFormat + Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers); + Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS)); + Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE); + Mockito.verify(mockInputFormat).setScanAuthorizations(conf, + con.securityOperations().getUserAuthorizations(USER)); + Mockito.verify(mockInputFormat).addIterators(conf, iterators); + Mockito.verify(mockInputFormat).setRanges(conf, ranges); + + // fetchColumns is not called because we had no columns to fetch + } + + @Test + public void testGetProtectedField() throws Exception { + FileInputFormat.addInputPath(conf, new Path("unused")); + + BatchWriterConfig writerConf = new BatchWriterConfig(); + BatchWriter writer = con.createBatchWriter(TEST_TABLE, writerConf); + + Authorizations origAuths = con.securityOperations().getUserAuthorizations(USER); + con.securityOperations().changeUserAuthorizations(USER, + new Authorizations(origAuths.toString() + ",foo")); + + Mutation m = new Mutation("r4"); + m.put(COLUMN_FAMILY, NAME, new ColumnVisibility("foo"), new Value("frank".getBytes())); + m.put(COLUMN_FAMILY, SID, new ColumnVisibility("foo"), new Value(parseIntBytes("4"))); + m.put(COLUMN_FAMILY, DEGREES, new ColumnVisibility("foo"), new Value(parseDoubleBytes("60.6"))); + m.put(COLUMN_FAMILY, MILLIS, new ColumnVisibility("foo"), new Value(parseLongBytes("777"))); + + writer.addMutation(m); + writer.close(); + + conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo"); + + InputSplit[] splits = inputformat.getSplits(conf, 0); + assertEquals(splits.length, 1); + RecordReader reader = inputformat.getRecordReader(splits[0], conf, null); + Text rowId = new Text("r1"); + AccumuloHiveRow row = new AccumuloHiveRow(); + assertTrue(reader.next(rowId, row)); + assertEquals(row.getRowId(), rowId.toString()); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME)); + assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "brian".getBytes()); + + rowId = new Text("r2"); + assertTrue(reader.next(rowId, row)); + assertEquals(row.getRowId(), rowId.toString()); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME)); + assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "mark".getBytes()); + + rowId = new Text("r3"); + assertTrue(reader.next(rowId, row)); + assertEquals(row.getRowId(), rowId.toString()); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME)); + assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "dennis".getBytes()); + + rowId = new Text("r4"); + assertTrue(reader.next(rowId, row)); + assertEquals(row.getRowId(), rowId.toString()); + assertTrue(row.hasFamAndQual(COLUMN_FAMILY, NAME)); + assertArrayEquals(row.getValue(COLUMN_FAMILY, NAME), "frank".getBytes()); + + assertFalse(reader.next(rowId, row)); + } + + @Test + public void testMapColumnPairs() throws TooManyAccumuloColumnsException { + ColumnMapper columnMapper = new ColumnMapper(":rowID,cf:*", + conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), Arrays.asList("row", 
"col"), + Arrays. asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.getMapTypeInfo( + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo))); + Set> pairs = inputformat.getPairCollection(columnMapper.getColumnMappings()); + + Assert.assertEquals(1, pairs.size()); + + Pair cfCq = pairs.iterator().next(); + Assert.assertEquals("cf", cfCq.getFirst().toString()); + Assert.assertNull(cfCq.getSecond()); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java new file mode 100644 index 0000000..706b26e --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java @@ -0,0 +1,492 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.mr; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.Instance; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.serde.AccumuloRowSerializer; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazyStruct; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordWriter; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.mockito.Mockito; + +import com.google.common.base.Joiner; + +/** + * + */ +public class TestHiveAccumuloTableOutputFormat { + + protected JobConf conf; + protected String user = "root"; + protected String password = "password"; + protected String instanceName = "instance"; + protected String zookeepers = "host1:2181,host2:2181,host3:2181"; + protected String outputTable = "output"; + + @Rule + public TestName test = new TestName(); + + @Before + public void setup() throws IOException { + conf = new JobConf(); + + conf.set(AccumuloConnectionParameters.USER_NAME, user); + conf.set(AccumuloConnectionParameters.USER_PASS, password); + conf.set(AccumuloConnectionParameters.INSTANCE_NAME, instanceName); + conf.set(AccumuloConnectionParameters.ZOOKEEPERS, zookeepers); + conf.set(AccumuloConnectionParameters.TABLE_NAME, outputTable); + } + + @Test + public void testBasicConfiguration() throws IOException, AccumuloSecurityException { + HiveAccumuloTableOutputFormat outputFormat = Mockito.mock(HiveAccumuloTableOutputFormat.class); + + Mockito.doCallRealMethod().when(outputFormat).configureAccumuloOutputFormat(conf); + + outputFormat.configureAccumuloOutputFormat(conf); + + Mockito.verify(outputFormat).setAccumuloConnectorInfo(conf, user, new PasswordToken(password)); + Mockito.verify(outputFormat).setAccumuloZooKeeperInstance(conf, instanceName, zookeepers); + Mockito.verify(outputFormat).setDefaultAccumuloTableName(conf, outputTable); + } + + @Test + public void testMockInstance() throws IOException, AccumuloSecurityException { + HiveAccumuloTableOutputFormat outputFormat = Mockito.mock(HiveAccumuloTableOutputFormat.class); + conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true); + conf.unset(AccumuloConnectionParameters.ZOOKEEPERS); + + Mockito.doCallRealMethod().when(outputFormat).configureAccumuloOutputFormat(conf); + + outputFormat.configureAccumuloOutputFormat(conf); + + Mockito.verify(outputFormat).setAccumuloConnectorInfo(conf, user, new PasswordToken(password)); + Mockito.verify(outputFormat).setAccumuloMockInstance(conf, instanceName); + Mockito.verify(outputFormat).setDefaultAccumuloTableName(conf, outputTable); + } + + @Test + public void testWriteToMockInstance() throws Exception { + Instance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + + HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat(); + String table = test.getMethodName(); + conn.tableOperations().create(table); + + JobConf conf = new JobConf(); + conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName()); + conf.set(AccumuloConnectionParameters.USER_NAME, "root"); + conf.set(AccumuloConnectionParameters.USER_PASS, ""); + conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true); + conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName()); + + FileSystem local = FileSystem.getLocal(conf); + outputFormat.checkOutputSpecs(local, conf); + + RecordWriter recordWriter = outputFormat + .getRecordWriter(local, conf, null, null); + + List names = Arrays.asList("row", "col1", "col2"); + List types = Arrays. 
asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + + Properties tableProperties = new Properties(); + tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2"); + tableProperties.setProperty(serdeConstants.FIELD_DELIM, " "); + tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names)); + tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types)); + AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), + tableProperties, AccumuloSerDe.class.getSimpleName()); + SerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters(); + + AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, + accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, + accumuloSerDeParams.getRowIdFactory()); + + TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); + + LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory + .createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), + Arrays.asList(stringTypeInfo, stringTypeInfo, stringTypeInfo), + serDeParams.getSeparators(), serDeParams.getNullSequence(), + serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), + serDeParams.getEscapeChar()); + + LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI); + + ByteArrayRef bytes = new ByteArrayRef(); + bytes.setData("row value1 value2".getBytes()); + struct.init(bytes, 0, bytes.getData().length); + + // Serialize the struct into a mutation + Mutation m = serializer.serialize(struct, structOI); + + // Write the mutation + recordWriter.write(new Text(table), m); + + // Close the writer + recordWriter.close(null); + + Iterator> iter = conn.createScanner(table, new Authorizations()).iterator(); + Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext()); + + Entry entry = iter.next(); + Key k = entry.getKey(); + Value v = entry.getValue(); + + Assert.assertEquals("row", k.getRow().toString()); + Assert.assertEquals("cf", k.getColumnFamily().toString()); + Assert.assertEquals("cq1", k.getColumnQualifier().toString()); + Assert.assertEquals("", k.getColumnVisibility().toString()); + Assert.assertEquals("value1", new String(v.get())); + + Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext()); + + entry = iter.next(); + k = entry.getKey(); + v = entry.getValue(); + + Assert.assertEquals("row", k.getRow().toString()); + Assert.assertEquals("cf", k.getColumnFamily().toString()); + Assert.assertEquals("cq2", k.getColumnQualifier().toString()); + Assert.assertEquals("", k.getColumnVisibility().toString()); + Assert.assertEquals("value2", new String(v.get())); + + Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext()); + } + + @Test + public void testWriteToMockInstanceWithVisibility() throws Exception { + Instance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + Authorizations auths = new Authorizations("foo"); + conn.securityOperations().changeUserAuthorizations("root", auths); + + HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat(); + String table = test.getMethodName(); + conn.tableOperations().create(table); + + JobConf conf = new JobConf(); + conf.set(AccumuloConnectionParameters.INSTANCE_NAME, 
inst.getInstanceName()); + conf.set(AccumuloConnectionParameters.USER_NAME, "root"); + conf.set(AccumuloConnectionParameters.USER_PASS, ""); + conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true); + conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName()); + + FileSystem local = FileSystem.getLocal(conf); + outputFormat.checkOutputSpecs(local, conf); + + RecordWriter recordWriter = outputFormat + .getRecordWriter(local, conf, null, null); + + List names = Arrays.asList("row", "col1", "col2"); + List types = Arrays. asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + + Properties tableProperties = new Properties(); + tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2"); + tableProperties.setProperty(serdeConstants.FIELD_DELIM, " "); + tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names)); + tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types)); + AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), + tableProperties, AccumuloSerDe.class.getSimpleName()); + SerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters(); + + AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, + accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), + accumuloSerDeParams.getRowIdFactory()); + + LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory + .createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), Arrays. asList( + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo), serDeParams.getSeparators(), serDeParams + .getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), + serDeParams.getEscapeChar()); + + LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI); + + ByteArrayRef bytes = new ByteArrayRef(); + bytes.setData("row value1 value2".getBytes()); + struct.init(bytes, 0, bytes.getData().length); + + // Serialize the struct into a mutation + Mutation m = serializer.serialize(struct, structOI); + + // Write the mutation + recordWriter.write(new Text(table), m); + + // Close the writer + recordWriter.close(null); + + Iterator> iter = conn.createScanner(table, auths).iterator(); + Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext()); + + Entry entry = iter.next(); + Key k = entry.getKey(); + Value v = entry.getValue(); + + Assert.assertEquals("row", k.getRow().toString()); + Assert.assertEquals("cf", k.getColumnFamily().toString()); + Assert.assertEquals("cq1", k.getColumnQualifier().toString()); + Assert.assertEquals("foo", k.getColumnVisibility().toString()); + Assert.assertEquals("value1", new String(v.get())); + + Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext()); + + entry = iter.next(); + k = entry.getKey(); + v = entry.getValue(); + + Assert.assertEquals("row", k.getRow().toString()); + Assert.assertEquals("cf", k.getColumnFamily().toString()); + Assert.assertEquals("cq2", k.getColumnQualifier().toString()); + Assert.assertEquals("foo", k.getColumnVisibility().toString()); + Assert.assertEquals("value2", new String(v.get())); + + Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext()); + } + + @Test + public void testWriteMap() throws Exception { + Instance inst = new MockInstance(test.getMethodName()); + Connector 
conn = inst.getConnector("root", new PasswordToken("")); + + HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat(); + String table = test.getMethodName(); + conn.tableOperations().create(table); + + JobConf conf = new JobConf(); + conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName()); + conf.set(AccumuloConnectionParameters.USER_NAME, "root"); + conf.set(AccumuloConnectionParameters.USER_PASS, ""); + conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true); + conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName()); + + FileSystem local = FileSystem.getLocal(conf); + outputFormat.checkOutputSpecs(local, conf); + + RecordWriter recordWriter = outputFormat + .getRecordWriter(local, conf, null, null); + + List names = Arrays.asList("row", "col1"); + List types = Arrays. asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + + Properties tableProperties = new Properties(); + tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:*"); + tableProperties.setProperty(serdeConstants.FIELD_DELIM, " "); + tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names)); + tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types)); + AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), + tableProperties, AccumuloSerDe.class.getSimpleName()); + SerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters(); + + AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, + accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, + accumuloSerDeParams.getRowIdFactory()); + + TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); + LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory + .createLazyObjectInspector(stringTypeInfo, new byte[] {0}, 0, + serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector( + stringOI, stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), + serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory + .getLazySimpleStructObjectInspector(Arrays.asList("row", "data"), + Arrays.asList(stringOI, mapOI), (byte) ' ', serDeParams.getNullSequence(), + serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), + serDeParams.getEscapeChar()); + + LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI); + + ByteArrayRef bytes = new ByteArrayRef(); + bytes.setData("row cq1:value1,cq2:value2".getBytes()); + struct.init(bytes, 0, bytes.getData().length); + + // Serialize the struct into a mutation + Mutation m = serializer.serialize(struct, structOI); + + // Write the mutation + recordWriter.write(new Text(table), m); + + // Close the writer + recordWriter.close(null); + + Iterator> iter = conn.createScanner(table, new Authorizations()).iterator(); + Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext()); + + Entry entry = iter.next(); + Key k = entry.getKey(); + Value v = entry.getValue(); + + Assert.assertEquals("row", k.getRow().toString()); + Assert.assertEquals("cf", k.getColumnFamily().toString()); + Assert.assertEquals("cq1", k.getColumnQualifier().toString()); + 
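+ // under the cf:* mapping each map key becomes a column qualifier; the serializer's default visibility label is expected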
Assert.assertEquals(AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, + k.getColumnVisibilityParsed()); + Assert.assertEquals("value1", new String(v.get())); + + Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext()); + + entry = iter.next(); + k = entry.getKey(); + v = entry.getValue(); + + Assert.assertEquals("row", k.getRow().toString()); + Assert.assertEquals("cf", k.getColumnFamily().toString()); + Assert.assertEquals("cq2", k.getColumnQualifier().toString()); + Assert.assertEquals(AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, + k.getColumnVisibilityParsed()); + Assert.assertEquals("value2", new String(v.get())); + + Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext()); + } + + @Test + public void testBinarySerializationOnStringFallsBackToUtf8() throws Exception { + Instance inst = new MockInstance(test.getMethodName()); + Connector conn = inst.getConnector("root", new PasswordToken("")); + + HiveAccumuloTableOutputFormat outputFormat = new HiveAccumuloTableOutputFormat(); + String table = test.getMethodName(); + conn.tableOperations().create(table); + + JobConf conf = new JobConf(); + conf.set(AccumuloConnectionParameters.INSTANCE_NAME, inst.getInstanceName()); + conf.set(AccumuloConnectionParameters.USER_NAME, "root"); + conf.set(AccumuloConnectionParameters.USER_PASS, ""); + conf.setBoolean(AccumuloConnectionParameters.USE_MOCK_INSTANCE, true); + conf.set(AccumuloConnectionParameters.TABLE_NAME, test.getMethodName()); + + FileSystem local = FileSystem.getLocal(conf); + outputFormat.checkOutputSpecs(local, conf); + + RecordWriter recordWriter = outputFormat + .getRecordWriter(local, conf, null, null); + + List names = Arrays.asList("row", "col1", "col2"); + List types = Arrays. asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + + Properties tableProperties = new Properties(); + tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq1,cf:cq2"); + tableProperties.setProperty(serdeConstants.FIELD_DELIM, " "); + tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(names)); + tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types)); + tableProperties.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName()); + AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), + tableProperties, AccumuloSerDe.class.getSimpleName()); + SerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters(); + + AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, + accumuloSerDeParams.getColumnMappings(), AccumuloSerDeParameters.DEFAULT_VISIBILITY_LABEL, + accumuloSerDeParams.getRowIdFactory()); + + TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); + + LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyFactory + .createLazyStructInspector(Arrays.asList("row", "cq1", "cq2"), + Arrays.asList(stringTypeInfo, stringTypeInfo, stringTypeInfo), + serDeParams.getSeparators(), serDeParams.getNullSequence(), + serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), + serDeParams.getEscapeChar()); + + LazyStruct struct = (LazyStruct) LazyFactory.createLazyObject(structOI); + + ByteArrayRef bytes = new ByteArrayRef(); + bytes.setData("row value1 value2".getBytes()); + struct.init(bytes, 0, bytes.getData().length); + + // Serialize the struct into a 
mutation + Mutation m = serializer.serialize(struct, structOI); + + // Write the mutation + recordWriter.write(new Text(table), m); + + // Close the writer + recordWriter.close(null); + + Iterator<Entry<Key,Value>> iter = conn.createScanner(table, new Authorizations()).iterator(); + Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext()); + + Entry<Key,Value> entry = iter.next(); + Key k = entry.getKey(); + Value v = entry.getValue(); + + Assert.assertEquals("row", k.getRow().toString()); + Assert.assertEquals("cf", k.getColumnFamily().toString()); + Assert.assertEquals("cq1", k.getColumnQualifier().toString()); + Assert.assertEquals("", k.getColumnVisibility().toString()); + Assert.assertEquals("value1", new String(v.get())); + + Assert.assertTrue("Iterator did not have an element as expected", iter.hasNext()); + + entry = iter.next(); + k = entry.getKey(); + v = entry.getValue(); + + Assert.assertEquals("row", k.getRow().toString()); + Assert.assertEquals("cf", k.getColumnFamily().toString()); + Assert.assertEquals("cq2", k.getColumnQualifier().toString()); + Assert.assertEquals("", k.getColumnVisibility().toString()); + Assert.assertEquals("value2", new String(v.get())); + + Assert.assertFalse("Iterator unexpectedly had more data", iter.hasNext()); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTypes.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTypes.java new file mode 100644 index 0000000..a378535 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTypes.java @@ -0,0 +1,826 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.mr; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Map.Entry; + +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.BatchWriterConfig; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.mock.MockInstance; +import org.apache.accumulo.core.client.security.tokens.PasswordToken; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.apache.hadoop.hive.accumulo.AccumuloHiveRow; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyBoolean; +import org.apache.hadoop.hive.serde2.lazy.LazyByte; +import org.apache.hadoop.hive.serde2.lazy.LazyDate; +import org.apache.hadoop.hive.serde2.lazy.LazyDouble; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyFloat; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveChar; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveDecimal; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveVarchar; +import org.apache.hadoop.hive.serde2.lazy.LazyInteger; +import org.apache.hadoop.hive.serde2.lazy.LazyLong; +import org.apache.hadoop.hive.serde2.lazy.LazyShort; +import org.apache.hadoop.hive.serde2.lazy.LazyString; +import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDateObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector; +import 
org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; + +/** + * + */ +public class TestHiveAccumuloTypes { + + @Rule + public TestName test = new TestName(); + + @Test + public void testBinaryTypes() throws Exception { + final String tableName = test.getMethodName(), user = "root", pass = ""; + + MockInstance mockInstance = new MockInstance(test.getMethodName()); + Connector conn = mockInstance.getConnector(user, new PasswordToken(pass)); + HiveAccumuloTableInputFormat inputformat = new HiveAccumuloTableInputFormat(); + JobConf conf = new JobConf(); + + conf.set(AccumuloSerDeParameters.TABLE_NAME, tableName); + conf.set(AccumuloSerDeParameters.USE_MOCK_INSTANCE, "true"); + conf.set(AccumuloSerDeParameters.INSTANCE_NAME, test.getMethodName()); + conf.set(AccumuloSerDeParameters.USER_NAME, user); + conf.set(AccumuloSerDeParameters.USER_PASS, pass); + conf.set(AccumuloSerDeParameters.ZOOKEEPERS, "localhost:2181"); // not used for mock, but + // required by input format. 
+ + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, AccumuloHiveConstants.ROWID + + ",cf:string,cf:boolean,cf:tinyint,cf:smallint,cf:int,cf:bigint" + + ",cf:float,cf:double,cf:decimal,cf:date,cf:timestamp,cf:char,cf:varchar"); + conf.set( + serdeConstants.LIST_COLUMNS, + "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)"); + conf.set( + serdeConstants.LIST_COLUMN_TYPES, + "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)"); + conf.set(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, "binary"); + + conn.tableOperations().create(tableName); + BatchWriterConfig writerConf = new BatchWriterConfig(); + BatchWriter writer = conn.createBatchWriter(tableName, writerConf); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos); + + String cf = "cf"; + byte[] cfBytes = cf.getBytes(); + + Mutation m = new Mutation("row1"); + + // string + String stringValue = "string"; + JavaStringObjectInspector stringOI = (JavaStringObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, stringOI.create(stringValue), stringOI, false, (byte) 0, + null); + m.put(cfBytes, "string".getBytes(), baos.toByteArray()); + + // boolean + boolean booleanValue = true; + baos.reset(); + JavaBooleanObjectInspector booleanOI = (JavaBooleanObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME)); + LazyUtils.writePrimitive(baos, booleanOI.create(booleanValue), booleanOI); + m.put(cfBytes, "boolean".getBytes(), baos.toByteArray()); + + // tinyint + byte tinyintValue = -127; + baos.reset(); + JavaByteObjectInspector byteOI = (JavaByteObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME)); + LazyUtils.writePrimitive(baos, tinyintValue, byteOI); + m.put(cfBytes, "tinyint".getBytes(), baos.toByteArray()); + + // smallint + short smallintValue = Short.MAX_VALUE; + baos.reset(); + JavaShortObjectInspector shortOI = (JavaShortObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME)); + LazyUtils.writePrimitive(baos, smallintValue, shortOI); + m.put(cfBytes, "smallint".getBytes(), baos.toByteArray()); + + // int + int intValue = Integer.MAX_VALUE; + baos.reset(); + JavaIntObjectInspector intOI = (JavaIntObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME)); + LazyUtils.writePrimitive(baos, intValue, intOI); + m.put(cfBytes, "int".getBytes(), baos.toByteArray()); + + // bigint + long bigintValue = Long.MAX_VALUE; + baos.reset(); + JavaLongObjectInspector longOI = (JavaLongObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME)); + LazyUtils.writePrimitive(baos, bigintValue, longOI); + m.put(cfBytes, "bigint".getBytes(), baos.toByteArray()); + + // float + float floatValue = Float.MAX_VALUE; + baos.reset(); + JavaFloatObjectInspector floatOI = (JavaFloatObjectInspector) PrimitiveObjectInspectorFactory + 
.getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME)); + LazyUtils.writePrimitive(baos, floatValue, floatOI); + m.put(cfBytes, "float".getBytes(), baos.toByteArray()); + + // double + double doubleValue = Double.MAX_VALUE; + baos.reset(); + JavaDoubleObjectInspector doubleOI = (JavaDoubleObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME)); + LazyUtils.writePrimitive(baos, doubleValue, doubleOI); + m.put(cfBytes, "double".getBytes(), baos.toByteArray()); + + // decimal + baos.reset(); + HiveDecimal decimalValue = HiveDecimal.create(65536l); + HiveDecimalWritable decimalWritable = new HiveDecimalWritable(decimalValue); + decimalWritable.write(out); + m.put(cfBytes, "decimal".getBytes(), baos.toByteArray()); + + // date + baos.reset(); + Date now = new Date(System.currentTimeMillis()); + DateWritable dateWritable = new DateWritable(now); + Date dateValue = dateWritable.get(); + dateWritable.write(out); + m.put(cfBytes, "date".getBytes(), baos.toByteArray()); + + // timestamp + baos.reset(); + Timestamp timestampValue = new Timestamp(now.getTime()); + ByteStream.Output output = new ByteStream.Output(); + TimestampWritable timestampWritable = new TimestampWritable(new Timestamp(now.getTime())); + timestampWritable.write(output); + output.close(); + m.put(cfBytes, "timestamp".getBytes(), output.toByteArray()); + + // char + baos.reset(); + HiveChar charValue = new HiveChar("char", 4); + JavaHiveCharObjectInspector charOI = (JavaHiveCharObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(new CharTypeInfo(4)); + LazyUtils.writePrimitiveUTF8(baos, charOI.create(charValue), charOI, false, (byte) 0, null); + m.put(cfBytes, "char".getBytes(), baos.toByteArray()); + + baos.reset(); + HiveVarchar varcharValue = new HiveVarchar("varchar", 7); + JavaHiveVarcharObjectInspector varcharOI = (JavaHiveVarcharObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(new VarcharTypeInfo(7)); + LazyUtils.writePrimitiveUTF8(baos, varcharOI.create(varcharValue), varcharOI, false, (byte) 0, + null); + m.put(cfBytes, "varchar".getBytes(), baos.toByteArray()); + + writer.addMutation(m); + + writer.close(); + + for (Entry<Key,Value> e : conn.createScanner(tableName, new Authorizations())) { + System.out.println(e); + } + + // Create the RecordReader + FileInputFormat.addInputPath(conf, new Path("unused")); + InputSplit[] splits = inputformat.getSplits(conf, 0); + assertEquals(splits.length, 1); + RecordReader<Text,AccumuloHiveRow> reader = inputformat.getRecordReader(splits[0], conf, null); + + Text key = reader.createKey(); + AccumuloHiveRow value = reader.createValue(); + + reader.next(key, value); + + Assert.assertEquals(13, value.getTuples().size()); + + ByteArrayRef byteRef = new ByteArrayRef(); + + // string + Text cfText = new Text(cf), cqHolder = new Text(); + cqHolder.set("string"); + byte[] valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyStringObjectInspector lazyStringOI = LazyPrimitiveObjectInspectorFactory + .getLazyStringObjectInspector(false, (byte) 0); + LazyString lazyString = (LazyString) LazyFactory.createLazyObject(lazyStringOI); + lazyString.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(stringValue, lazyString.getWritableObject().toString()); + + // boolean + cqHolder.set("boolean"); + valueBytes = value.getValue(cfText, 
cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyBooleanObjectInspector lazyBooleanOI = (LazyBooleanObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME)); + LazyBoolean lazyBoolean = (LazyBoolean) LazyFactory + .createLazyPrimitiveBinaryClass(lazyBooleanOI); + lazyBoolean.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(booleanValue, lazyBoolean.getWritableObject().get()); + + // tinyint + cqHolder.set("tinyint"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyByteObjectInspector lazyByteOI = (LazyByteObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME)); + LazyByte lazyByte = (LazyByte) LazyFactory.createLazyPrimitiveBinaryClass(lazyByteOI); + lazyByte.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(tinyintValue, lazyByte.getWritableObject().get()); + + // smallint + cqHolder.set("smallint"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyShortObjectInspector lazyShortOI = (LazyShortObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME)); + LazyShort lazyShort = (LazyShort) LazyFactory.createLazyPrimitiveBinaryClass(lazyShortOI); + lazyShort.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(smallintValue, lazyShort.getWritableObject().get()); + + // int + cqHolder.set("int"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyIntObjectInspector lazyIntOI = (LazyIntObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME)); + LazyInteger lazyInt = (LazyInteger) LazyFactory.createLazyPrimitiveBinaryClass(lazyIntOI); + lazyInt.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(intValue, lazyInt.getWritableObject().get()); + + // bigint + cqHolder.set("bigint"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyLongObjectInspector lazyLongOI = (LazyLongObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME)); + LazyLong lazyLong = (LazyLong) LazyFactory.createLazyPrimitiveBinaryClass(lazyLongOI); + lazyLong.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(bigintValue, lazyLong.getWritableObject().get()); + + // float + cqHolder.set("float"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyFloatObjectInspector lazyFloatOI = (LazyFloatObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME)); + LazyFloat lazyFloat = (LazyFloat) LazyFactory.createLazyPrimitiveBinaryClass(lazyFloatOI); + lazyFloat.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(floatValue, lazyFloat.getWritableObject().get(), 0); + + // double + cqHolder.set("double"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + 
LazyDoubleObjectInspector lazyDoubleOI = (LazyDoubleObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME)); + LazyDouble lazyDouble = (LazyDouble) LazyFactory.createLazyPrimitiveBinaryClass(lazyDoubleOI); + lazyDouble.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(doubleValue, lazyDouble.getWritableObject().get(), 0); + + // decimal + cqHolder.set("decimal"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + ByteArrayInputStream bais = new ByteArrayInputStream(valueBytes); + DataInputStream in = new DataInputStream(bais); + decimalWritable.readFields(in); + + Assert.assertEquals(decimalValue, decimalWritable.getHiveDecimal()); + + // date + cqHolder.set("date"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + bais = new ByteArrayInputStream(valueBytes); + in = new DataInputStream(bais); + dateWritable.readFields(in); + + Assert.assertEquals(dateValue, dateWritable.get()); + + // timestamp + cqHolder.set("timestamp"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + bais = new ByteArrayInputStream(valueBytes); + in = new DataInputStream(bais); + timestampWritable.readFields(in); + + Assert.assertEquals(timestampValue, timestampWritable.getTimestamp()); + + // char + cqHolder.set("char"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyHiveCharObjectInspector lazyCharOI = (LazyHiveCharObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(new CharTypeInfo(4)); + LazyHiveChar lazyChar = (LazyHiveChar) LazyFactory.createLazyObject(lazyCharOI); + lazyChar.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(charValue, lazyChar.getWritableObject().getHiveChar()); + + // varchar + cqHolder.set("varchar"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyHiveVarcharObjectInspector lazyVarcharOI = (LazyHiveVarcharObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(new VarcharTypeInfo(7)); + LazyHiveVarchar lazyVarchar = (LazyHiveVarchar) LazyFactory.createLazyObject(lazyVarcharOI); + lazyVarchar.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(varcharValue.toString(), lazyVarchar.getWritableObject().getHiveVarchar() + .toString()); + } + + @Test + public void testUtf8Types() throws Exception { + final String tableName = test.getMethodName(), user = "root", pass = ""; + + MockInstance mockInstance = new MockInstance(test.getMethodName()); + Connector conn = mockInstance.getConnector(user, new PasswordToken(pass)); + HiveAccumuloTableInputFormat inputformat = new HiveAccumuloTableInputFormat(); + JobConf conf = new JobConf(); + + conf.set(AccumuloSerDeParameters.TABLE_NAME, tableName); + conf.set(AccumuloSerDeParameters.USE_MOCK_INSTANCE, "true"); + conf.set(AccumuloSerDeParameters.INSTANCE_NAME, test.getMethodName()); + conf.set(AccumuloSerDeParameters.USER_NAME, user); + conf.set(AccumuloSerDeParameters.USER_PASS, pass); + conf.set(AccumuloSerDeParameters.ZOOKEEPERS, "localhost:2181"); // not used for mock, but + // required by input format. 
+ + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, AccumuloHiveConstants.ROWID + + ",cf:string,cf:boolean,cf:tinyint,cf:smallint,cf:int,cf:bigint" + + ",cf:float,cf:double,cf:decimal,cf:date,cf:timestamp,cf:char,cf:varchar"); + conf.set( + serdeConstants.LIST_COLUMNS, + "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)"); + conf.set( + serdeConstants.LIST_COLUMN_TYPES, + "string,string,boolean,tinyint,smallint,int,bigint,float,double,decimal,date,timestamp,char(4),varchar(7)"); + + conn.tableOperations().create(tableName); + BatchWriterConfig writerConf = new BatchWriterConfig(); + BatchWriter writer = conn.createBatchWriter(tableName, writerConf); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + String cf = "cf"; + byte[] cfBytes = cf.getBytes(); + ByteArrayRef byteRef = new ByteArrayRef(); + + Mutation m = new Mutation("row1"); + + // string + String stringValue = "string"; + baos.reset(); + JavaStringObjectInspector stringOI = (JavaStringObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, stringOI.create(stringValue), stringOI, false, (byte) 0, + null); + m.put(cfBytes, "string".getBytes(), baos.toByteArray()); + + // boolean + boolean booleanValue = true; + baos.reset(); + JavaBooleanObjectInspector booleanOI = (JavaBooleanObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, booleanOI.create(booleanValue), booleanOI, false, (byte) 0, + null); + m.put(cfBytes, "boolean".getBytes(), baos.toByteArray()); + + // tinyint + byte tinyintValue = -127; + baos.reset(); + JavaByteObjectInspector byteOI = (JavaByteObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, tinyintValue, byteOI, false, (byte) 0, null); + m.put(cfBytes, "tinyint".getBytes(), baos.toByteArray()); + + // smallint + short smallintValue = Short.MAX_VALUE; + baos.reset(); + JavaShortObjectInspector shortOI = (JavaShortObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, smallintValue, shortOI, false, (byte) 0, null); + m.put(cfBytes, "smallint".getBytes(), baos.toByteArray()); + + // int + int intValue = Integer.MAX_VALUE; + baos.reset(); + JavaIntObjectInspector intOI = (JavaIntObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, intValue, intOI, false, (byte) 0, null); + m.put(cfBytes, "int".getBytes(), baos.toByteArray()); + + // bigint + long bigintValue = Long.MAX_VALUE; + baos.reset(); + JavaLongObjectInspector longOI = (JavaLongObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, bigintValue, longOI, false, (byte) 0, null); + m.put(cfBytes, "bigint".getBytes(), baos.toByteArray()); + + // float + float floatValue = Float.MAX_VALUE; + baos.reset(); + JavaFloatObjectInspector floatOI = 
(JavaFloatObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, floatValue, floatOI, false, (byte) 0, null); + m.put(cfBytes, "float".getBytes(), baos.toByteArray()); + + // double + double doubleValue = Double.MAX_VALUE; + baos.reset(); + JavaDoubleObjectInspector doubleOI = (JavaDoubleObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, doubleValue, doubleOI, false, (byte) 0, null); + m.put(cfBytes, "double".getBytes(), baos.toByteArray()); + + // decimal + HiveDecimal decimalValue = HiveDecimal.create("1.23"); + baos.reset(); + JavaHiveDecimalObjectInspector decimalOI = (JavaHiveDecimalObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(new DecimalTypeInfo(5, 2)); + LazyUtils.writePrimitiveUTF8(baos, decimalOI.create(decimalValue), decimalOI, false, (byte) 0, + null); + m.put(cfBytes, "decimal".getBytes(), baos.toByteArray()); + + // date + Date now = new Date(System.currentTimeMillis()); + DateWritable dateWritable = new DateWritable(now); + Date dateValue = dateWritable.get(); + baos.reset(); + JavaDateObjectInspector dateOI = (JavaDateObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.DATE_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, dateOI.create(dateValue), dateOI, false, (byte) 0, null); + m.put(cfBytes, "date".getBytes(), baos.toByteArray()); + + // timestamp + Timestamp timestampValue = new Timestamp(now.getTime()); + baos.reset(); + JavaTimestampObjectInspector timestampOI = (JavaTimestampObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME)); + LazyUtils.writePrimitiveUTF8(baos, timestampOI.create(timestampValue), timestampOI, false, + (byte) 0, null); + m.put(cfBytes, "timestamp".getBytes(), baos.toByteArray()); + + // char + baos.reset(); + HiveChar charValue = new HiveChar("char", 4); + JavaHiveCharObjectInspector charOI = (JavaHiveCharObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(new CharTypeInfo(4)); + LazyUtils.writePrimitiveUTF8(baos, charOI.create(charValue), charOI, false, (byte) 0, null); + m.put(cfBytes, "char".getBytes(), baos.toByteArray()); + + // varchar + baos.reset(); + HiveVarchar varcharValue = new HiveVarchar("varchar", 7); + JavaHiveVarcharObjectInspector varcharOI = (JavaHiveVarcharObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(new VarcharTypeInfo(7)); + LazyUtils.writePrimitiveUTF8(baos, varcharOI.create(varcharValue), varcharOI, false, (byte) 0, + null); + m.put(cfBytes, "varchar".getBytes(), baos.toByteArray()); + + writer.addMutation(m); + + writer.close(); + + for (Entry e : conn.createScanner(tableName, new Authorizations())) { + System.out.println(e); + } + + // Create the RecordReader + FileInputFormat.addInputPath(conf, new Path("unused")); + InputSplit[] splits = inputformat.getSplits(conf, 0); + assertEquals(splits.length, 1); + RecordReader reader = inputformat.getRecordReader(splits[0], conf, null); + + Text key = reader.createKey(); + AccumuloHiveRow value = reader.createValue(); + + reader.next(key, value); + + Assert.assertEquals(13, value.getTuples().size()); 
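+ // Each value below was written with LazyUtils.writePrimitiveUTF8 above, so read it back through the matching Lazy object inspector and compare it against the original Java value.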
+ + // string + Text cfText = new Text(cf), cqHolder = new Text(); + cqHolder.set("string"); + byte[] valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyStringObjectInspector lazyStringOI = LazyPrimitiveObjectInspectorFactory + .getLazyStringObjectInspector(false, (byte) 0); + LazyString lazyString = (LazyString) LazyFactory.createLazyObject(lazyStringOI); + lazyString.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(new Text(stringValue), lazyString.getWritableObject()); + + // boolean + cqHolder.set("boolean"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyBooleanObjectInspector lazyBooleanOI = (LazyBooleanObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME)); + LazyBoolean lazyBoolean = (LazyBoolean) LazyFactory.createLazyObject(lazyBooleanOI); + lazyBoolean.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(booleanValue, lazyBoolean.getWritableObject().get()); + + // tinyint + cqHolder.set("tinyint"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyByteObjectInspector lazyByteOI = (LazyByteObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME)); + LazyByte lazyByte = (LazyByte) LazyFactory.createLazyObject(lazyByteOI); + lazyByte.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(tinyintValue, lazyByte.getWritableObject().get()); + + // smallint + cqHolder.set("smallint"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyShortObjectInspector lazyShortOI = (LazyShortObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME)); + LazyShort lazyShort = (LazyShort) LazyFactory.createLazyObject(lazyShortOI); + lazyShort.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(smallintValue, lazyShort.getWritableObject().get()); + + // int + cqHolder.set("int"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyIntObjectInspector lazyIntOI = (LazyIntObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME)); + LazyInteger lazyInt = (LazyInteger) LazyFactory.createLazyObject(lazyIntOI); + lazyInt.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(intValue, lazyInt.getWritableObject().get()); + + // bigint + cqHolder.set("bigint"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyLongObjectInspector lazyLongOI = (LazyLongObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME)); + LazyLong lazyLong = (LazyLong) LazyFactory.createLazyObject(lazyLongOI); + lazyLong.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(bigintValue, lazyLong.getWritableObject().get()); + + // float + cqHolder.set("float"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + 
LazyFloatObjectInspector lazyFloatOI = (LazyFloatObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME)); + LazyFloat lazyFloat = (LazyFloat) LazyFactory.createLazyObject(lazyFloatOI); + lazyFloat.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(floatValue, lazyFloat.getWritableObject().get(), 0); + + // double + cqHolder.set("double"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyDoubleObjectInspector lazyDoubleOI = (LazyDoubleObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME)); + LazyDouble lazyDouble = (LazyDouble) LazyFactory.createLazyObject(lazyDoubleOI); + lazyDouble.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(doubleValue, lazyDouble.getWritableObject().get(), 0); + + // decimal + cqHolder.set("decimal"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyHiveDecimalObjectInspector lazyDecimalOI = (LazyHiveDecimalObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(new DecimalTypeInfo(5, 2)); + LazyHiveDecimal lazyDecimal = (LazyHiveDecimal) LazyFactory.createLazyObject(lazyDecimalOI); + lazyDecimal.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(decimalValue, lazyDecimal.getWritableObject().getHiveDecimal()); + + // date + cqHolder.set("date"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyDateObjectInspector lazyDateOI = (LazyDateObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DATE_TYPE_NAME)); + LazyDate lazyDate = (LazyDate) LazyFactory.createLazyObject(lazyDateOI); + lazyDate.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(dateValue, lazyDate.getWritableObject().get()); + + // timestamp + cqHolder.set("timestamp"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyTimestampObjectInspector lazyTimestampOI = (LazyTimestampObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME)); + LazyTimestamp lazyTimestamp = (LazyTimestamp) LazyFactory.createLazyObject(lazyTimestampOI); + lazyTimestamp.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(timestampValue, lazyTimestamp.getWritableObject().getTimestamp()); + + // char + cqHolder.set("char"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyHiveCharObjectInspector lazyCharOI = (LazyHiveCharObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(new CharTypeInfo(4)); + LazyHiveChar lazyChar = (LazyHiveChar) LazyFactory.createLazyObject(lazyCharOI); + lazyChar.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(charValue, lazyChar.getWritableObject().getHiveChar()); + + // varchar + cqHolder.set("varchar"); + valueBytes = value.getValue(cfText, cqHolder); + Assert.assertNotNull(valueBytes); + + byteRef.setData(valueBytes); + LazyHiveVarcharObjectInspector lazyVarcharOI = (LazyHiveVarcharObjectInspector) LazyPrimitiveObjectInspectorFactory + .getLazyObjectInspector(new 
VarcharTypeInfo(7)); + LazyHiveVarchar lazyVarchar = (LazyHiveVarchar) LazyFactory.createLazyObject(lazyVarcharOI); + lazyVarchar.init(byteRef, 0, valueBytes.length); + + Assert.assertEquals(varcharValue.toString(), lazyVarchar.getWritableObject().getHiveVarchar() + .toString()); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloPredicateHandler.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloPredicateHandler.java new file mode 100644 index 0000000..c0b14e1 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloPredicateHandler.java @@ -0,0 +1,809 @@ +package org.apache.hadoop.hive.accumulo.predicate; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.apache.accumulo.core.client.IteratorSetting; +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Range; +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapper; +import org.apache.hadoop.hive.accumulo.predicate.compare.CompareOp; +import org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.Equal; +import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThan; +import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual; +import org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.LessThan; +import org.apache.hadoop.hive.accumulo.predicate.compare.LessThanOrEqual; +import org.apache.hadoop.hive.accumulo.predicate.compare.LongCompare; +import org.apache.hadoop.hive.accumulo.predicate.compare.NotEqual; +import org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison; +import org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare; +import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters; +import org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.index.IndexSearchCondition; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.TableScanDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; +import org.apache.hadoop.hive.serde.serdeConstants; 
+import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; + +public class TestAccumuloPredicateHandler { + @SuppressWarnings("unused") + private static final Logger log = Logger.getLogger(TestAccumuloPredicateHandler.class); + + private AccumuloPredicateHandler handler = AccumuloPredicateHandler.getInstance(); + private JobConf conf; + private ColumnMapper columnMapper; + + @Before + public void setup() throws TooManyAccumuloColumnsException { + FunctionRegistry.getFunctionNames(); + conf = new JobConf(); + List columnNames = Arrays.asList("field1", "rid"); + List columnTypes = Arrays. asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); + conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,string"); + + String columnMappingStr = "cf:f1,:rowID"; + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); + columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, + columnTypes); + } + + @Test + public void testGetRowIDSearchCondition() { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "hi"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqual(), children); + assertNotNull(node); + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + List sConditions = handler.getSearchConditions(conf); + assertEquals(sConditions.size(), 1); + } + + @Test() + public void testRangeEqual() throws SerDeException { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqual(), children); + assertNotNull(node); + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + Collection ranges = handler.getRanges(conf, columnMapper); + assertEquals(ranges.size(), 1); + Range range = ranges.iterator().next(); + assertTrue(range.isStartKeyInclusive()); + assertFalse(range.isEndKeyInclusive()); + assertTrue(range.contains(new Key(new Text("aaa")))); + assertTrue(range.afterEndKey(new Key(new Text("aab")))); + assertTrue(range.beforeStartKey(new Key(new Text("aa")))); + } + + @Test() + public void testRangeGreaterThan() throws SerDeException { + 
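// rid > 'aaa' should yield a single range that excludes 'aaa' itself but still contains 'aab' + 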
ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPGreaterThan(), children); + assertNotNull(node); + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + Collection ranges = handler.getRanges(conf, columnMapper); + assertEquals(ranges.size(), 1); + Range range = ranges.iterator().next(); + assertTrue(range.isStartKeyInclusive()); + assertFalse(range.isEndKeyInclusive()); + assertFalse(range.contains(new Key(new Text("aaa")))); + assertFalse(range.afterEndKey(new Key(new Text("ccccc")))); + assertTrue(range.contains(new Key(new Text("aab")))); + assertTrue(range.beforeStartKey(new Key(new Text("aa")))); + assertTrue(range.beforeStartKey(new Key(new Text("aaa")))); + } + + @Test + public void rangeGreaterThanOrEqual() throws SerDeException { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + Collection ranges = handler.getRanges(conf, columnMapper); + assertEquals(ranges.size(), 1); + Range range = ranges.iterator().next(); + assertTrue(range.isStartKeyInclusive()); + assertFalse(range.isEndKeyInclusive()); + assertTrue(range.contains(new Key(new Text("aaa")))); + assertFalse(range.afterEndKey(new Key(new Text("ccccc")))); + assertTrue(range.contains(new Key(new Text("aab")))); + assertTrue(range.beforeStartKey(new Key(new Text("aa")))); + } + + @Test + public void rangeLessThan() throws SerDeException { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPLessThan(), children); + assertNotNull(node); + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + Collection ranges = handler.getRanges(conf, columnMapper); + assertEquals(ranges.size(), 1); + Range range = ranges.iterator().next(); + assertTrue(range.isStartKeyInclusive()); + assertFalse(range.isEndKeyInclusive()); + assertFalse(range.contains(new Key(new Text("aaa")))); + assertTrue(range.afterEndKey(new Key(new Text("ccccc")))); + assertTrue(range.contains(new Key(new Text("aa")))); + assertTrue(range.afterEndKey(new Key(new Text("aab")))); + assertTrue(range.afterEndKey(new Key(new Text("aaa")))); + } + + @Test + public void rangeLessThanOrEqual() throws SerDeException { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new 
ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children); + assertNotNull(node); + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + Collection ranges = handler.getRanges(conf, columnMapper); + assertEquals(ranges.size(), 1); + Range range = ranges.iterator().next(); + assertTrue(range.isStartKeyInclusive()); + assertFalse(range.isEndKeyInclusive()); + assertTrue(range.contains(new Key(new Text("aaa")))); + assertTrue(range.afterEndKey(new Key(new Text("ccccc")))); + assertTrue(range.contains(new Key(new Text("aa")))); + assertTrue(range.afterEndKey(new Key(new Text("aab")))); + assertFalse(range.afterEndKey(new Key(new Text("aaa")))); + } + + @Test + public void testDisjointRanges() throws SerDeException { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children); + assertNotNull(node); + + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb"); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPGreaterThan(), children2); + assertNotNull(node2); + + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + String filterExpr = Utilities.serializeExpression(both); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + Collection ranges = handler.getRanges(conf, columnMapper); + + // Impossible to get ranges for row <= 'aaa' and row >= 'bbb' + assertEquals(0, ranges.size()); + } + + @Test + public void testMultipleRanges() throws SerDeException { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb"); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPLessThan(), children2); + assertNotNull(node2); + + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + 
new GenericUDFOPAnd(), bothFilters); + + String filterExpr = Utilities.serializeExpression(both); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + List ranges = handler.getRanges(conf, columnMapper); + assertEquals(1, ranges.size()); + Range range = ranges.get(0); + assertEquals(new Range(new Key("aaa"), true, new Key("bbb"), false), range); + } + + @Test + public void testPushdownTuple() throws SerDeException, NoSuchPrimitiveComparisonException, + NoSuchCompareOpException { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field1", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqual(), children); + assertNotNull(node); + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + List sConditions = handler.getSearchConditions(conf); + assertEquals(sConditions.size(), 1); + IndexSearchCondition sc = sConditions.get(0); + PushdownTuple tuple = new PushdownTuple(sConditions.get(0), handler.getPrimitiveComparison(sc + .getColumnDesc().getTypeString(), sc), handler.getCompareOp(sc.getComparisonOp(), sc)); + byte[] expectedVal = new byte[4]; + ByteBuffer.wrap(expectedVal).putInt(5); + assertArrayEquals(tuple.getConstVal(), expectedVal); + assertEquals(tuple.getcOpt().getClass(), Equal.class); + assertEquals(tuple.getpCompare().getClass(), IntCompare.class); + } + + @Test(expected = NoSuchPrimitiveComparisonException.class) + public void testPushdownColumnTypeNotSupported() throws SerDeException, + NoSuchPrimitiveComparisonException, NoSuchCompareOpException { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.floatTypeInfo, "field1", null, + false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.floatTypeInfo, 5.5f); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqual(), children); + assertNotNull(node); + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + List sConditions = handler.getSearchConditions(conf); + assertEquals(sConditions.size(), 1); + IndexSearchCondition sc = sConditions.get(0); + + handler.getPrimitiveComparison(sc.getColumnDesc().getTypeString(), sc); + } + + @Test + public void testPushdownComparisonOptNotSupported() { + try { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, + false); + List children = Lists.newArrayList(); + children.add(column); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPNotNull(), children); + assertNotNull(node); + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + List sConditions = handler.getSearchConditions(conf); + assertEquals(sConditions.size(), 1); + IndexSearchCondition sc = sConditions.get(0); + new PushdownTuple(sc, handler.getPrimitiveComparison(sc.getColumnDesc().getTypeString(), sc), + handler.getCompareOp(sc.getComparisonOp(), sc)); + fail("Should fail: compare op not registered for index analyzer. 
Should leave undesirable residual predicate"); + } catch (RuntimeException e) { + assertTrue(e.getMessage().contains("Unexpected residual predicate: field1 is not null")); + } catch (Exception e) { + fail(StringUtils.stringifyException(e)); + } + } + + @Test + public void testIteratorIgnoreRowIDFields() { + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children); + assertNotNull(node); + + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb"); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPGreaterThan(), children2); + assertNotNull(node2); + + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + String filterExpr = Utilities.serializeExpression(both); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + try { + List iterators = handler.getIterators(conf, columnMapper); + assertEquals(iterators.size(), 0); + } catch (SerDeException e) { + StringUtils.stringifyException(e); + } + } + + @Test + public void testIgnoreIteratorPushdown() throws TooManyAccumuloColumnsException { + // Override what's placed in the Configuration by setup() + conf = new JobConf(); + List columnNames = Arrays.asList("field1", "field2", "rid"); + List columnTypes = Arrays. 
asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo); + conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); + conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string"); + + String columnMappingStr = "cf:f1,cf:f2,:rowID"; + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); + columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, + columnTypes); + + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, + false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children); + assertNotNull(node); + + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPGreaterThan(), children2); + assertNotNull(node2); + + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + String filterExpr = Utilities.serializeExpression(both); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + conf.setBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, false); + try { + List iterators = handler.getIterators(conf, columnMapper); + assertEquals(iterators.size(), 0); + } catch (Exception e) { + fail(StringUtils.stringifyException(e)); + } + } + + @Test + public void testCreateIteratorSettings() throws Exception { + // Override what's placed in the Configuration by setup() + conf = new JobConf(); + List columnNames = Arrays.asList("field1", "field2", "rid"); + List columnTypes = Arrays. 
asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo); + conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); + conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string"); + String columnMappingStr = "cf:f1,cf:f2,:rowID"; + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); + columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, + columnTypes); + + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, + false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children); + assertNotNull(node); + + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPGreaterThan(), children2); + assertNotNull(node2); + + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + String filterExpr = Utilities.serializeExpression(both); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + List iterators = handler.getIterators(conf, columnMapper); + assertEquals(iterators.size(), 2); + IteratorSetting is1 = iterators.get(0); + IteratorSetting is2 = iterators.get(1); + + boolean foundQual = false; + boolean foundPCompare = false; + boolean foundCOpt = false; + boolean foundConst = false; + for (Map.Entry option : is1.getOptions().entrySet()) { + String optKey = option.getKey(); + if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) { + foundQual = true; + assertEquals(option.getValue(), "cf:f1"); + } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) { + foundConst = true; + assertEquals(option.getValue(), new String(Base64.encodeBase64("aaa".getBytes()))); + } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) { + foundCOpt = true; + assertEquals(option.getValue(), LessThanOrEqual.class.getName()); + } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) { + foundPCompare = true; + assertEquals(option.getValue(), StringCompare.class.getName()); + } + + } + assertTrue(foundConst & foundCOpt & foundPCompare & foundQual); + + foundQual = false; + foundPCompare = false; + foundCOpt = false; + foundConst = false; + for (Map.Entry option : is2.getOptions().entrySet()) { + String optKey = option.getKey(); + if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) { + foundQual = true; + assertEquals(option.getValue(), "cf:f2"); + } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) { + foundConst = true; + byte[] intVal = new byte[4]; + ByteBuffer.wrap(intVal).putInt(5); + assertEquals(option.getValue(), new String(Base64.encodeBase64(intVal))); + } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) { + foundCOpt = true; + assertEquals(option.getValue(), GreaterThan.class.getName()); + } else if 
(optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) { + foundPCompare = true; + assertEquals(option.getValue(), IntCompare.class.getName()); + } + + } + assertTrue(foundConst & foundCOpt & foundPCompare & foundQual); + } + + @Test + public void testBasicOptLookup() throws NoSuchCompareOpException { + boolean foundEqual = false; + boolean foundNotEqual = false; + boolean foundGreaterThanOrEqual = false; + boolean foundGreaterThan = false; + boolean foundLessThanOrEqual = false; + boolean foundLessThan = false; + for (String opt : handler.cOpKeyset()) { + Class compOpt = handler.getCompareOpClass(opt); + if (compOpt.getName().equals(Equal.class.getName())) { + foundEqual = true; + } else if (compOpt.getName().equals(NotEqual.class.getName())) { + foundNotEqual = true; + } else if (compOpt.getName().equals(GreaterThan.class.getName())) { + foundGreaterThan = true; + } else if (compOpt.getName().equals(GreaterThanOrEqual.class.getName())) { + foundGreaterThanOrEqual = true; + } else if (compOpt.getName().equals(LessThan.class.getName())) { + foundLessThan = true; + } else if (compOpt.getName().equals(LessThanOrEqual.class.getName())) { + foundLessThanOrEqual = true; + } + } + assertTrue("Did not find Equal comparison op", foundEqual); + assertTrue("Did not find NotEqual comparison op", foundNotEqual); + assertTrue("Did not find GreaterThan comparison op", foundGreaterThan); + assertTrue("Did not find GreaterThanOrEqual comparison op", foundGreaterThanOrEqual); + assertTrue("Did not find LessThan comparison op", foundLessThan); + assertTrue("Did not find LessThanOrEqual comparison op", foundLessThanOrEqual); + } + + @Test(expected = NoSuchCompareOpException.class) + public void testNoOptFound() throws NoSuchCompareOpException { + handler.getCompareOpClass("blah"); + } + + @Test + public void testPrimitiveComparsionLookup() throws NoSuchPrimitiveComparisonException { + boolean foundLong = false; + boolean foundString = false; + boolean foundInt = false; + boolean foundDouble = false; + for (String type : handler.pComparisonKeyset()) { + Class pCompare = handler.getPrimitiveComparisonClass(type); + if (pCompare.getName().equals(DoubleCompare.class.getName())) { + foundDouble = true; + } else if (pCompare.getName().equals(LongCompare.class.getName())) { + foundLong = true; + } else if (pCompare.getName().equals(IntCompare.class.getName())) { + foundInt = true; + } else if (pCompare.getName().equals(StringCompare.class.getName())) { + foundString = true; + } + } + assertTrue("Did not find DoubleCompare op", foundDouble); + assertTrue("Did not find LongCompare op", foundLong); + assertTrue("Did not find IntCompare op", foundInt); + assertTrue("Did not find StringCompare op", foundString); + } + + @Test + public void testRowRangeIntersection() throws SerDeException { + // rowId >= 'f' + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + // rowId <= 'm' + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m"); + List children2 = Lists.newArrayList(); + children2.add(column2); + 
children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children2); + assertNotNull(node2); + + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + String filterExpr = Utilities.serializeExpression(both); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + // Should make ['f', 'm\0') + List ranges = handler.getRanges(conf, columnMapper); + assertEquals(1, ranges.size()); + assertEquals(new Range(new Key("f"), true, new Key("m\0"), false), ranges.get(0)); + } + + @Test + public void testRowRangeGeneration() throws SerDeException { + List columnNames = Arrays.asList("key", "column"); + List columnTypes = Arrays. asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); + conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,string"); + + String columnMappingStr = ":rowID,cf:f1"; + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); + columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, + columnTypes); + + // 100 < key + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 100); + List children = Lists.newArrayList(); + children.add(constant); + children.add(column); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPLessThan(), children); + assertNotNull(node); + + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + // Should make (100, +inf) + List ranges = handler.getRanges(conf, columnMapper); + Assert.assertEquals(1, ranges.size()); + Assert.assertEquals(new Range(new Text("100"), false, null, false), ranges.get(0)); + } + + @Test + public void testBinaryRangeGeneration() throws Exception { + List columnNames = Arrays.asList("key", "column"); + List columnTypes = Arrays. 
asList(TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo); + conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames)); + conf.set(serdeConstants.LIST_COLUMN_TYPES, "int,string"); + + String columnMappingStr = ":rowID#b,cf:f1"; + conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr); + columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, + columnTypes); + + int intValue = 100; + + // Make binary integer value in the bytearray + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + JavaIntObjectInspector intOI = (JavaIntObjectInspector) PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME)); + LazyUtils.writePrimitive(baos, intValue, intOI); + + // 100 < key + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, intValue); + List children = Lists.newArrayList(); + children.add(constant); + children.add(column); + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPLessThan(), children); + assertNotNull(node); + + String filterExpr = Utilities.serializeExpression(node); + conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr); + + // Should make (100, +inf) + List ranges = handler.getRanges(conf, columnMapper); + Assert.assertEquals(1, ranges.size()); + Assert.assertEquals(new Range(new Text(baos.toByteArray()), false, null, false), ranges.get(0)); + } + + @Test + public void testNullRangeGeneratorOutput() throws SerDeException { + // The AccumuloRangeGenerator produces an Object (due to the limitations of the + // traversal interface) which requires interpretation of that Object into Ranges. + // Changes in the return object from the AccumuloRangeGenerator must also represent + // a change in the AccumuloPredicateHandler. + AccumuloPredicateHandler mockHandler = Mockito.mock(AccumuloPredicateHandler.class); + ExprNodeDesc root = Mockito.mock(ExprNodeDesc.class); + String hiveRowIdColumnName = "rid"; + + Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); + Mockito.when(mockHandler.generateRanges(columnMapper, hiveRowIdColumnName, root)).thenReturn(null); + Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); + + // A null result from AccumuloRangeGenerator is all ranges + Assert.assertEquals(Arrays.asList(new Range()), mockHandler.getRanges(conf, columnMapper)); + } + + @Test + public void testEmptyListRangeGeneratorOutput() throws SerDeException { + // The AccumuloRangeGenerator produces an Object (due to the limitations of the + // traversal interface) which requires interpretation of that Object into Ranges. + // Changes in the return object from the AccumuloRangeGenerator must also represent + // a change in the AccumuloPredicateHandler. 
+ AccumuloPredicateHandler mockHandler = Mockito.mock(AccumuloPredicateHandler.class); + ExprNodeDesc root = Mockito.mock(ExprNodeDesc.class); + String hiveRowIdColumnName = "rid"; + + Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); + Mockito.when(mockHandler.generateRanges(columnMapper, hiveRowIdColumnName, root)).thenReturn(Collections.emptyList()); + Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); + + // An empty list from the AccumuloRangeGenerator means there are no ranges to scan + Assert.assertEquals(Collections.emptyList(), mockHandler.getRanges(conf, columnMapper)); + } + + @Test + public void testSingleRangeGeneratorOutput() throws SerDeException { + // The AccumuloRangeGenerator produces an Object (due to the limitations of the + // traversal interface) which requires interpretation of that Object into Ranges. + // Changes in the return object from the AccumuloRangeGenerator must also represent + // a change in the AccumuloPredicateHandler. + AccumuloPredicateHandler mockHandler = Mockito.mock(AccumuloPredicateHandler.class); + ExprNodeDesc root = Mockito.mock(ExprNodeDesc.class); + String hiveRowIdColumnName = "rid"; + Range r = new Range("a"); + + Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); + Mockito.when(mockHandler.generateRanges(columnMapper, hiveRowIdColumnName, root)).thenReturn(r); + Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); + + // A single Range from the AccumuloRangeGenerator is returned as a singleton list + Assert.assertEquals(Collections.singletonList(r), mockHandler.getRanges(conf, columnMapper)); + } + + @Test + public void testManyRangesGeneratorOutput() throws SerDeException { + // The AccumuloRangeGenerator produces an Object (due to the limitations of the + // traversal interface) which requires interpretation of that Object into Ranges. + // Changes in the return object from the AccumuloRangeGenerator must also represent + // a change in the AccumuloPredicateHandler. + AccumuloPredicateHandler mockHandler = Mockito.mock(AccumuloPredicateHandler.class); + ExprNodeDesc root = Mockito.mock(ExprNodeDesc.class); + String hiveRowIdColumnName = "rid"; + Range r1 = new Range("a"), r2 = new Range("z"); + + Mockito.when(mockHandler.getRanges(conf, columnMapper)).thenCallRealMethod(); + Mockito.when(mockHandler.generateRanges(columnMapper, hiveRowIdColumnName, root)).thenReturn(Arrays.asList(r1, r2)); + Mockito.when(mockHandler.getExpression(conf)).thenReturn(root); + + // A list of Ranges from the AccumuloRangeGenerator is passed through unchanged + Assert.assertEquals(Arrays.asList(r1, r2), mockHandler.getRanges(conf, columnMapper)); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloRangeGenerator.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloRangeGenerator.java new file mode 100644 index 0000000..339da07 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestAccumuloRangeGenerator.java @@ -0,0 +1,467 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.predicate; + +import static org.junit.Assert.assertNotNull; + +import java.sql.Date; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +import org.apache.accumulo.core.data.Key; +import org.apache.accumulo.core.data.Range; +import org.apache.hadoop.hive.accumulo.AccumuloHiveConstants; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.UDFLike; +import org.apache.hadoop.hive.ql.udf.UDFToString; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import com.google.common.collect.Lists; + +/** + * + */ +public class TestAccumuloRangeGenerator { + + private AccumuloPredicateHandler handler; + private HiveAccumuloRowIdColumnMapping rowIdMapping; + + @Before + public void setup() { + handler = AccumuloPredicateHandler.getInstance(); + rowIdMapping = new HiveAccumuloRowIdColumnMapping(AccumuloHiveConstants.ROWID, + ColumnEncoding.STRING, "row", TypeInfoFactory.stringTypeInfo.toString()); + } + + @Test + public void testRangeConjunction() throws Exception { + // rowId >= 'f' + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + // rowId <= 'm' + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, + false); + ExprNodeDesc constant2 = new 
ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m"); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children2); + assertNotNull(node2); + + // And UDF + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + // Should generate [f,m] + List expectedRanges = Arrays + .asList(new Range(new Key("f"), true, new Key("m\0"), false)); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections. emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.add(both); + HashMap nodeOutput = new HashMap(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + Object result = nodeOutput.get(both); + Assert.assertNotNull(result); + Assert.assertTrue("Result from graph walk was not a List", result instanceof List); + @SuppressWarnings("unchecked") + List actualRanges = (List) result; + Assert.assertEquals(expectedRanges, actualRanges); + } + + @Test + public void testRangeDisjunction() throws Exception { + // rowId >= 'f' + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + // rowId <= 'm' + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m"); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children2); + assertNotNull(node2); + + // Or UDF + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPOr(), bothFilters); + + // Should generate (-inf,+inf) + List expectedRanges = Arrays.asList(new Range()); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections. 
emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.add(both); + HashMap nodeOutput = new HashMap(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + Object result = nodeOutput.get(both); + Assert.assertNotNull(result); + Assert.assertTrue("Result from graph walk was not a List", result instanceof List); + @SuppressWarnings("unchecked") + List actualRanges = (List) result; + Assert.assertEquals(expectedRanges, actualRanges); + } + + @Test + public void testRangeConjunctionWithDisjunction() throws Exception { + // rowId >= 'h' + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "h"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + // rowId <= 'd' + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "d"); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children2); + assertNotNull(node2); + + // rowId >= 'q' + ExprNodeDesc column3 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, + false); + ExprNodeDesc constant3 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "q"); + List children3 = Lists.newArrayList(); + children3.add(column3); + children3.add(constant3); + ExprNodeDesc node3 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children3); + assertNotNull(node3); + + // Or UDF, (rowId <= 'd' or rowId >= 'q') + List orFilters = Lists.newArrayList(); + orFilters.add(node2); + orFilters.add(node3); + ExprNodeGenericFuncDesc orNode = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPOr(), orFilters); + + // And UDF, (rowId >= 'h' and (rowId <= 'd' or rowId >= 'q')) + List andFilters = Lists.newArrayList(); + andFilters.add(node); + andFilters.add(orNode); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), andFilters); + + // Should generate ['q', +inf) + List expectedRanges = Arrays.asList(new Range(new Key("q"), true, null, false)); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections. 
emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.add(both); + HashMap nodeOutput = new HashMap(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + Object result = nodeOutput.get(both); + Assert.assertNotNull(result); + Assert.assertTrue("Result from graph walk was not a List", result instanceof List); + @SuppressWarnings("unchecked") + List actualRanges = (List) result; + Assert.assertEquals(expectedRanges, actualRanges); + } + + @Test + public void testPartialRangeConjunction() throws Exception { + // rowId >= 'f' + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + // anythingElse <= 'foo' + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "anythingElse", + null, false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "foo"); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children2); + assertNotNull(node2); + + // And UDF + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + // Should generate [f,+inf) + List expectedRanges = Arrays.asList(new Range(new Key("f"), true, null, false)); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections. 
emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.add(both); + HashMap nodeOutput = new HashMap(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + Object result = nodeOutput.get(both); + Assert.assertNotNull(result); + Assert.assertTrue("Result from graph walk was not a List", result instanceof List); + @SuppressWarnings("unchecked") + List actualRanges = (List) result; + Assert.assertEquals(expectedRanges, actualRanges); + } + + @Test + public void testDateRangeConjunction() throws Exception { + // rowId >= '2014-01-01' + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, + Date.valueOf("2014-01-01")); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + // rowId <= '2014-07-01' + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, + Date.valueOf("2014-07-01")); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPLessThan(), children2); + assertNotNull(node2); + + // And UDF + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + // Should generate [2014-01-01, 2014-07-01) + List expectedRanges = Arrays.asList(new Range(new Key("2014-01-01"), true, new Key( + "2014-07-01"), false)); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections. emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.add(both); + HashMap nodeOutput = new HashMap(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + Object result = nodeOutput.get(both); + Assert.assertNotNull(result); + Assert.assertTrue("Result from graph walk was not a List", result instanceof List); + @SuppressWarnings("unchecked") + List actualRanges = (List) result; + Assert.assertEquals(expectedRanges, actualRanges); + } + + @Test + public void testCastExpression() throws Exception { + // 40 and 50 + ExprNodeDesc fourty = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, + 40), fifty = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 50); + + // + + GenericUDFOPPlus plus = new GenericUDFOPPlus(); + + // 40 + 50 + ExprNodeGenericFuncDesc addition = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, plus, Arrays.asList(fourty, fifty)); + + // cast(.... 
as string) + UDFToString stringCast = new UDFToString(); + GenericUDFBridge stringCastBridge = new GenericUDFBridge("cast", false, stringCast.getClass().getName()); + + // cast (40 + 50 as string) + ExprNodeGenericFuncDesc cast = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + stringCastBridge, "cast", Collections. singletonList(addition)); + + ExprNodeDesc key = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", null, + false); + + ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(key, cast)); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "key"); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections. emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.add(node); + HashMap nodeOutput = new HashMap(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + // Don't fail -- would be better to actually compute a range of [90,+inf) + Object result = nodeOutput.get(node); + Assert.assertNull(result); + } + + @Test + public void testRangeOverNonRowIdField() throws Exception { + // foo >= 'f' + ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "foo", null, false); + ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f"); + List children = Lists.newArrayList(); + children.add(column); + children.add(constant); + ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), children); + assertNotNull(node); + + // foo <= 'm' + ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "foo", null, + false); + ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m"); + List children2 = Lists.newArrayList(); + children2.add(column2); + children2.add(constant2); + ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPEqualOrLessThan(), children2); + assertNotNull(node2); + + // And UDF + List bothFilters = Lists.newArrayList(); + bothFilters.add(node); + bothFilters.add(node2); + ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, + new GenericUDFOPAnd(), bothFilters); + + AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid"); + Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, + Collections. 
emptyMap(), null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.add(both); + HashMap nodeOutput = new HashMap(); + + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + + // Filters are not over the rowid, therefore scan everything + Object result = nodeOutput.get(both); + Assert.assertNull(result); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestPrimitiveComparisonFilter.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestPrimitiveComparisonFilter.java new file mode 100644 index 0000000..95b6ba4 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/TestPrimitiveComparisonFilter.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.predicate; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.io.IntWritable; +import org.junit.Assert; +import org.junit.Test; + +/** + * + */ +public class TestPrimitiveComparisonFilter { + + @Test + public void testBase64ConstantEncode() { + PrimitiveComparisonFilter filter = new PrimitiveComparisonFilter(); + Map options = new HashMap(); + + for (int i = 0; i < 500; i++) { + String constant = Integer.toString(i); + options.put(PrimitiveComparisonFilter.CONST_VAL, new String(Base64.encodeBase64(constant.getBytes()))); + + Assert.assertEquals(constant, new String(filter.getConstant(options))); + } + } + + @Test + public void testNumericBase64ConstantEncode() throws IOException { + PrimitiveComparisonFilter filter = new PrimitiveComparisonFilter(); + Map options = new HashMap(); + IntWritable writable = new IntWritable(); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos); + + for (int i = 0; i < 500; i++) { + writable.set(i); + writable.write(out); + + options.put(PrimitiveComparisonFilter.CONST_VAL, new String(Base64.encodeBase64(baos.toByteArray()))); + + byte[] bytes = filter.getConstant(options); + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + DataInputStream in = new DataInputStream(bais); + writable.readFields(in); + + Assert.assertEquals(i, writable.get()); + + baos.reset(); + } + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestDoubleCompare.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestDoubleCompare.java 
new file mode 100644 index 0000000..a6049c8 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestDoubleCompare.java @@ -0,0 +1,137 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; + +import org.junit.Before; +import org.junit.Test; + +public class TestDoubleCompare { + + private DoubleCompare doubleCompare; + + @Before + public void setup() { + doubleCompare = new DoubleCompare(); + byte[] db = new byte[8]; + ByteBuffer.wrap(db).putDouble(10.5d); + doubleCompare.init(db); + } + + public byte[] getBytes(double val) { + byte[] dBytes = new byte[8]; + ByteBuffer.wrap(dBytes).putDouble(val); + BigDecimal bd = doubleCompare.serialize(dBytes); + assertEquals(bd.doubleValue(), val, 0); + return dBytes; + } + + @Test + public void equal() { + Equal equalObj = new Equal(doubleCompare); + byte[] val = getBytes(10.5d); + assertTrue(equalObj.accept(val)); + } + + @Test + public void notEqual() { + NotEqual notEqualObj = new NotEqual(doubleCompare); + byte[] val = getBytes(11.0d); + assertTrue(notEqualObj.accept(val)); + + val = getBytes(10.5d); + assertFalse(notEqualObj.accept(val)); + + } + + @Test + public void greaterThan() { + GreaterThan greaterThanObj = new GreaterThan(doubleCompare); + byte[] val = getBytes(11.0d); + + assertTrue(greaterThanObj.accept(val)); + + val = getBytes(4.5d); + assertFalse(greaterThanObj.accept(val)); + + val = getBytes(10.5d); + assertFalse(greaterThanObj.accept(val)); + } + + @Test + public void greaterThanOrEqual() { + GreaterThanOrEqual greaterThanOrEqualObj = new GreaterThanOrEqual(doubleCompare); + + byte[] val = getBytes(11.0d); + + assertTrue(greaterThanOrEqualObj.accept(val)); + + val = getBytes(4.0d); + assertFalse(greaterThanOrEqualObj.accept(val)); + + val = getBytes(10.5d); + assertTrue(greaterThanOrEqualObj.accept(val)); + } + + @Test + public void lessThan() { + + LessThan lessThanObj = new LessThan(doubleCompare); + + byte[] val = getBytes(11.0d); + + assertFalse(lessThanObj.accept(val)); + + val = getBytes(4.0d); + assertTrue(lessThanObj.accept(val)); + + val = getBytes(10.5d); + assertFalse(lessThanObj.accept(val)); + + } + + @Test + public void lessThanOrEqual() { + + LessThanOrEqual lessThanOrEqualObj = new LessThanOrEqual(doubleCompare); + + byte[] val = getBytes(11.0d); + + assertFalse(lessThanOrEqualObj.accept(val)); + + val = getBytes(4.0d); + assertTrue(lessThanOrEqualObj.accept(val)); + + val = getBytes(10.5d); + assertTrue(lessThanOrEqualObj.accept(val)); + } + + @Test + public void like() { + try { + Like likeObj = new Like(doubleCompare); + assertTrue(likeObj.accept(new byte[] {})); + fail("should not accept"); + } catch (UnsupportedOperationException e) { + assertTrue(e.getMessage().contains( + "Like not supported for " + doubleCompare.getClass().getName())); + } + } + + @Test + public void invalidSerialization() { + try { + byte[] badVal = new byte[4]; + ByteBuffer.wrap(badVal).putInt(1); + doubleCompare.serialize(badVal); + fail("Should fail"); + } catch (RuntimeException e) { + assertTrue(e.getMessage().contains(" occurred trying to build double value")); + } + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestIntCompare.java 
b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestIntCompare.java new file mode 100644 index 0000000..9847a18 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestIntCompare.java @@ -0,0 +1,123 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.nio.ByteBuffer; + +import org.junit.Before; +import org.junit.Test; + +public class TestIntCompare { + private IntCompare intCompare; + + @Before + public void setup() { + byte[] ibytes = new byte[4]; + ByteBuffer.wrap(ibytes).putInt(10); + intCompare = new IntCompare(); + intCompare.init(ibytes); + } + + public byte[] getBytes(int val) { + byte[] intBytes = new byte[4]; + ByteBuffer.wrap(intBytes).putInt(val); + int serializedVal = intCompare.serialize(intBytes); + assertEquals(serializedVal, val); + return intBytes; + } + + @Test + public void equal() { + Equal equalObj = new Equal(intCompare); + byte[] val = getBytes(10); + assertTrue(equalObj.accept(val)); + } + + @Test + public void notEqual() { + NotEqual notEqualObj = new NotEqual(intCompare); + byte[] val = getBytes(11); + assertTrue(notEqualObj.accept(val)); + + val = getBytes(10); + assertFalse(notEqualObj.accept(val)); + + } + + @Test + public void greaterThan() { + GreaterThan greaterThanObj = new GreaterThan(intCompare); + byte[] val = getBytes(11); + + assertTrue(greaterThanObj.accept(val)); + + val = getBytes(4); + assertFalse(greaterThanObj.accept(val)); + + val = getBytes(10); + assertFalse(greaterThanObj.accept(val)); + } + + @Test + public void greaterThanOrEqual() { + GreaterThanOrEqual greaterThanOrEqualObj = new GreaterThanOrEqual(intCompare); + + byte[] val = getBytes(11); + + assertTrue(greaterThanOrEqualObj.accept(val)); + + val = getBytes(4); + assertFalse(greaterThanOrEqualObj.accept(val)); + + val = getBytes(10); + assertTrue(greaterThanOrEqualObj.accept(val)); + } + + @Test + public void lessThan() { + + LessThan lessThanObj = new LessThan(intCompare); + + byte[] val = getBytes(11); + + assertFalse(lessThanObj.accept(val)); + + val = getBytes(4); + assertTrue(lessThanObj.accept(val)); + + val = getBytes(10); + assertFalse(lessThanObj.accept(val)); + + } + + @Test + public void lessThanOrEqual() { + + LessThanOrEqual lessThanOrEqualObj = new LessThanOrEqual(intCompare); + + byte[] val = getBytes(11); + + assertFalse(lessThanOrEqualObj.accept(val)); + + val = getBytes(4); + assertTrue(lessThanOrEqualObj.accept(val)); + + val = getBytes(10); + assertTrue(lessThanOrEqualObj.accept(val)); + } + + @Test + public void like() { + try { + Like likeObj = new Like(intCompare); + assertTrue(likeObj.accept(new byte[] {})); + fail("should not accept"); + } catch (UnsupportedOperationException e) { + assertTrue(e.getMessage().contains( + "Like not supported for " + intCompare.getClass().getName())); + } + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestLongComparison.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestLongComparison.java new file mode 100644 index 0000000..2abd41b --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestLongComparison.java @@ -0,0 +1,136 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +import static org.junit.Assert.assertEquals; 
+import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.nio.ByteBuffer; + +import org.junit.Before; +import org.junit.Test; + +public class TestLongComparison { + + private LongCompare longComp; + + @Before + public void setup() { + byte[] lBytes = new byte[8]; + ByteBuffer.wrap(lBytes).putLong(10l); + longComp = new LongCompare(); + longComp.init(lBytes); + } + + public byte[] getBytes(long val) { + byte[] lonBytes = new byte[8]; + ByteBuffer.wrap(lonBytes).putLong(val); + long lon = longComp.serialize(lonBytes); + assertEquals(lon, val); + return lonBytes; + } + + @Test + public void equal() { + Equal equalObj = new Equal(longComp); + byte[] val = getBytes(10l); + assertTrue(equalObj.accept(val)); + } + + @Test + public void notEqual() { + NotEqual notEqualObj = new NotEqual(longComp); + byte[] val = getBytes(11l); + assertTrue(notEqualObj.accept(val)); + + val = getBytes(10l); + assertFalse(notEqualObj.accept(val)); + + } + + @Test + public void greaterThan() { + GreaterThan greaterThanObj = new GreaterThan(longComp); + byte[] val = getBytes(11l); + + assertTrue(greaterThanObj.accept(val)); + + val = getBytes(4l); + assertFalse(greaterThanObj.accept(val)); + + val = getBytes(10l); + assertFalse(greaterThanObj.accept(val)); + } + + @Test + public void greaterThanOrEqual() { + GreaterThanOrEqual greaterThanOrEqualObj = new GreaterThanOrEqual(longComp); + + byte[] val = getBytes(11l); + + assertTrue(greaterThanOrEqualObj.accept(val)); + + val = getBytes(4l); + assertFalse(greaterThanOrEqualObj.accept(val)); + + val = getBytes(10l); + assertTrue(greaterThanOrEqualObj.accept(val)); + } + + @Test + public void lessThan() { + + LessThan lessThanObj = new LessThan(longComp); + + byte[] val = getBytes(11l); + + assertFalse(lessThanObj.accept(val)); + + val = getBytes(4l); + assertTrue(lessThanObj.accept(val)); + + val = getBytes(10l); + assertFalse(lessThanObj.accept(val)); + + } + + @Test + public void lessThanOrEqual() { + + LessThanOrEqual lessThanOrEqualObj = new LessThanOrEqual(longComp); + + byte[] val = getBytes(11l); + + assertFalse(lessThanOrEqualObj.accept(val)); + + val = getBytes(4l); + assertTrue(lessThanOrEqualObj.accept(val)); + + val = getBytes(10l); + assertTrue(lessThanOrEqualObj.accept(val)); + } + + @Test + public void like() { + try { + Like likeObj = new Like(longComp); + assertTrue(likeObj.accept(new byte[] {})); + fail("should not accept"); + } catch (UnsupportedOperationException e) { + assertTrue(e.getMessage().contains("Like not supported for " + longComp.getClass().getName())); + } + } + + @Test + public void invalidSerialization() { + try { + byte[] badVal = new byte[4]; + ByteBuffer.wrap(badVal).putInt(1); + longComp.serialize(badVal); + fail("Should fail"); + } catch (RuntimeException e) { + assertTrue(e.getMessage().contains(" occurred trying to build long value")); + } + } + +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestStringCompare.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestStringCompare.java new file mode 100644 index 0000000..08716bc --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/predicate/compare/TestStringCompare.java @@ -0,0 +1,122 @@ +package org.apache.hadoop.hive.accumulo.predicate.compare; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Before; +import org.junit.Test; + +public class 
TestStringCompare { + + private StringCompare strCompare; + + @Before + public void setup() { + strCompare = new StringCompare(); + strCompare.init("aaa".getBytes()); + } + + @Test + public void equal() { + Equal equalObj = new Equal(strCompare); + byte[] val = "aaa".getBytes(); + assertTrue(equalObj.accept(val)); + } + + @Test + public void notEqual() { + NotEqual notEqualObj = new NotEqual(strCompare); + byte[] val = "aab".getBytes(); + assertTrue(notEqualObj.accept(val)); + + val = "aaa".getBytes(); + assertFalse(notEqualObj.accept(val)); + + } + + @Test + public void greaterThan() { + GreaterThan greaterThanObj = new GreaterThan(strCompare); + byte[] val = "aab".getBytes(); + + assertTrue(greaterThanObj.accept(val)); + + val = "aa".getBytes(); + assertFalse(greaterThanObj.accept(val)); + + val = "aaa".getBytes(); + assertFalse(greaterThanObj.accept(val)); + } + + @Test + public void greaterThanOrEqual() { + GreaterThanOrEqual greaterThanOrEqualObj = new GreaterThanOrEqual(strCompare); + byte[] val = "aab".getBytes(); + + assertTrue(greaterThanOrEqualObj.accept(val)); + + val = "aa".getBytes(); + assertFalse(greaterThanOrEqualObj.accept(val)); + + val = "aaa".getBytes(); + assertTrue(greaterThanOrEqualObj.accept(val)); + } + + @Test + public void lessThan() { + + LessThan lessThanObj = new LessThan(strCompare); + + byte[] val = "aab".getBytes(); + + assertFalse(lessThanObj.accept(val)); + + val = "aa".getBytes(); + assertTrue(lessThanObj.accept(val)); + + val = "aaa".getBytes(); + assertFalse(lessThanObj.accept(val)); + + } + + @Test + public void lessThanOrEqual() { + + LessThanOrEqual lessThanOrEqualObj = new LessThanOrEqual(strCompare); + + byte[] val = "aab".getBytes(); + + assertFalse(lessThanOrEqualObj.accept(val)); + + val = "aa".getBytes(); + assertTrue(lessThanOrEqualObj.accept(val)); + + val = "aaa".getBytes(); + assertTrue(lessThanOrEqualObj.accept(val)); + } + + @Test + public void like() { + Like likeObj = new Like(strCompare); + String condition = "%a"; + assertTrue(likeObj.accept(condition.getBytes())); + + condition = "%a%"; + assertTrue(likeObj.accept(condition.getBytes())); + + condition = "a%"; + assertTrue(likeObj.accept(condition.getBytes())); + + condition = "a%aa"; + assertFalse(likeObj.accept(condition.getBytes())); + + condition = "b%"; + assertFalse(likeObj.accept(condition.getBytes())); + + condition = "%ab%"; + assertFalse(likeObj.accept(condition.getBytes())); + + condition = "%ba"; + assertFalse(likeObj.accept(condition.getBytes())); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/DelimitedAccumuloRowIdFactory.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/DelimitedAccumuloRowIdFactory.java new file mode 100644 index 0000000..4bb5419 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/DelimitedAccumuloRowIdFactory.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.serde; + +import java.io.IOException; +import java.util.List; +import java.util.Properties; + +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.log4j.Logger; + +/** + * Example AccumuloRowIdFactory which accepts a delimiter that is used to separate the components of + * some struct to place in the rowId. + */ +public class DelimitedAccumuloRowIdFactory extends DefaultAccumuloRowIdFactory { + private static final Logger log = Logger.getLogger(DelimitedAccumuloRowIdFactory.class); + public static final String ACCUMULO_COMPOSITE_DELIMITER = "accumulo.composite.delimiter"; + + private byte separator; + + @Override + public void init(AccumuloSerDeParameters accumuloSerDeParams, Properties properties) + throws SerDeException { + super.init(accumuloSerDeParams, properties); + + String delimiter = properties.getProperty(ACCUMULO_COMPOSITE_DELIMITER); + if (null == delimiter || delimiter.isEmpty()) { + throw new SerDeException("Did not find expected delimiter in configuration: " + + ACCUMULO_COMPOSITE_DELIMITER); + } + + if (delimiter.length() != 1) { + log.warn("Configured delimiter is longer than one character, only using first character"); + } + + separator = (byte) delimiter.charAt(0); + + log.info("Initialized DelimitedAccumuloRowIdFactory with separator of '" + separator + "'"); + } + + @Override + public ObjectInspector createRowIdObjectInspector(TypeInfo type) throws SerDeException { + return LazyFactory.createLazyObjectInspector(type, new byte[] {separator}, 0, + serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar()); + } + + @Override + public LazyObjectBase createRowId(ObjectInspector inspector) throws SerDeException { + LazyObjectBase lazyObj = LazyFactory.createLazyObject(inspector, + ColumnEncoding.BINARY == rowIdMapping.getEncoding()); + log.info("Created " + lazyObj.getClass() + " for rowId with inspector " + inspector.getClass()); + return lazyObj; + } + + @Override + public byte[] serializeRowId(Object object, StructField field, ByteStream.Output output) + throws IOException { + ObjectInspector inspector = field.getFieldObjectInspector(); + if (inspector.getCategory() != ObjectInspector.Category.STRUCT) { + throw new IllegalStateException("invalid type value " + inspector.getTypeName()); + } + + output.reset(); + + StructObjectInspector structOI = (StructObjectInspector) inspector; + List elements = structOI.getStructFieldsDataAsList(object); + List fields = structOI.getAllStructFieldRefs(); + for (int i = 0; i < elements.size(); i++) { + Object o = elements.get(i); + StructField 
structField = fields.get(i); + + if (output.getLength() > 0) { + output.write(separator); + } + + serializer.writeWithLevel(structField.getFieldObjectInspector(), o, output, rowIdMapping, 1); + } + + return output.toByteArray(); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java new file mode 100644 index 0000000..e047ae5 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.serde; + +import java.util.Arrays; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Logger; + +/** + * Gets the first character of each string in a struct + */ +public class FirstCharAccumuloCompositeRowId extends AccumuloCompositeRowId { + private static final Logger log = Logger.getLogger(FirstCharAccumuloCompositeRowId.class); + + private Properties tbl; + private Configuration conf; + private byte[] bytes; + private int start, length; + private String bytesAsString; + + public FirstCharAccumuloCompositeRowId(LazySimpleStructObjectInspector oi, Properties tbl, + Configuration conf) { + super(oi); + this.tbl = tbl; + this.conf = conf; + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + this.bytes = bytes.getData(); + this.start = start; + this.length = length; + } + + @Override + public Object getField(int fieldID) { + if (bytesAsString == null) { + this.bytesAsString = new String(bytes, start, length); + } + + log.info("Data: " + bytesAsString + ", " + Arrays.toString(bytes)); + + // The separator for the hive row would be using \x02, so the separator for this struct would be + // \x02 + 1 = \x03 + char separator = (char) ((int) oi.getSeparator() + 1); + + log.info("Separator: " + String.format("%04x", (int) separator)); + + // Get the character/byte at the offset in the string equal to the fieldID + String[] fieldBytes = StringUtils.split(bytesAsString, separator); + + log.info("Fields: " + Arrays.toString(fieldBytes)); + + return toLazyObject(fieldID, new byte[] {(byte) fieldBytes[fieldID].charAt(0)}); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java new file mode 100644 index 
0000000..f613a58 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.serde; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.accumulo.core.data.ColumnUpdate; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapping; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazyStruct; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +import com.google.common.base.Joiner; + +/** + * + */ +public class TestAccumuloRowSerializer { + + @Test + public void testBufferResetBeforeUse() throws IOException { + ByteStream.Output output = new ByteStream.Output(); + PrimitiveObjectInspector fieldObjectInspector = Mockito.mock(StringObjectInspector.class); + ColumnMapping mapping = Mockito.mock(ColumnMapping.class); + + // Write some garbage to the buffer that should be erased + output.write("foobar".getBytes()); + + // Stub out the serializer + AccumuloRowSerializer serializer = Mockito.mock(AccumuloRowSerializer.class); + + String object = "hello"; + + Mockito.when( + 
serializer.getSerializedValue(Mockito.any(ObjectInspector.class), Mockito.any(), + Mockito.any(ByteStream.Output.class), Mockito.any(ColumnMapping.class))) + .thenCallRealMethod(); + + Mockito.when(fieldObjectInspector.getCategory()).thenReturn(ObjectInspector.Category.PRIMITIVE); + Mockito.when(fieldObjectInspector.getPrimitiveCategory()).thenReturn(PrimitiveCategory.STRING); + Mockito.when(fieldObjectInspector.getPrimitiveWritableObject(Mockito.any(Object.class))) + .thenReturn(new Text(object)); + Mockito.when(mapping.getEncoding()).thenReturn(ColumnEncoding.STRING); + + // Invoke the method + serializer.getSerializedValue(fieldObjectInspector, object, output, mapping); + + // Verify the buffer was reset (real output doesn't happen because it was mocked) + Assert.assertEquals(0, output.size()); + } + + @Test + public void testBinarySerialization() throws IOException, SerDeException { + List columns = Arrays.asList("row", "cq1", "cq2", "cq3"); + List types = Arrays. asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo); + List typeNames = new ArrayList(types.size()); + for (TypeInfo type : types) { + typeNames.add(type.getTypeName()); + } + + Properties tableProperties = new Properties(); + tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, + ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3"); + tableProperties.setProperty(serdeConstants.FIELD_DELIM, " "); + tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns)); + tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames)); + AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), + tableProperties, AccumuloSerDe.class.getSimpleName()); + SerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters(); + + LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) LazyFactory + .createLazyStructInspector(columns, types, serDeParams.getSeparators(), + serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), + serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, + accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), + accumuloSerDeParams.getRowIdFactory()); + + // Create the LazyStruct from the LazyStruct...Inspector + LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(oi); + + ByteArrayRef byteRef = new ByteArrayRef(); + byteRef.setData(new byte[] {'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', + 'l', 'u', 'e'}); + obj.init(byteRef, 0, byteRef.getData().length); + + Mutation m = (Mutation) serializer.serialize(obj, oi); + + Assert.assertArrayEquals("row1".getBytes(), m.getRow()); + + List updates = m.getUpdates(); + Assert.assertEquals(3, updates.size()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos); + + ColumnUpdate update = updates.get(0); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq1", new String(update.getColumnQualifier())); + + out.writeInt(10); + Assert.assertArrayEquals(baos.toByteArray(), update.getValue()); + + update = updates.get(1); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq2", new String(update.getColumnQualifier())); + + baos.reset(); + out.writeInt(20); + Assert.assertArrayEquals(baos.toByteArray(), update.getValue()); + + update 
= updates.get(2); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq3", new String(update.getColumnQualifier())); + + Assert.assertEquals("value", new String(update.getValue())); + } + + @Test + public void testVisibilityLabel() throws IOException, SerDeException { + List columns = Arrays.asList("row", "cq1", "cq2", "cq3"); + List types = Arrays. asList(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo); + List typeNames = new ArrayList(types.size()); + for (TypeInfo type : types) { + typeNames.add(type.getTypeName()); + } + + Properties tableProperties = new Properties(); + tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, + ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3"); + tableProperties.setProperty(serdeConstants.FIELD_DELIM, " "); + tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns)); + tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames)); + AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), + tableProperties, AccumuloSerDe.class.getSimpleName()); + SerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters(); + + LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) LazyFactory + .createLazyStructInspector(columns, types, serDeParams.getSeparators(), + serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), + serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, + accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), + accumuloSerDeParams.getRowIdFactory()); + + // Create the LazyStruct from the LazyStruct...Inspector + LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(oi); + + ByteArrayRef byteRef = new ByteArrayRef(); + byteRef.setData(new byte[] {'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', + 'l', 'u', 'e'}); + obj.init(byteRef, 0, byteRef.getData().length); + + Mutation m = (Mutation) serializer.serialize(obj, oi); + + Assert.assertArrayEquals("row1".getBytes(), m.getRow()); + + List updates = m.getUpdates(); + Assert.assertEquals(3, updates.size()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(baos); + + ColumnUpdate update = updates.get(0); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq1", new String(update.getColumnQualifier())); + Assert.assertEquals("foo", new String(update.getColumnVisibility())); + + out.writeInt(10); + Assert.assertArrayEquals(baos.toByteArray(), update.getValue()); + + update = updates.get(1); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq2", new String(update.getColumnQualifier())); + Assert.assertEquals("foo", new String(update.getColumnVisibility())); + + baos.reset(); + out.writeInt(20); + Assert.assertArrayEquals(baos.toByteArray(), update.getValue()); + + update = updates.get(2); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq3", new String(update.getColumnQualifier())); + Assert.assertEquals("foo", new String(update.getColumnVisibility())); + + Assert.assertEquals("value", new String(update.getValue())); + } + + @Test + public void testMapSerialization() throws IOException, SerDeException { + List columns = Arrays.asList("row", "col"); + List types = 
Arrays. asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory + .getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo)); + List typeNames = new ArrayList(types.size()); + for (TypeInfo type : types) { + typeNames.add(type.getTypeName()); + } + + Properties tableProperties = new Properties(); + tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:*"); + tableProperties.setProperty(serdeConstants.FIELD_DELIM, " "); + tableProperties.setProperty(serdeConstants.COLLECTION_DELIM, ","); + tableProperties.setProperty(serdeConstants.MAPKEY_DELIM, ":"); + tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns)); + tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames)); + AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), + tableProperties, AccumuloSerDe.class.getSimpleName()); + SerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters(); + + TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); + LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory + .createLazyObjectInspector(stringTypeInfo, new byte[] {0}, 0, + serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector( + stringOI, stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), + serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory + .getLazySimpleStructObjectInspector(columns, Arrays.asList(stringOI, mapOI), (byte) ' ', + serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), + serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, + accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), + accumuloSerDeParams.getRowIdFactory()); + + // Create the LazyStruct from the LazyStruct...Inspector + LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(structOI); + + ByteArrayRef byteRef = new ByteArrayRef(); + byteRef.setData("row1 cq1:10,cq2:20,cq3:value".getBytes()); + obj.init(byteRef, 0, byteRef.getData().length); + + Mutation m = (Mutation) serializer.serialize(obj, structOI); + + Assert.assertArrayEquals("row1".getBytes(), m.getRow()); + + List updates = m.getUpdates(); + Assert.assertEquals(3, updates.size()); + + ColumnUpdate update = updates.get(0); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq1", new String(update.getColumnQualifier())); + Assert.assertEquals("10", new String(update.getValue())); + + update = updates.get(1); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq2", new String(update.getColumnQualifier())); + Assert.assertEquals("20", new String(update.getValue())); + + update = updates.get(2); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq3", new String(update.getColumnQualifier())); + Assert.assertEquals("value", new String(update.getValue())); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidRowIdOffset() throws SerDeException { + ArrayList mappings = new ArrayList(); + + // Should fail because of the -1 + new AccumuloRowSerializer(-1, null, mappings, new 
ColumnVisibility(), null); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java new file mode 100644 index 0000000..bf3acd0 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java @@ -0,0 +1,467 @@ +package org.apache.hadoop.hive.accumulo.serde; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; +import java.util.Set; + +import org.apache.accumulo.core.data.ColumnUpdate; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.AccumuloHiveRow; +import org.apache.hadoop.hive.accumulo.LazyAccumuloRow; +import org.apache.hadoop.hive.accumulo.columns.InvalidColumnMappingException; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyArray; +import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyMap; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazyString; +import org.apache.hadoop.hive.serde2.lazy.LazyStruct; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import com.google.common.base.Joiner; + +public class TestAccumuloSerDe { + @SuppressWarnings("unused") + private static final Logger log = Logger.getLogger(TestAccumuloSerDe.class); + + protected AccumuloSerDe serde; + + @Before + public void setup() { + serde = new AccumuloSerDe(); + } + + @Test(expected = TooManyHiveColumnsException.class) + public void moreHiveColumnsThanAccumuloColumns() throws Exception { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f3"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,field1,field2,field3,field4"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string,string,string"); + + serde.initialize(conf, properties); + serde.deserialize(new Text("fail")); + } + + @Test(expected = TooManyAccumuloColumnsException.class) + public void moreAccumuloColumnsThanHiveColumns() throws Exception { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + + 
properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1,cf:f2,cf:f3"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,field1,field2"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string"); + + serde.initialize(conf, properties); + serde.deserialize(new Text("fail")); + } + + @Test(expected = NullPointerException.class) + public void emptyConfiguration() throws SerDeException { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + serde.initialize(conf, properties); + } + + @Test + public void simpleColumnMapping() throws SerDeException { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1,cf:f2,cf:f3"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,field1,field2,field3"); + + serde.initialize(conf, properties); + assertNotNull(serde.getCachedRow()); + } + + @Test + public void withRowID() throws SerDeException { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:f1,:rowID,cf:f2,cf:f3"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "field1,field2,field3,field4"); + serde.initialize(conf, properties); + assertNotNull(serde.getCachedRow()); + } + + @Test(expected = InvalidColumnMappingException.class) + public void invalidColMapping() throws Exception { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf,cf:f2,cf:f3"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "field2,field3,field4"); + + serde.initialize(conf, properties); + AccumuloHiveRow row = new AccumuloHiveRow(); + row.setRowId("r1"); + Object obj = serde.deserialize(row); + assertTrue(obj instanceof LazyAccumuloRow); + LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj; + lazyRow.getField(0); + } + + @Test(expected = TooManyAccumuloColumnsException.class) + public void deserializeWithTooFewHiveColumns() throws Exception { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1,cf:f2,cf:f3"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col1,col2"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string"); + + serde.initialize(conf, properties); + serde.deserialize(new Text("fail")); + } + + @Test + public void testArraySerialization() throws Exception { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,array"); + properties.setProperty(serdeConstants.COLLECTION_DELIM, ":"); + + // Get one of the default separators to avoid having to set a custom separator + char separator = ':'; + + serde.initialize(conf, properties); + + AccumuloHiveRow row = new AccumuloHiveRow(); + row.setRowId("r1"); + row.add("cf", "vals", ("value1" + separator + "value2" + separator + "value3").getBytes()); + + Object obj = serde.deserialize(row); + + assertNotNull(obj); + assertTrue(obj instanceof LazyAccumuloRow); + + LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj; + Object field0 = 
lazyRow.getField(0); + assertNotNull(field0); + assertTrue(field0 instanceof LazyString); + assertEquals(row.getRowId(), ((LazyString) field0).getWritableObject().toString()); + + Object field1 = lazyRow.getField(1); + assertNotNull(field1); + assertTrue(field1 instanceof LazyArray); + LazyArray array = (LazyArray) field1; + + List values = array.getList(); + assertEquals(3, values.size()); + for (int i = 0; i < 3; i++) { + Object o = values.get(i); + assertNotNull(o); + assertTrue(o instanceof LazyString); + assertEquals("value" + (i + 1), ((LazyString) o).getWritableObject().toString()); + } + } + + @Test + public void testMapSerialization() throws Exception { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:vals"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,values"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,map"); + properties.setProperty(serdeConstants.COLLECTION_DELIM, ":"); + properties.setProperty(serdeConstants.MAPKEY_DELIM, "="); + + // Get one of the default separators to avoid having to set a custom separator + char collectionSeparator = ':', kvSeparator = '='; + + serde.initialize(conf, properties); + + AccumuloHiveRow row = new AccumuloHiveRow(); + row.setRowId("r1"); + row.add("cf", "vals", ("k1" + kvSeparator + "v1" + collectionSeparator + "k2" + kvSeparator + + "v2" + collectionSeparator + "k3" + kvSeparator + "v3").getBytes()); + + Object obj = serde.deserialize(row); + + assertNotNull(obj); + assertTrue(obj instanceof LazyAccumuloRow); + + LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj; + Object field0 = lazyRow.getField(0); + assertNotNull(field0); + assertTrue(field0 instanceof LazyString); + assertEquals(row.getRowId(), ((LazyString) field0).getWritableObject().toString()); + + Object field1 = lazyRow.getField(1); + assertNotNull(field1); + assertTrue(field1 instanceof LazyMap); + LazyMap map = (LazyMap) field1; + + Map untypedMap = map.getMap(); + assertEquals(3, map.getMapSize()); + Set expectedKeys = new HashSet(); + expectedKeys.add("k1"); + expectedKeys.add("k2"); + expectedKeys.add("k3"); + for (Entry entry : untypedMap.entrySet()) { + assertNotNull(entry.getKey()); + assertTrue(entry.getKey() instanceof LazyString); + LazyString key = (LazyString) entry.getKey(); + + assertNotNull(entry.getValue()); + assertTrue(entry.getValue() instanceof LazyString); + LazyString value = (LazyString) entry.getValue(); + + String strKey = key.getWritableObject().toString(), strValue = value.getWritableObject() + .toString(); + + assertTrue(expectedKeys.remove(strKey)); + + assertEquals(2, strValue.length()); + assertTrue(strValue.startsWith("v")); + assertTrue(strValue.endsWith(Character.toString(strKey.charAt(1)))); + } + + assertTrue("Did not find expected keys: " + expectedKeys, expectedKeys.isEmpty()); + } + + @Test + public void deserialization() throws Exception { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1,cf:f2,cf:f3"); + + properties.setProperty(serdeConstants.LIST_COLUMNS, "blah,field2,field3,field4"); + serde.initialize(conf, properties); + + AccumuloHiveRow row = new AccumuloHiveRow(); + row.setRowId("r1"); + row.add("cf", "f1", "v1".getBytes()); + row.add("cf", "f2", "v2".getBytes()); + + Object obj = serde.deserialize(row); + assertTrue(obj instanceof LazyAccumuloRow); + + 
LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj; + Object field0 = lazyRow.getField(0); + assertNotNull(field0); + assertTrue(field0 instanceof LazyString); + assertEquals(field0.toString(), "r1"); + + Object field1 = lazyRow.getField(1); + assertNotNull(field1); + assertTrue("Expected instance of LazyString but was " + field1.getClass(), + field1 instanceof LazyString); + assertEquals(field1.toString(), "v1"); + + Object field2 = lazyRow.getField(2); + assertNotNull(field2); + assertTrue(field2 instanceof LazyString); + assertEquals(field2.toString(), "v2"); + } + + @Test + public void testNoVisibilitySetsEmptyVisibility() throws SerDeException { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:f1,:rowID"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "field1,field2"); + + serde.initialize(conf, properties); + + AccumuloRowSerializer serializer = serde.getSerializer(); + + Assert.assertEquals(new ColumnVisibility(), serializer.getVisibility()); + } + + @Test + public void testColumnVisibilityForSerializer() throws SerDeException { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, "cf:f1,:rowID"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "field1,field2"); + properties.setProperty(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY, "foobar"); + + serde.initialize(conf, properties); + + AccumuloRowSerializer serializer = serde.getSerializer(); + + Assert.assertEquals(new ColumnVisibility("foobar"), serializer.getVisibility()); + } + + @Test + public void testCompositeKeyDeserialization() throws Exception { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:f1"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,field1"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, + "struct,string"); + properties.setProperty(DelimitedAccumuloRowIdFactory.ACCUMULO_COMPOSITE_DELIMITER, "_"); + properties.setProperty(AccumuloSerDeParameters.COMPOSITE_ROWID_FACTORY, + DelimitedAccumuloRowIdFactory.class.getName()); + + serde.initialize(conf, properties); + + AccumuloHiveRow row = new AccumuloHiveRow(); + row.setRowId("p1_p2_p3"); + row.add("cf", "f1", "v1".getBytes()); + + Object obj = serde.deserialize(row); + assertTrue(obj instanceof LazyAccumuloRow); + + LazyAccumuloRow lazyRow = (LazyAccumuloRow) obj; + Object field0 = lazyRow.getField(0); + assertNotNull(field0); + assertTrue(field0 instanceof LazyStruct); + LazyStruct struct = (LazyStruct) field0; + List fields = struct.getFieldsAsList(); + assertEquals(3, fields.size()); + for (int i = 0; i < fields.size(); i++) { + assertEquals(LazyString.class, fields.get(i).getClass()); + assertEquals("p" + (i + 1), fields.get(i).toString()); + } + + Object field1 = lazyRow.getField(1); + assertNotNull(field1); + assertTrue("Expected instance of LazyString but was " + field1.getClass(), + field1 instanceof LazyString); + assertEquals(field1.toString(), "v1"); + } + + @Test + public void testStructOfMapSerialization() throws IOException, SerDeException { + List columns = Arrays.asList("row", "col"); + List structColNames = Arrays.asList("map1", "map2"); + TypeInfo mapTypeInfo = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + + // struct,map2:map>,string + 
List types = Arrays. asList( + TypeInfoFactory.getStructTypeInfo(structColNames, Arrays.asList(mapTypeInfo, mapTypeInfo)), + TypeInfoFactory.stringTypeInfo); + + Properties tableProperties = new Properties(); + tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq"); + // Use the default separators [0, 1, 2, 3, ..., 7] + tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns)); + tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types)); + AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), + tableProperties, AccumuloSerDe.class.getSimpleName()); + SerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters(); + + byte[] seps = serDeParams.getSeparators(); + + // struct_map>> + + TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); + LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory + .createLazyObjectInspector(stringTypeInfo, new byte[] {0}, 0, + serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector( + stringOI, stringOI, seps[3], seps[4], serDeParams.getNullSequence(), + serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + LazySimpleStructObjectInspector rowStructOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory + .getLazySimpleStructObjectInspector(structColNames, + Arrays. asList(mapOI, mapOI), (byte) seps[2], + serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), + serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory + .getLazySimpleStructObjectInspector(columns, Arrays.asList(rowStructOI, stringOI), seps[1], + serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), + serDeParams.isEscaped(), serDeParams.getEscapeChar()); + + AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, + accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), + accumuloSerDeParams.getRowIdFactory()); + + Map map1 = new HashMap(), map2 = new HashMap(); + + map1.put("key10", "value10"); + map1.put("key11", "value11"); + + map2.put("key20", "value20"); + map2.put("key21", "value21"); + + ByteArrayRef byteRef = new ByteArrayRef(); + // Default separators are 1-indexed (instead of 0-indexed), thus the separator at offset 1 is + // (byte) 2 + // The separator for the hive row is \x02, for the row Id struct, \x03, and the maps \x04 and + // \x05 + String accumuloRow = "key10\5value10\4key11\5value11\3key20\5value20\4key21\5value21"; + LazyStruct entireStruct = (LazyStruct) LazyFactory.createLazyObject(structOI); + byteRef.setData((accumuloRow + "\2foo").getBytes()); + entireStruct.init(byteRef, 0, byteRef.getData().length); + + Mutation m = serializer.serialize(entireStruct, structOI); + Assert.assertArrayEquals(accumuloRow.getBytes(), m.getRow()); + Assert.assertEquals(1, m.getUpdates().size()); + ColumnUpdate update = m.getUpdates().get(0); + Assert.assertEquals("cf", new String(update.getColumnFamily())); + Assert.assertEquals("cq", new String(update.getColumnQualifier())); + Assert.assertEquals("foo", new String(update.getValue())); + + AccumuloHiveRow haRow = new AccumuloHiveRow(new String(m.getRow())); + haRow.add("cf", "cq", "foo".getBytes()); + + LazyAccumuloRow lazyAccumuloRow = new 
LazyAccumuloRow(structOI); + lazyAccumuloRow.init(haRow, accumuloSerDeParams.getColumnMappings(), + accumuloSerDeParams.getRowIdFactory()); + + List objects = lazyAccumuloRow.getFieldsAsList(); + Assert.assertEquals(2, objects.size()); + + Assert.assertEquals("foo", objects.get(1).toString()); + + LazyStruct rowStruct = (LazyStruct) objects.get(0); + List rowObjects = rowStruct.getFieldsAsList(); + Assert.assertEquals(2, rowObjects.size()); + + LazyMap rowMap = (LazyMap) rowObjects.get(0); + Map actualMap = rowMap.getMap(); + System.out.println("Actual map 1: " + actualMap); + Map actualStringMap = new HashMap(); + for (Entry entry : actualMap.entrySet()) { + actualStringMap.put(entry.getKey().toString(), entry.getValue().toString()); + } + + Assert.assertEquals(map1, actualStringMap); + + rowMap = (LazyMap) rowObjects.get(1); + actualMap = rowMap.getMap(); + System.out.println("Actual map 2: " + actualMap); + actualStringMap = new HashMap(); + for (Entry entry : actualMap.entrySet()) { + actualStringMap.put(entry.getKey().toString(), entry.getValue().toString()); + } + + Assert.assertEquals(map2, actualStringMap); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDeParameters.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDeParameters.java new file mode 100644 index 0000000..216f924 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDeParameters.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo.serde; + +import java.util.Properties; + +import org.apache.accumulo.core.security.Authorizations; +import org.apache.accumulo.core.security.ColumnVisibility; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.junit.Assert; +import org.junit.Test; + +/** + * + */ +public class TestAccumuloSerDeParameters { + + @Test + public void testParseColumnVisibility() throws SerDeException { + Properties properties = new Properties(); + Configuration conf = new Configuration(); + + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:f2,cf:f3"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "field1,field2,field3"); + properties.setProperty(serdeConstants.LIST_TYPE_NAME, "string,string,string"); + properties.setProperty(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY, "foo&bar"); + + AccumuloSerDeParameters params = new AccumuloSerDeParameters(conf, properties, + AccumuloSerDe.class.getName()); + + ColumnVisibility cv = params.getTableVisibilityLabel(); + + Assert.assertEquals(new ColumnVisibility("foo&bar"), cv); + } + + @Test + public void testParseAuthorizationsFromConf() throws SerDeException { + Configuration conf = new Configuration(false); + conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo,bar"); + + Authorizations auths = AccumuloSerDeParameters.getAuthorizationsFromConf(conf); + Assert.assertEquals(new Authorizations("foo,bar"), auths); + } + + @Test + public void testParseAuthorizationsFromnProperties() throws SerDeException { + Configuration conf = new Configuration(); + Properties properties = new Properties(); + + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:f2,cf:f3"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "field1,field2,field3"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string"); + properties.setProperty(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo,bar"); + + AccumuloSerDeParameters params = new AccumuloSerDeParameters(conf, properties, + AccumuloSerDe.class.getName()); + + Authorizations auths = params.getAuthorizations(); + Assert.assertEquals(new Authorizations("foo,bar"), auths); + } + + @Test + public void testNullAuthsFromProperties() throws SerDeException { + Configuration conf = new Configuration(); + Properties properties = new Properties(); + + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:f2,cf:f3"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "field1,field2,field3"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,string"); + + AccumuloSerDeParameters params = new AccumuloSerDeParameters(conf, properties, + AccumuloSerDe.class.getName()); + + Authorizations auths = params.getAuthorizations(); + Assert.assertNull(auths); + } + + @Test + public void testNullAuthsFromConf() throws SerDeException { + Configuration conf = new Configuration(false); + + Authorizations auths = AccumuloSerDeParameters.getAuthorizationsFromConf(conf); + Assert.assertNull(auths); + } +} diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java new file mode 100644 index 0000000..d464740 --- /dev/null +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java @@ 
-0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo.serde; + +import java.util.List; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.accumulo.columns.ColumnEncoding; +import org.apache.hadoop.hive.accumulo.columns.ColumnMapper; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazyString; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.junit.Assert; +import org.junit.Test; + +/** + * + */ +public class TestDefaultAccumuloRowIdFactory { + + @Test + public void testCorrectPrimitiveInspectors() throws SerDeException { + AccumuloSerDe accumuloSerDe = new AccumuloSerDe(); + + Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, + "string,int"); + + accumuloSerDe.initialize(conf, properties); + + AccumuloRowIdFactory factory = accumuloSerDe.getParams().getRowIdFactory(); + List columnTypes = accumuloSerDe.getParams().getHiveColumnTypes(); + ColumnMapper mapper = accumuloSerDe.getParams().getColumnMapper(); + SerDeParameters serDeParams = accumuloSerDe.getParams().getSerDeParameters(); + + List OIs = accumuloSerDe.getColumnObjectInspectors(columnTypes, serDeParams, mapper.getColumnMappings(), factory); + + Assert.assertEquals(2, OIs.size()); + Assert.assertEquals(LazyStringObjectInspector.class, OIs.get(0).getClass()); + Assert.assertEquals(LazyIntObjectInspector.class, OIs.get(1).getClass()); + } + + @Test + public void testCorrectComplexInspectors() throws SerDeException { + AccumuloSerDe accumuloSerDe = new AccumuloSerDe(); + + 
Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, + "struct,map"); + + accumuloSerDe.initialize(conf, properties); + + AccumuloRowIdFactory factory = accumuloSerDe.getParams().getRowIdFactory(); + List columnTypes = accumuloSerDe.getParams().getHiveColumnTypes(); + ColumnMapper mapper = accumuloSerDe.getParams().getColumnMapper(); + SerDeParameters serDeParams = accumuloSerDe.getParams().getSerDeParameters(); + + List OIs = accumuloSerDe.getColumnObjectInspectors(columnTypes, serDeParams, mapper.getColumnMappings(), factory); + + // Expect the correct OIs + Assert.assertEquals(2, OIs.size()); + Assert.assertEquals(LazySimpleStructObjectInspector.class, OIs.get(0).getClass()); + Assert.assertEquals(LazyMapObjectInspector.class, OIs.get(1).getClass()); + + LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) OIs.get(0); + Assert.assertEquals(2, (int) structOI.getSeparator()); + + LazyMapObjectInspector mapOI = (LazyMapObjectInspector) OIs.get(1); + Assert.assertEquals(2, (int) mapOI.getItemSeparator()); + Assert.assertEquals(3, (int) mapOI.getKeyValueSeparator()); + } + + @Test + public void testBinaryStringRowId() throws SerDeException { + AccumuloSerDe accumuloSerDe = new AccumuloSerDe(); + + Properties properties = new Properties(); + Configuration conf = new Configuration(); + properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq"); + properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col"); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, + "string,string"); + properties.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName()); + + accumuloSerDe.initialize(conf, properties); + + DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory(); + rowIdFactory.init(accumuloSerDe.getParams(), properties); + + LazyStringObjectInspector oi = LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector(false, (byte) '\\'); + LazyObjectBase lazyObj = rowIdFactory.createRowId(oi); + Assert.assertNotNull(lazyObj); + Assert.assertTrue(LazyString.class.isAssignableFrom(lazyObj.getClass())); + } + +} diff --git a/accumulo-handler/src/test/queries/positive/accumulo_custom_key.q b/accumulo-handler/src/test/queries/positive/accumulo_custom_key.q new file mode 100644 index 0000000..6684fd3 --- /dev/null +++ b/accumulo-handler/src/test/queries/positive/accumulo_custom_key.q @@ -0,0 +1,22 @@ +CREATE TABLE accumulo_ck_1(key struct, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.table.name" = "accumulo_custom", + "accumulo.columns.mapping" = ":rowid,cf:string", + "accumulo.composite.rowid.factory"="org.apache.hadoop.hive.accumulo.serde.DelimitedAccumuloRowIdFactory", + "accumulo.composite.delimiter" = "$"); + +CREATE EXTERNAL TABLE accumulo_ck_2(key string, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.table.name" = "accumulo_custom", + "accumulo.columns.mapping" = ":rowid,cf:string"); + +insert overwrite table accumulo_ck_1 select struct('1000','2000','3000'),'value' +from src where key = 100; + +select * from accumulo_ck_1; +select * from accumulo_ck_2; + +DROP TABLE accumulo_ck_1; +DROP TABLE accumulo_ck_2; diff 
--git a/accumulo-handler/src/test/queries/positive/accumulo_custom_key2.q b/accumulo-handler/src/test/queries/positive/accumulo_custom_key2.q new file mode 100644 index 0000000..038633f --- /dev/null +++ b/accumulo-handler/src/test/queries/positive/accumulo_custom_key2.q @@ -0,0 +1,13 @@ +CREATE TABLE accumulo_ck_3(key struct, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.table.name" = "accumulo_custom2", + "accumulo.columns.mapping" = ":rowid,cf:string", + "accumulo.composite.rowid"="org.apache.hadoop.hive.accumulo.serde.FirstCharAccumuloCompositeRowId"); + +insert overwrite table accumulo_ck_3 select struct('abcd','mnop','wxyz'),'value' +from src where key = 100; + +select * from accumulo_ck_3; + +DROP TABLE accumulo_ck_3; diff --git a/accumulo-handler/src/test/queries/positive/accumulo_joins.q b/accumulo-handler/src/test/queries/positive/accumulo_joins.q new file mode 100644 index 0000000..b72ec6b --- /dev/null +++ b/accumulo-handler/src/test/queries/positive/accumulo_joins.q @@ -0,0 +1,82 @@ +DROP TABLE users; +DROP TABLE states; +DROP TABLE countries; +DROP TABLE users_level; + +-- From HIVE-1257 + +CREATE TABLE users(key string, state string, country string, country_id int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,info:state,info:country,info:country_id" +); + +CREATE TABLE states(key string, name string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,state:name" +); + +CREATE TABLE countries(key string, name string, country string, country_id int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,info:name,info:country,info:country_id" +); + +INSERT OVERWRITE TABLE users SELECT 'user1', 'IA', 'USA', 0 +FROM src WHERE key=100; + +INSERT OVERWRITE TABLE states SELECT 'IA', 'Iowa' +FROM src WHERE key=100; + +INSERT OVERWRITE TABLE countries SELECT 'USA', 'United States', 'USA', 1 +FROM src WHERE key=100; + +set hive.input.format = org.apache.hadoop.hive.ql.io.HiveInputFormat; + +SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.key); + +SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.country); + +SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country_id = c.country_id); + +SELECT u.key, u.state, s.name FROM users u JOIN states s +ON (u.state = s.key); + +set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; + +SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.key); + +SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.country); + +SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country_id = c.country_id); + +SELECT u.key, u.state, s.name FROM users u JOIN states s +ON (u.state = s.key); + +DROP TABLE users; +DROP TABLE states; +DROP TABLE countries; + +CREATE TABLE users(key int, userid int, username string, created int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,f:userid,f:nickname,f:created"); + +CREATE TABLE users_level(key int, userid int, level int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES 
("accumulo.columns.mapping" = ":rowID,f:userid,f:level"); + +-- HIVE-1903: the problem fixed here showed up even without any data, +-- so no need to load any to test it +SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num + FROM users JOIN users_level ON (users.userid = users_level.userid) + GROUP BY year(from_unixtime(users.created)), level; + +DROP TABLE users; +DROP TABLE users_level; diff --git a/accumulo-handler/src/test/queries/positive/accumulo_predicate_pushdown.q b/accumulo-handler/src/test/queries/positive/accumulo_predicate_pushdown.q new file mode 100644 index 0000000..0f064af --- /dev/null +++ b/accumulo-handler/src/test/queries/positive/accumulo_predicate_pushdown.q @@ -0,0 +1,70 @@ +CREATE TABLE accumulo_pushdown(key string, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowid,cf:string"); + +INSERT OVERWRITE TABLE accumulo_pushdown +SELECT cast(key as string), value +FROM src; + +-- with full pushdown +explain select * from accumulo_pushdown where key>'90'; + +select * from accumulo_pushdown where key>'90'; +select * from accumulo_pushdown where key<'1'; +select * from accumulo_pushdown where key<='2'; +select * from accumulo_pushdown where key>='90'; + +-- with constant expression +explain select * from accumulo_pushdown where key>=cast(40 + 50 as string); +select * from accumulo_pushdown where key>=cast(40 + 50 as string); + +-- with partial pushdown + +explain select * from accumulo_pushdown where key>'90' and value like '%9%'; + +select * from accumulo_pushdown where key>'90' and value like '%9%'; + +-- with two residuals + +explain select * from accumulo_pushdown +where key>='90' and value like '%9%' and key=cast(value as int); + +select * from accumulo_pushdown +where key>='90' and value like '%9%' and key=cast(value as int); + + +-- with contradictory pushdowns + +explain select * from accumulo_pushdown +where key<'80' and key>'90' and value like '%90%'; + +select * from accumulo_pushdown +where key<'80' and key>'90' and value like '%90%'; + +-- with nothing to push down + +explain select * from accumulo_pushdown; + +-- with a predicate which is not actually part of the filter, so +-- it should be ignored by pushdown + +explain select * from accumulo_pushdown +where (case when key<'90' then 2 else 4 end) > 3; + +-- with a predicate which is under an OR, so it should +-- be ignored by pushdown + +explain select * from accumulo_pushdown +where key<='80' or value like '%90%'; + +explain select * from accumulo_pushdown where key > '281' +and key < '287'; + +select * from accumulo_pushdown where key > '281' +and key < '287'; + +set hive.optimize.ppd.storage=false; + +-- with pushdown disabled + +explain select * from accumulo_pushdown where key<='90'; diff --git a/accumulo-handler/src/test/queries/positive/accumulo_queries.q b/accumulo-handler/src/test/queries/positive/accumulo_queries.q new file mode 100644 index 0000000..279b661 --- /dev/null +++ b/accumulo-handler/src/test/queries/positive/accumulo_queries.q @@ -0,0 +1,158 @@ +DROP TABLE accumulo_table_1; +CREATE TABLE accumulo_table_1(key int, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,cf:string") +TBLPROPERTIES ("accumulo.table.name" = "accumulo_table_0"); + +DESCRIBE EXTENDED accumulo_table_1; + +select * from accumulo_table_1; + +EXPLAIN FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT * 
WHERE (key%2)=0; +FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT * WHERE (key%2)=0; + +DROP TABLE accumulo_table_2; +CREATE EXTERNAL TABLE accumulo_table_2(key int, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,cf:string") +TBLPROPERTIES ("accumulo.table.name" = "accumulo_table_0"); + +EXPLAIN +SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +ORDER BY key, value LIMIT 20; + +SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +ORDER BY key, value LIMIT 20; + +EXPLAIN +SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1 WHERE 100 < accumulo_table_1.key) x +JOIN +(SELECT accumulo_table_2.* FROM accumulo_table_2 WHERE accumulo_table_2.key < 120) Y +ON (x.key = Y.key) +ORDER BY key, value; + +SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1 WHERE 100 < accumulo_table_1.key) x +JOIN +(SELECT accumulo_table_2.* FROM accumulo_table_2 WHERE accumulo_table_2.key < 120) Y +ON (x.key = Y.key) +ORDER BY key,value; + +DROP TABLE empty_accumulo_table; +CREATE TABLE empty_accumulo_table(key int, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,cf:string"); + +DROP TABLE empty_normal_table; +CREATE TABLE empty_normal_table(key int, value string); + +select * from (select count(1) as c from empty_normal_table union all select count(1) as c from empty_accumulo_table) x order by c; +select * from (select count(1) c from empty_normal_table union all select count(1) as c from accumulo_table_1) x order by c; +select * from (select count(1) c from src union all select count(1) as c from empty_accumulo_table) x order by c; +select * from (select count(1) c from src union all select count(1) as c from accumulo_table_1) x order by c; + +CREATE TABLE accumulo_table_3(key int, value string, count int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,cf:val,cf2:count" +); + +EXPLAIN +INSERT OVERWRITE TABLE accumulo_table_3 +SELECT x.key, x.value, Y.count +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y +ON (x.key = Y.key); + +INSERT OVERWRITE TABLE accumulo_table_3 +SELECT x.key, x.value, Y.count +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y +ON (x.key = Y.key); + +select count(1) from accumulo_table_3; +select * from accumulo_table_3 order by key, value limit 5; +select key, count from accumulo_table_3 order by key, count desc limit 5; + +DROP TABLE accumulo_table_4; +CREATE TABLE accumulo_table_4(key int, value1 string, value2 int, value3 int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,a:b,a:c,d:e" +); + +INSERT OVERWRITE TABLE accumulo_table_4 SELECT key, value, key+1, key+2 +FROM src WHERE key=98 OR key=100; + +SELECT * FROM accumulo_table_4 ORDER BY key; + +DROP TABLE accumulo_table_5; +CREATE EXTERNAL TABLE accumulo_table_5(key int, value map) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,a:*") +TBLPROPERTIES 
("accumulo.table.name" = "accumulo_table_4"); + +SELECT * FROM accumulo_table_5 ORDER BY key; + +DROP TABLE accumulo_table_6; +CREATE TABLE accumulo_table_6(key int, value map) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,cf:*" +); +INSERT OVERWRITE TABLE accumulo_table_6 SELECT key, map(value, key) FROM src +WHERE key=98 OR key=100; + +SELECT * FROM accumulo_table_6 ORDER BY key; + +DROP TABLE accumulo_table_7; +CREATE TABLE accumulo_table_7(value map, key int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = "cf:*,:rowID" +); +INSERT OVERWRITE TABLE accumulo_table_7 +SELECT map(value, key, upper(value), key+1), key FROM src +WHERE key=98 OR key=100; + +SELECT * FROM accumulo_table_7 ORDER BY key; + +DROP TABLE accumulo_table_8; +CREATE TABLE accumulo_table_8(key int, value1 string, value2 int, value3 int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,a:b,a:c,d:e" +); + +INSERT OVERWRITE TABLE accumulo_table_8 SELECT key, value, key+1, key+2 +FROM src WHERE key=98 OR key=100; + +SELECT * FROM accumulo_table_8 ORDER BY key; + +DROP TABLE accumulo_table_1; +DROP TABLE accumulo_table_2; +DROP TABLE accumulo_table_3; +DROP TABLE accumulo_table_4; +DROP TABLE accumulo_table_5; +DROP TABLE accumulo_table_6; +DROP TABLE accumulo_table_7; +DROP TABLE accumulo_table_8; +DROP TABLE empty_accumulo_table; +DROP TABLE empty_normal_table; diff --git a/accumulo-handler/src/test/queries/positive/accumulo_single_sourced_multi_insert.q b/accumulo-handler/src/test/queries/positive/accumulo_single_sourced_multi_insert.q new file mode 100644 index 0000000..f904d3f --- /dev/null +++ b/accumulo-handler/src/test/queries/positive/accumulo_single_sourced_multi_insert.q @@ -0,0 +1,24 @@ +-- HIVE-4375 Single sourced multi insert consists of native and non-native table mixed throws NPE +CREATE TABLE src_x1(key string, value string); +CREATE TABLE src_x2(key string, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowid, cf:value"); + +explain +from src a +insert overwrite table src_x1 +select key,"" where a.key > 0 AND a.key < 50 +insert overwrite table src_x2 +select value,"" where a.key > 50 AND a.key < 100; + +from src a +insert overwrite table src_x1 +select key,"" where a.key > 0 AND a.key < 50 +insert overwrite table src_x2 +select value,"" where a.key > 50 AND a.key < 100; + +select * from src_x1 order by key; +select * from src_x2 order by key; + +DROP TABLE src_x1; +DROP TABLE src_x2; diff --git a/accumulo-handler/src/test/results/positive/accumulo_custom_key.q.out b/accumulo-handler/src/test/results/positive/accumulo_custom_key.q.out new file mode 100644 index 0000000..8c85fb6 --- /dev/null +++ b/accumulo-handler/src/test/results/positive/accumulo_custom_key.q.out @@ -0,0 +1,78 @@ +PREHOOK: query: CREATE TABLE accumulo_ck_1(key struct, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.table.name" = "accumulo_custom", + "accumulo.columns.mapping" = ":rowid,cf:string", + "accumulo.composite.rowid.factory"="org.apache.hadoop.hive.accumulo.serde.DelimitedAccumuloRowIdFactory", + "accumulo.composite.delimiter" = "$") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE 
accumulo_ck_1(key struct<col1:string,col2:string,col3:string>, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.table.name" = "accumulo_custom", + "accumulo.columns.mapping" = ":rowid,cf:string", + "accumulo.composite.rowid.factory"="org.apache.hadoop.hive.accumulo.serde.DelimitedAccumuloRowIdFactory", + "accumulo.composite.delimiter" = "$") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_ck_1 +PREHOOK: query: CREATE EXTERNAL TABLE accumulo_ck_2(key string, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.table.name" = "accumulo_custom", + "accumulo.columns.mapping" = ":rowid,cf:string") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE EXTERNAL TABLE accumulo_ck_2(key string, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.table.name" = "accumulo_custom", + "accumulo.columns.mapping" = ":rowid,cf:string") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_ck_2 +PREHOOK: query: insert overwrite table accumulo_ck_1 select struct('1000','2000','3000'),'value' +from src where key = 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@accumulo_ck_1 +POSTHOOK: query: insert overwrite table accumulo_ck_1 select struct('1000','2000','3000'),'value' +from src where key = 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@accumulo_ck_1 +PREHOOK: query: select * from accumulo_ck_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_ck_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_ck_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_ck_1 +#### A masked pattern was here #### +{"col1":"1000","col2":"2000","col3":"3000"} value +PREHOOK: query: select * from accumulo_ck_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_ck_2 +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_ck_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_ck_2 +#### A masked pattern was here #### +1000$2000$3000 value +PREHOOK: query: DROP TABLE accumulo_ck_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_ck_1 +PREHOOK: Output: default@accumulo_ck_1 +POSTHOOK: query: DROP TABLE accumulo_ck_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_ck_1 +POSTHOOK: Output: default@accumulo_ck_1 +PREHOOK: query: DROP TABLE accumulo_ck_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_ck_2 +PREHOOK: Output: default@accumulo_ck_2 +POSTHOOK: query: DROP TABLE accumulo_ck_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_ck_2 +POSTHOOK: Output: default@accumulo_ck_2 diff --git a/accumulo-handler/src/test/results/positive/accumulo_custom_key2.q.out b/accumulo-handler/src/test/results/positive/accumulo_custom_key2.q.out new file mode 100644 index 0000000..48d1f84 --- /dev/null +++ b/accumulo-handler/src/test/results/positive/accumulo_custom_key2.q.out @@ -0,0 +1,44 @@ +PREHOOK: query: CREATE TABLE accumulo_ck_3(key struct<col1:string,col2:string,col3:string>, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.table.name" = "accumulo_custom2", + "accumulo.columns.mapping" = ":rowid,cf:string", + "accumulo.composite.rowid"="org.apache.hadoop.hive.accumulo.serde.FirstCharAccumuloCompositeRowId") +PREHOOK: type:
CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE accumulo_ck_3(key struct<col1:string,col2:string,col3:string>, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( + "accumulo.table.name" = "accumulo_custom2", + "accumulo.columns.mapping" = ":rowid,cf:string", + "accumulo.composite.rowid"="org.apache.hadoop.hive.accumulo.serde.FirstCharAccumuloCompositeRowId") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_ck_3 +PREHOOK: query: insert overwrite table accumulo_ck_3 select struct('abcd','mnop','wxyz'),'value' +from src where key = 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@accumulo_ck_3 +POSTHOOK: query: insert overwrite table accumulo_ck_3 select struct('abcd','mnop','wxyz'),'value' +from src where key = 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@accumulo_ck_3 +PREHOOK: query: select * from accumulo_ck_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_ck_3 +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_ck_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_ck_3 +#### A masked pattern was here #### +{"col1":"a","col2":"m","col3":"w"} value +PREHOOK: query: DROP TABLE accumulo_ck_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_ck_3 +PREHOOK: Output: default@accumulo_ck_3 +POSTHOOK: query: DROP TABLE accumulo_ck_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_ck_3 +POSTHOOK: Output: default@accumulo_ck_3 diff --git a/accumulo-handler/src/test/results/positive/accumulo_joins.q.out b/accumulo-handler/src/test/results/positive/accumulo_joins.q.out new file mode 100644 index 0000000..32019dc --- /dev/null +++ b/accumulo-handler/src/test/results/positive/accumulo_joins.q.out @@ -0,0 +1,277 @@ +PREHOOK: query: DROP TABLE users +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE users +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE states +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE states +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE countries +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE countries +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE users_level +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE users_level +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- From HIVE-1257 + +CREATE TABLE users(key string, state string, country string, country_id int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,info:state,info:country,info:country_id" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- From HIVE-1257 + +CREATE TABLE users(key string, state string, country string, country_id int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,info:state,info:country,info:country_id" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@users +PREHOOK: query: CREATE TABLE states(key string, name string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,state:name" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE states(key string, name string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES (
+"accumulo.columns.mapping" = ":rowID,state:name" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@states +PREHOOK: query: CREATE TABLE countries(key string, name string, country string, country_id int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,info:name,info:country,info:country_id" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE countries(key string, name string, country string, country_id int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,info:name,info:country,info:country_id" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@countries +PREHOOK: query: INSERT OVERWRITE TABLE users SELECT 'user1', 'IA', 'USA', 0 +FROM src WHERE key=100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@users +POSTHOOK: query: INSERT OVERWRITE TABLE users SELECT 'user1', 'IA', 'USA', 0 +FROM src WHERE key=100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@users +PREHOOK: query: INSERT OVERWRITE TABLE states SELECT 'IA', 'Iowa' +FROM src WHERE key=100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@states +POSTHOOK: query: INSERT OVERWRITE TABLE states SELECT 'IA', 'Iowa' +FROM src WHERE key=100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@states +PREHOOK: query: INSERT OVERWRITE TABLE countries SELECT 'USA', 'United States', 'USA', 1 +FROM src WHERE key=100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@countries +POSTHOOK: query: INSERT OVERWRITE TABLE countries SELECT 'USA', 'United States', 'USA', 1 +FROM src WHERE key=100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@countries +PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@countries +PREHOOK: Input: default@users +#### A masked pattern was here #### +POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@countries +POSTHOOK: Input: default@users +#### A masked pattern was here #### +user1 USA United States USA +PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.country) +PREHOOK: type: QUERY +PREHOOK: Input: default@countries +PREHOOK: Input: default@users +#### A masked pattern was here #### +POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.country) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@countries +POSTHOOK: Input: default@users +#### A masked pattern was here #### +user1 USA United States USA +PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country_id = c.country_id) +PREHOOK: type: QUERY +PREHOOK: Input: default@countries +PREHOOK: Input: default@users +#### A masked pattern was here #### +POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country_id = c.country_id) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@countries +POSTHOOK: Input: default@users +#### A masked pattern was here #### +PREHOOK: query: SELECT u.key, u.state, s.name FROM 
users u JOIN states s +ON (u.state = s.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@states +PREHOOK: Input: default@users +#### A masked pattern was here #### +POSTHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s +ON (u.state = s.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@states +POSTHOOK: Input: default@users +#### A masked pattern was here #### +user1 IA Iowa +PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@countries +PREHOOK: Input: default@users +#### A masked pattern was here #### +POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@countries +POSTHOOK: Input: default@users +#### A masked pattern was here #### +user1 USA United States USA +PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.country) +PREHOOK: type: QUERY +PREHOOK: Input: default@countries +PREHOOK: Input: default@users +#### A masked pattern was here #### +POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country = c.country) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@countries +POSTHOOK: Input: default@users +#### A masked pattern was here #### +user1 USA United States USA +PREHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country_id = c.country_id) +PREHOOK: type: QUERY +PREHOOK: Input: default@countries +PREHOOK: Input: default@users +#### A masked pattern was here #### +POSTHOOK: query: SELECT u.key, u.country, c.name, c.key FROM users u JOIN countries c +ON (u.country_id = c.country_id) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@countries +POSTHOOK: Input: default@users +#### A masked pattern was here #### +PREHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s +ON (u.state = s.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@states +PREHOOK: Input: default@users +#### A masked pattern was here #### +POSTHOOK: query: SELECT u.key, u.state, s.name FROM users u JOIN states s +ON (u.state = s.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@states +POSTHOOK: Input: default@users +#### A masked pattern was here #### +user1 IA Iowa +PREHOOK: query: DROP TABLE users +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@users +PREHOOK: Output: default@users +POSTHOOK: query: DROP TABLE users +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@users +POSTHOOK: Output: default@users +PREHOOK: query: DROP TABLE states +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@states +PREHOOK: Output: default@states +POSTHOOK: query: DROP TABLE states +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@states +POSTHOOK: Output: default@states +PREHOOK: query: DROP TABLE countries +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@countries +PREHOOK: Output: default@countries +POSTHOOK: query: DROP TABLE countries +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@countries +POSTHOOK: Output: default@countries +PREHOOK: query: CREATE TABLE users(key int, userid int, username string, created int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,f:userid,f:nickname,f:created") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE users(key int, userid int, username string, created int) +STORED BY 
'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,f:userid,f:nickname,f:created") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@users +PREHOOK: query: CREATE TABLE users_level(key int, userid int, level int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,f:userid,f:level") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE users_level(key int, userid int, level int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,f:userid,f:level") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@users_level +PREHOOK: query: -- HIVE-1903: the problem fixed here showed up even without any data, +-- so no need to load any to test it +SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num + FROM users JOIN users_level ON (users.userid = users_level.userid) + GROUP BY year(from_unixtime(users.created)), level +PREHOOK: type: QUERY +PREHOOK: Input: default@users +PREHOOK: Input: default@users_level +#### A masked pattern was here #### +POSTHOOK: query: -- HIVE-1903: the problem fixed here showed up even without any data, +-- so no need to load any to test it +SELECT year(from_unixtime(users.created)) AS year, level, count(users.userid) AS num + FROM users JOIN users_level ON (users.userid = users_level.userid) + GROUP BY year(from_unixtime(users.created)), level +POSTHOOK: type: QUERY +POSTHOOK: Input: default@users +POSTHOOK: Input: default@users_level +#### A masked pattern was here #### +PREHOOK: query: DROP TABLE users +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@users +PREHOOK: Output: default@users +POSTHOOK: query: DROP TABLE users +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@users +POSTHOOK: Output: default@users +PREHOOK: query: DROP TABLE users_level +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@users_level +PREHOOK: Output: default@users_level +POSTHOOK: query: DROP TABLE users_level +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@users_level +POSTHOOK: Output: default@users_level diff --git a/accumulo-handler/src/test/results/positive/accumulo_predicate_pushdown.q.out b/accumulo-handler/src/test/results/positive/accumulo_predicate_pushdown.q.out new file mode 100644 index 0000000..49190e0 --- /dev/null +++ b/accumulo-handler/src/test/results/positive/accumulo_predicate_pushdown.q.out @@ -0,0 +1,599 @@ +PREHOOK: query: CREATE TABLE accumulo_pushdown(key string, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowid,cf:string") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE accumulo_pushdown(key string, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowid,cf:string") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_pushdown +PREHOOK: query: INSERT OVERWRITE TABLE accumulo_pushdown +SELECT cast(key as string), value +FROM src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@accumulo_pushdown +POSTHOOK: query: INSERT OVERWRITE TABLE accumulo_pushdown +SELECT cast(key as string), value +FROM src 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@accumulo_pushdown +PREHOOK: query: -- with full pushdown +explain select * from accumulo_pushdown where key>'90' +PREHOOK: type: QUERY +POSTHOOK: query: -- with full pushdown +explain select * from accumulo_pushdown where key>'90' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_pushdown + filterExpr: (key > '90') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key > '90') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from accumulo_pushdown where key>'90' +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_pushdown where key>'90' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 +PREHOOK: query: select * from accumulo_pushdown where key<'1' +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_pushdown where key<'1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: select * from accumulo_pushdown where key<='2' +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_pushdown where key<='2' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +119 val_119 +12 val_12 +120 val_120 +125 val_125 +126 val_126 +128 val_128 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +136 val_136 +137 val_137 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +149 val_149 +15 val_15 +150 val_150 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +165 val_165 +166 val_166 +167 val_167 +168 val_168 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +174 val_174 +175 val_175 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +192 val_192 +193 val_193 +194 val_194 +195 val_195 +196 val_196 +197 val_197 +199 val_199 +2 val_2 +PREHOOK: query: select * from accumulo_pushdown where key>='90' +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +POSTHOOK: query: select * 
from accumulo_pushdown where key>='90' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +90 val_90 +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 +PREHOOK: query: -- with constant expression +explain select * from accumulo_pushdown where key>=cast(40 + 50 as string) +PREHOOK: type: QUERY +POSTHOOK: query: -- with constant expression +explain select * from accumulo_pushdown where key>=cast(40 + 50 as string) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_pushdown + filterExpr: (key >= '90') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key >= '90') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from accumulo_pushdown where key>=cast(40 + 50 as string) +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_pushdown where key>=cast(40 + 50 as string) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +90 val_90 +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 +PREHOOK: query: -- with partial pushdown + +explain select * from accumulo_pushdown where key>'90' and value like '%9%' +PREHOOK: type: QUERY +POSTHOOK: query: -- with partial pushdown + +explain select * from accumulo_pushdown where key>'90' and value like '%9%' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_pushdown + filterExpr: (key > '90') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value like '%9%') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from accumulo_pushdown where key>'90' and value like '%9%' +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_pushdown where key>'90' and value 
like '%9%' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 +PREHOOK: query: -- with two residuals + +explain select * from accumulo_pushdown +where key>='90' and value like '%9%' and key=cast(value as int) +PREHOOK: type: QUERY +POSTHOOK: query: -- with two residuals + +explain select * from accumulo_pushdown +where key>='90' and value like '%9%' and key=cast(value as int) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_pushdown + filterExpr: (key >= '90') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((value like '%9%') and (key = UDFToInteger(value))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from accumulo_pushdown +where key>='90' and value like '%9%' and key=cast(value as int) +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_pushdown +where key>='90' and value like '%9%' and key=cast(value as int) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +PREHOOK: query: -- with contradictory pushdowns + +explain select * from accumulo_pushdown +where key<'80' and key>'90' and value like '%90%' +PREHOOK: type: QUERY +POSTHOOK: query: -- with contradictory pushdowns + +explain select * from accumulo_pushdown +where key<'80' and key>'90' and value like '%90%' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_pushdown + filterExpr: ((key < '80') and (key > '90')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (value like '%90%') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from accumulo_pushdown +where key<'80' and key>'90' and value like '%90%' +PREHOOK: type: QUERY +PREHOOK: Input: 
default@accumulo_pushdown +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_pushdown +where key<'80' and key>'90' and value like '%90%' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +PREHOOK: query: -- with nothing to push down + +explain select * from accumulo_pushdown +PREHOOK: type: QUERY +POSTHOOK: query: -- with nothing to push down + +explain select * from accumulo_pushdown +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: accumulo_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + ListSink + +PREHOOK: query: -- with a predicate which is not actually part of the filter, so +-- it should be ignored by pushdown + +explain select * from accumulo_pushdown +where (case when key<'90' then 2 else 4 end) > 3 +PREHOOK: type: QUERY +POSTHOOK: query: -- with a predicate which is not actually part of the filter, so +-- it should be ignored by pushdown + +explain select * from accumulo_pushdown +where (case when key<'90' then 2 else 4 end) > 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (CASE WHEN ((key < '90')) THEN (2) ELSE (4) END > 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- with a predicate which is under an OR, so it should +-- be ignored by pushdown + +explain select * from accumulo_pushdown +where key<='80' or value like '%90%' +PREHOOK: type: QUERY +POSTHOOK: query: -- with a predicate which is under an OR, so it should +-- be ignored by pushdown + +explain select * from accumulo_pushdown +where key<='80' or value like '%90%' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((key <= '80') or (value like '%90%')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column 
stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from accumulo_pushdown where key > '281' +and key < '287' +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from accumulo_pushdown where key > '281' +and key < '287' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_pushdown + filterExpr: ((key > '281') and (key < '287')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((key > '281') and (key < '287')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from accumulo_pushdown where key > '281' +and key < '287' +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_pushdown where key > '281' +and key < '287' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_pushdown +#### A masked pattern was here #### +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +PREHOOK: query: -- with pushdown disabled + +explain select * from accumulo_pushdown where key<='90' +PREHOOK: type: QUERY +POSTHOOK: query: -- with pushdown disabled + +explain select * from accumulo_pushdown where key<='90' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_pushdown + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (key <= '90') (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out new file mode 100644 index 0000000..a9e0a8b --- /dev/null +++ b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out @@ 
-0,0 +1,899 @@ +PREHOOK: query: DROP TABLE accumulo_table_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE accumulo_table_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE accumulo_table_1(key int, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,cf:string") +TBLPROPERTIES ("accumulo.table.name" = "accumulo_table_0") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE accumulo_table_1(key int, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,cf:string") +TBLPROPERTIES ("accumulo.table.name" = "accumulo_table_0") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_table_1 +PREHOOK: query: DESCRIBE EXTENDED accumulo_table_1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@accumulo_table_1 +POSTHOOK: query: DESCRIBE EXTENDED accumulo_table_1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@accumulo_table_1 +key int from deserializer +value string from deserializer + +#### A masked pattern was here #### +PREHOOK: query: select * from accumulo_table_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_table_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_1 +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT * WHERE (key%2)=0 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT * WHERE (key%2)=0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key % 2) = 0) (type: boolean) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableInputFormat + output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat + serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe + name: default.accumulo_table_1 + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT * WHERE (key%2)=0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@accumulo_table_1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT * WHERE (key%2)=0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@accumulo_table_1 +PREHOOK: query: DROP TABLE accumulo_table_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE accumulo_table_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE accumulo_table_2(key int, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,cf:string") +TBLPROPERTIES ("accumulo.table.name" = "accumulo_table_0") +PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:default +POSTHOOK: query: CREATE EXTERNAL TABLE accumulo_table_2(key int, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,cf:string") +TBLPROPERTIES ("accumulo.table.name" = "accumulo_table_0") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_table_2 +PREHOOK: query: EXPLAIN +SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +ORDER BY key, value LIMIT 20 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +ORDER BY key, value LIMIT 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_table_1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 15 Data size: 3006 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 3006 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 15 Data size: 3006 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +ORDER BY key, value LIMIT 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_1 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +ORDER BY key, value LIMIT 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_1 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +104 val_104 +104 val_104 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +PREHOOK: query: EXPLAIN +SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1 WHERE 100 < accumulo_table_1.key) x +JOIN +(SELECT accumulo_table_2.* FROM accumulo_table_2 WHERE accumulo_table_2.key < 120) Y +ON (x.key = Y.key) +ORDER BY key, value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1 WHERE 100 < accumulo_table_1.key) x +JOIN +(SELECT accumulo_table_2.* FROM accumulo_table_2 WHERE accumulo_table_2.key < 120) Y +ON (x.key = Y.key) +ORDER BY key, value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_table_1 + filterExpr: (100 < key) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan + alias: accumulo_table_2 + filterExpr: (key < 120) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value 
expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1 WHERE 100 < accumulo_table_1.key) x +JOIN +(SELECT accumulo_table_2.* FROM accumulo_table_2 WHERE accumulo_table_2.key < 120) Y +ON (x.key = Y.key) +ORDER BY key,value +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_1 +PREHOOK: Input: default@accumulo_table_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT Y.* +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1 WHERE 100 < accumulo_table_1.key) x +JOIN +(SELECT accumulo_table_2.* FROM accumulo_table_2 WHERE accumulo_table_2.key < 120) Y +ON (x.key = Y.key) +ORDER BY key,value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_1 +POSTHOOK: Input: default@accumulo_table_2 +#### A masked pattern was here #### +12 val_12 +104 val_104 +114 val_114 +116 val_116 +118 val_118 +PREHOOK: query: DROP TABLE empty_accumulo_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE empty_accumulo_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE empty_accumulo_table(key int, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,cf:string") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE empty_accumulo_table(key int, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,cf:string") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@empty_accumulo_table +PREHOOK: query: DROP TABLE empty_normal_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE empty_normal_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE empty_normal_table(key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE 
empty_normal_table(key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@empty_normal_table +PREHOOK: query: select * from (select count(1) as c from empty_normal_table union all select count(1) as c from empty_accumulo_table) x order by c +PREHOOK: type: QUERY +PREHOOK: Input: default@empty_accumulo_table +PREHOOK: Input: default@empty_normal_table +#### A masked pattern was here #### +POSTHOOK: query: select * from (select count(1) as c from empty_normal_table union all select count(1) as c from empty_accumulo_table) x order by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@empty_accumulo_table +POSTHOOK: Input: default@empty_normal_table +#### A masked pattern was here #### +0 +0 +PREHOOK: query: select * from (select count(1) c from empty_normal_table union all select count(1) as c from accumulo_table_1) x order by c +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_1 +PREHOOK: Input: default@empty_normal_table +#### A masked pattern was here #### +POSTHOOK: query: select * from (select count(1) c from empty_normal_table union all select count(1) as c from accumulo_table_1) x order by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_1 +POSTHOOK: Input: default@empty_normal_table +#### A masked pattern was here #### +0 +155 +PREHOOK: query: select * from (select count(1) c from src union all select count(1) as c from empty_accumulo_table) x order by c +PREHOOK: type: QUERY +PREHOOK: Input: default@empty_accumulo_table +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from (select count(1) c from src union all select count(1) as c from empty_accumulo_table) x order by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@empty_accumulo_table +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +500 +PREHOOK: query: select * from (select count(1) c from src union all select count(1) as c from accumulo_table_1) x order by c +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_1 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from (select count(1) c from src union all select count(1) as c from accumulo_table_1) x order by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_1 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +155 +500 +PREHOOK: query: CREATE TABLE accumulo_table_3(key int, value string, count int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,cf:val,cf2:count" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE accumulo_table_3(key int, value string, count int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,cf:val,cf2:count" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_table_3 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE accumulo_table_3 +SELECT x.key, x.value, Y.count +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y +ON (x.key = Y.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE accumulo_table_3 +SELECT x.key, x.value, Y.count +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y 
+ON (x.key = Y.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(key) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1402 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1402 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: accumulo_table_1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 14 Data size: 1402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col1} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 15 Data size: 1542 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 1542 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1542 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableInputFormat + output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat + serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe + name: default.accumulo_table_3 + +PREHOOK: query: INSERT OVERWRITE TABLE accumulo_table_3 +SELECT x.key, x.value, Y.count +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y +ON (x.key = Y.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_1 +PREHOOK: Input: default@src +PREHOOK: Output: default@accumulo_table_3 +POSTHOOK: query: INSERT OVERWRITE TABLE accumulo_table_3 +SELECT x.key, x.value, Y.count +FROM +(SELECT accumulo_table_1.* FROM accumulo_table_1) x +JOIN +(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y +ON (x.key = Y.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_1 +POSTHOOK: Input: default@src +POSTHOOK: Output: default@accumulo_table_3 +PREHOOK: query: select count(1) from accumulo_table_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_3 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from accumulo_table_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_3 +#### A masked pattern was here #### +155 +PREHOOK: query: select * from accumulo_table_3 order by key, value limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_3 +#### A masked pattern was here #### +POSTHOOK: query: select * from accumulo_table_3 order by key, value limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_3 +#### A masked pattern was here #### +0 val_0 3 +2 val_2 1 +4 val_4 1 +8 val_8 1 +10 val_10 1 +PREHOOK: query: select key, count from accumulo_table_3 order by key, count desc limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_3 +#### A masked pattern was here #### +POSTHOOK: query: select key, count from accumulo_table_3 order by key, count desc limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_3 +#### A masked pattern was here #### +0 3 +2 1 +4 1 +8 1 +10 1 +PREHOOK: query: DROP TABLE accumulo_table_4 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE accumulo_table_4 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE accumulo_table_4(key int, value1 string, value2 int, value3 int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,a:b,a:c,d:e" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE accumulo_table_4(key int, value1 string, value2 int, value3 int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,a:b,a:c,d:e" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_table_4 +PREHOOK: query: INSERT OVERWRITE TABLE accumulo_table_4 SELECT key, value, key+1, key+2 +FROM src WHERE key=98 OR key=100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@accumulo_table_4 +POSTHOOK: query: INSERT OVERWRITE TABLE accumulo_table_4 SELECT key, value, key+1, key+2 +FROM src WHERE key=98 OR key=100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@accumulo_table_4 +PREHOOK: query: SELECT * FROM accumulo_table_4 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_4 +#### A masked pattern was here #### 
+POSTHOOK: query: SELECT * FROM accumulo_table_4 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_4 +#### A masked pattern was here #### +98 val_98 99 100 +100 val_100 101 102 +PREHOOK: query: DROP TABLE accumulo_table_5 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE accumulo_table_5 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE accumulo_table_5(key int, value map) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,a:*") +TBLPROPERTIES ("accumulo.table.name" = "accumulo_table_4") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE EXTERNAL TABLE accumulo_table_5(key int, value map) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowID,a:*") +TBLPROPERTIES ("accumulo.table.name" = "accumulo_table_4") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_table_5 +PREHOOK: query: SELECT * FROM accumulo_table_5 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM accumulo_table_5 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_5 +#### A masked pattern was here #### +98 {"b":"val_98","c":"99"} +100 {"b":"val_100","c":"101"} +PREHOOK: query: DROP TABLE accumulo_table_6 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE accumulo_table_6 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE accumulo_table_6(key int, value map) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,cf:*" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE accumulo_table_6(key int, value map) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,cf:*" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_table_6 +PREHOOK: query: INSERT OVERWRITE TABLE accumulo_table_6 SELECT key, map(value, key) FROM src +WHERE key=98 OR key=100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@accumulo_table_6 +POSTHOOK: query: INSERT OVERWRITE TABLE accumulo_table_6 SELECT key, map(value, key) FROM src +WHERE key=98 OR key=100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@accumulo_table_6 +PREHOOK: query: SELECT * FROM accumulo_table_6 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_6 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM accumulo_table_6 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_6 +#### A masked pattern was here #### +98 {"val_98":"98"} +100 {"val_100":"100"} +PREHOOK: query: DROP TABLE accumulo_table_7 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE accumulo_table_7 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE accumulo_table_7(value map, key int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = "cf:*,:rowID" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE accumulo_table_7(value map, key int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( 
+"accumulo.columns.mapping" = "cf:*,:rowID" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_table_7 +PREHOOK: query: INSERT OVERWRITE TABLE accumulo_table_7 +SELECT map(value, key, upper(value), key+1), key FROM src +WHERE key=98 OR key=100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@accumulo_table_7 +POSTHOOK: query: INSERT OVERWRITE TABLE accumulo_table_7 +SELECT map(value, key, upper(value), key+1), key FROM src +WHERE key=98 OR key=100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@accumulo_table_7 +PREHOOK: query: SELECT * FROM accumulo_table_7 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_7 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM accumulo_table_7 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_7 +#### A masked pattern was here #### +{"VAL_98":"99.0","val_98":"98"} 98 +{"VAL_100":"101.0","val_100":"100"} 100 +PREHOOK: query: DROP TABLE accumulo_table_8 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE accumulo_table_8 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE accumulo_table_8(key int, value1 string, value2 int, value3 int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,a:b,a:c,d:e" +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE accumulo_table_8(key int, value1 string, value2 int, value3 int) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ( +"accumulo.columns.mapping" = ":rowID,a:b,a:c,d:e" +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@accumulo_table_8 +PREHOOK: query: INSERT OVERWRITE TABLE accumulo_table_8 SELECT key, value, key+1, key+2 +FROM src WHERE key=98 OR key=100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@accumulo_table_8 +POSTHOOK: query: INSERT OVERWRITE TABLE accumulo_table_8 SELECT key, value, key+1, key+2 +FROM src WHERE key=98 OR key=100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@accumulo_table_8 +PREHOOK: query: SELECT * FROM accumulo_table_8 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@accumulo_table_8 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM accumulo_table_8 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@accumulo_table_8 +#### A masked pattern was here #### +98 val_98 99 100 +100 val_100 101 102 +PREHOOK: query: DROP TABLE accumulo_table_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_table_1 +PREHOOK: Output: default@accumulo_table_1 +POSTHOOK: query: DROP TABLE accumulo_table_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_table_1 +POSTHOOK: Output: default@accumulo_table_1 +PREHOOK: query: DROP TABLE accumulo_table_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_table_2 +PREHOOK: Output: default@accumulo_table_2 +POSTHOOK: query: DROP TABLE accumulo_table_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_table_2 +POSTHOOK: Output: default@accumulo_table_2 +PREHOOK: query: DROP TABLE accumulo_table_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_table_3 +PREHOOK: Output: default@accumulo_table_3 +POSTHOOK: query: DROP TABLE accumulo_table_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_table_3 +POSTHOOK: 
Output: default@accumulo_table_3 +PREHOOK: query: DROP TABLE accumulo_table_4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_table_4 +PREHOOK: Output: default@accumulo_table_4 +POSTHOOK: query: DROP TABLE accumulo_table_4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_table_4 +POSTHOOK: Output: default@accumulo_table_4 +PREHOOK: query: DROP TABLE accumulo_table_5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_table_5 +PREHOOK: Output: default@accumulo_table_5 +POSTHOOK: query: DROP TABLE accumulo_table_5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_table_5 +POSTHOOK: Output: default@accumulo_table_5 +PREHOOK: query: DROP TABLE accumulo_table_6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_table_6 +PREHOOK: Output: default@accumulo_table_6 +POSTHOOK: query: DROP TABLE accumulo_table_6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_table_6 +POSTHOOK: Output: default@accumulo_table_6 +PREHOOK: query: DROP TABLE accumulo_table_7 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_table_7 +PREHOOK: Output: default@accumulo_table_7 +POSTHOOK: query: DROP TABLE accumulo_table_7 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_table_7 +POSTHOOK: Output: default@accumulo_table_7 +PREHOOK: query: DROP TABLE accumulo_table_8 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@accumulo_table_8 +PREHOOK: Output: default@accumulo_table_8 +POSTHOOK: query: DROP TABLE accumulo_table_8 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@accumulo_table_8 +POSTHOOK: Output: default@accumulo_table_8 +PREHOOK: query: DROP TABLE empty_accumulo_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@empty_accumulo_table +PREHOOK: Output: default@empty_accumulo_table +POSTHOOK: query: DROP TABLE empty_accumulo_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@empty_accumulo_table +POSTHOOK: Output: default@empty_accumulo_table +PREHOOK: query: DROP TABLE empty_normal_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@empty_normal_table +PREHOOK: Output: default@empty_normal_table +POSTHOOK: query: DROP TABLE empty_normal_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@empty_normal_table +POSTHOOK: Output: default@empty_normal_table diff --git a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out new file mode 100644 index 0000000..6f22539 --- /dev/null +++ b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out @@ -0,0 +1,253 @@ +PREHOOK: query: -- HIVE-4375 Single sourced multi insert consists of native and non-native table mixed throws NPE +CREATE TABLE src_x1(key string, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- HIVE-4375 Single sourced multi insert consists of native and non-native table mixed throws NPE +CREATE TABLE src_x1(key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_x1 +PREHOOK: query: CREATE TABLE src_x2(key string, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES ("accumulo.columns.mapping" = ":rowid, cf:value") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE src_x2(key string, value string) +STORED BY 'org.apache.hadoop.hive.accumulo.AccumuloStorageHandler' +WITH SERDEPROPERTIES 
("accumulo.columns.mapping" = ":rowid, cf:value") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_x2 +PREHOOK: query: explain +from src a +insert overwrite table src_x1 +select key,"" where a.key > 0 AND a.key < 50 +insert overwrite table src_x2 +select value,"" where a.key > 50 AND a.key < 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src a +insert overwrite table src_x1 +select key,"" where a.key > 0 AND a.key < 50 +insert overwrite table src_x2 +select value,"" where a.key > 50 AND a.key < 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 0) and (key < 50)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), '' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_x1 + Filter Operator + predicate: ((key > 50) and (key < 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), '' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableInputFormat + output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat + serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe + name: default.src_x2 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_x1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_x1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_x1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: from src a +insert overwrite table src_x1 +select key,"" where a.key > 0 AND a.key < 50 +insert overwrite table src_x2 +select value,"" where a.key > 50 AND a.key < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_x1 +PREHOOK: Output: default@src_x2 +POSTHOOK: query: from src a +insert overwrite table src_x1 +select key,"" where a.key > 0 AND a.key < 50 +insert overwrite table src_x2 +select value,"" where a.key > 50 AND a.key < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_x1 +POSTHOOK: Output: default@src_x2 +POSTHOOK: Lineage: src_x1.key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_x1.value SIMPLE [] +PREHOOK: query: select * from src_x1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_x1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_x1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_x1 +#### A masked pattern was here #### +10 +11 +12 +12 +15 +15 +17 +18 +18 +19 +2 +20 +24 +24 +26 +26 +27 +28 +30 +33 +34 +35 +35 +35 +37 +37 +4 +41 +42 +42 +43 +44 +47 +5 +5 +5 +8 +9 +PREHOOK: query: select * from src_x2 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src_x2 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_x2 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_x2 +#### A masked pattern was here #### +val_51 +val_53 +val_54 +val_57 +val_58 +val_64 +val_65 +val_66 +val_67 +val_69 +val_70 +val_72 +val_74 +val_76 +val_77 +val_78 +val_80 +val_82 +val_83 +val_84 +val_85 +val_86 +val_87 +val_90 +val_92 +val_95 +val_96 +val_97 +val_98 +PREHOOK: query: DROP TABLE src_x1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_x1 +PREHOOK: Output: default@src_x1 +POSTHOOK: query: DROP TABLE src_x1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_x1 +POSTHOOK: Output: default@src_x1 +PREHOOK: query: DROP TABLE src_x2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_x2 +PREHOOK: Output: default@src_x2 +POSTHOOK: query: DROP TABLE src_x2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_x2 +POSTHOOK: Output: default@src_x2 diff --git a/accumulo-handler/src/test/templates/TestAccumuloCliDriver.vm b/accumulo-handler/src/test/templates/TestAccumuloCliDriver.vm new file mode 100644 index 0000000..9aea997 --- /dev/null +++ b/accumulo-handler/src/test/templates/TestAccumuloCliDriver.vm @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.cli; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import java.io.*; +import java.util.*; + +import org.apache.hadoop.hive.accumulo.AccumuloQTestUtil; +import org.apache.hadoop.hive.accumulo.AccumuloTestSetup; +import org.apache.hadoop.hive.ql.QTestUtil.MiniClusterType; +import org.apache.hadoop.hive.ql.session.SessionState; + +public class $className extends TestCase { + + private static final String HIVE_ROOT = AccumuloQTestUtil.ensurePathEndsInSlash(System.getProperty("hive.root")); + private AccumuloQTestUtil qt; + private AccumuloTestSetup setup; + + public $className(String name, AccumuloTestSetup setup) { + super(name); + qt = null; + this.setup = setup; + } + + @Override + protected void setUp() { + + MiniClusterType miniMR = MiniClusterType.valueForString("$clusterMode"); + + try { + qt = new AccumuloQTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), miniMR, setup); + } catch (Exception e) { + System.err.println("Exception: " + e.getMessage()); + e.printStackTrace(); + System.err.flush(); + fail("Unexpected exception in setup: " + e); + } + } + + @Override + protected void tearDown() { + try { + qt.shutdown(); + } + catch (Exception e) { + System.err.println("Exception: " + e.getMessage()); + e.printStackTrace(); + System.err.flush(); + fail("Unexpected exception in tearDown"); + } + } + + public static Test suite() { + Set qFilesToExecute = new HashSet(); + String qFiles = System.getProperty("qfile", "").trim(); + if(!qFiles.isEmpty()) { + for(String qFile : qFiles.split(",")) { + qFile = qFile.trim(); + if(!qFile.isEmpty()) { + qFilesToExecute.add(qFile); + } + } + } + TestSuite suite = new TestSuite(); + AccumuloTestSetup setup = new AccumuloTestSetup(suite); +#foreach ($qf in $qfiles) + #set ($fname = $qf.getName()) + #set ($eidx = $fname.indexOf('.')) + #set ($tname = $fname.substring(0, $eidx)) + if(qFilesToExecute.isEmpty() || qFilesToExecute.contains("$fname")) { + suite.addTest(new $className("testCliDriver_$tname", setup)); + } +#end + return setup; + } + +#foreach ($qf in $qfiles) + #set ($fname = $qf.getName()) + #set ($eidx = $fname.indexOf('.')) + #set ($tname = $fname.substring(0, $eidx)) + #set ($fpath = $qfilesMap.get($fname)) + public void testCliDriver_$tname() throws Exception { + runTest("$tname", "$fname", (HIVE_ROOT + "$fpath")); + } + +#end + + private void runTest(String tname, String fname, String fpath) throws Exception { + long startTime = System.currentTimeMillis(); + try { + System.err.println("Begin query: " + fname); + + qt.addFile(fpath); + + if (qt.shouldBeSkipped(fname)) { + System.err.println("Test " + fname + " skipped"); + return; + } + + qt.cliInit(fname); + qt.clearTestSideEffects(); + int ecode = qt.executeClient(fname); + if (ecode != 0) { + qt.failed(ecode, fname, null); + } + + ecode = qt.checkCliDriverResults(fname); + if (ecode != 0) { + qt.failedDiff(ecode, fname, null); + } + qt.clearPostTestEffects(); + + } catch (Throwable e) { + qt.failed(e, fname, null); + } + + long elapsedTime = System.currentTimeMillis() - startTime; + System.err.println("Done query: " + fname + " elapsedTime=" + elapsedTime/1000 + "s"); + assertTrue("Test passed", true); + } +} + diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml index 249956f..3b735ad 100644 --- a/itests/qtest/pom.xml +++ b/itests/qtest/pom.xml @@ -42,6 +42,11 @@ + org.apache.accumulo + accumulo-minicluster + test + + org.apache.hive hive-ant ${project.version} @@ -402,6 +407,7 
@@ + @@ -571,6 +577,18 @@ logFile="${project.build.directory}/testhbasenegativeclidrivergen.log" logDirectory="${project.build.directory}/qfile-results/hbase-handler/negative"/> + + + diff --git a/itests/util/pom.xml b/itests/util/pom.xml index aca01cb..8a7f3a7 100644 --- a/itests/util/pom.xml +++ b/itests/util/pom.xml @@ -35,6 +35,21 @@ + org.apache.accumulo + accumulo-minicluster + + + org.apache.hive + hive-accumulo-handler + ${project.version} + + + org.apache.hive + hive-accumulo-handler + ${project.version} + tests + + org.apache.hive hive-common ${project.version} diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/accumulo/AccumuloQTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/accumulo/AccumuloQTestUtil.java new file mode 100644 index 0000000..2ac679e --- /dev/null +++ b/itests/util/src/main/java/org/apache/hadoop/hive/accumulo/AccumuloQTestUtil.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.accumulo; + +import org.apache.hadoop.hive.ql.QTestUtil; + +/** + * AccumuloQTestUtil initializes Accumulo-specific test fixtures. + */ +public class AccumuloQTestUtil extends QTestUtil { + public AccumuloQTestUtil(String outDir, String logDir, MiniClusterType miniMr, + AccumuloTestSetup setup) throws Exception { + + super(outDir, logDir, miniMr, null); + setup.setupWithHiveConf(conf); + super.init(); + } + + @Override + public void init() throws Exception { + // defer + } +} diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/accumulo/AccumuloTestSetup.java b/itests/util/src/main/java/org/apache/hadoop/hive/accumulo/AccumuloTestSetup.java new file mode 100644 index 0000000..132e8c8 --- /dev/null +++ b/itests/util/src/main/java/org/apache/hadoop/hive/accumulo/AccumuloTestSetup.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.accumulo; + +import java.io.File; +import java.sql.Date; +import java.sql.Timestamp; + +import junit.extensions.TestSetup; +import junit.framework.Test; + +import org.apache.accumulo.core.client.AccumuloException; +import org.apache.accumulo.core.client.AccumuloSecurityException; +import org.apache.accumulo.core.client.BatchWriter; +import org.apache.accumulo.core.client.BatchWriterConfig; +import org.apache.accumulo.core.client.Connector; +import org.apache.accumulo.core.client.TableExistsException; +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.accumulo.core.client.admin.TableOperations; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.minicluster.MiniAccumuloCluster; +import org.apache.accumulo.minicluster.MiniAccumuloConfig; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; + +/** + * Start and stop an AccumuloMiniCluster for testing purposes + */ +public class AccumuloTestSetup extends TestSetup { + public static final String PASSWORD = "password"; + public static final String TABLE_NAME = "accumuloHiveTable"; + + protected MiniAccumuloCluster miniCluster; + + public AccumuloTestSetup(Test test) { + super(test); + } + + protected void setupWithHiveConf(HiveConf conf) throws Exception { + if (null == miniCluster) { + String testTmpDir = System.getProperty("test.tmp.dir"); + File tmpDir = new File(testTmpDir, "accumulo"); + + MiniAccumuloConfig cfg = new MiniAccumuloConfig(tmpDir, PASSWORD); + cfg.setNumTservers(1); + + miniCluster = new MiniAccumuloCluster(cfg); + + miniCluster.start(); + + createAccumuloTable(miniCluster.getConnector("root", PASSWORD)); + } + + // Setup connection information + conf.set(AccumuloConnectionParameters.USER_NAME, "root"); + conf.set(AccumuloConnectionParameters.USER_PASS, PASSWORD); + conf.set(AccumuloConnectionParameters.ZOOKEEPERS, miniCluster.getZooKeepers()); + conf.set(AccumuloConnectionParameters.INSTANCE_NAME, miniCluster.getInstanceName()); + } + + protected void createAccumuloTable(Connector conn) throws TableExistsException, + TableNotFoundException, AccumuloException, AccumuloSecurityException { + TableOperations tops = conn.tableOperations(); + if (tops.exists(TABLE_NAME)) { + tops.delete(TABLE_NAME); + } + + tops.create(TABLE_NAME); + + boolean[] booleans = new boolean[] {true, false, true}; + byte [] bytes = new byte [] { Byte.MIN_VALUE, -1, Byte.MAX_VALUE }; + short [] shorts = new short [] { Short.MIN_VALUE, -1, Short.MAX_VALUE }; + int [] ints = new int [] { Integer.MIN_VALUE, -1, Integer.MAX_VALUE }; + long [] longs = new long [] { Long.MIN_VALUE, -1, Long.MAX_VALUE }; + String [] strings = new String [] { "Hadoop, Accumulo", "Hive", "Test Strings" }; + float [] floats = new float [] { Float.MIN_VALUE, -1.0F, Float.MAX_VALUE }; + double [] doubles = new double [] { Double.MIN_VALUE, -1.0, Double.MAX_VALUE }; + HiveDecimal[] decimals = new HiveDecimal[] {HiveDecimal.create("3.14159"), HiveDecimal.create("2.71828"), HiveDecimal.create("0.57721")}; + Date[] dates = new Date[] {Date.valueOf("2014-01-01"), Date.valueOf("2014-03-01"), Date.valueOf("2014-05-01")}; + Timestamp[] timestamps = new Timestamp[] {new Timestamp(50), new Timestamp(100), new Timestamp(150)}; + + BatchWriter bw = conn.createBatchWriter(TABLE_NAME, new BatchWriterConfig()); + final String cf = "cf"; + try { + for (int i = 0; i < 3; i++) { + Mutation m = new Mutation("key-" + i); + m.put(cf, "cq-boolean", 
Boolean.toString(booleans[i])); + m.put(cf.getBytes(), "cq-byte".getBytes(), new byte[] {bytes[i]}); + m.put(cf, "cq-short", Short.toString(shorts[i])); + m.put(cf, "cq-int", Integer.toString(ints[i])); + m.put(cf, "cq-long", Long.toString(longs[i])); + m.put(cf, "cq-string", strings[i]); + m.put(cf, "cq-float", Float.toString(floats[i])); + m.put(cf, "cq-double", Double.toString(doubles[i])); + m.put(cf, "cq-decimal", decimals[i].toString()); + m.put(cf, "cq-date", dates[i].toString()); + m.put(cf, "cq-timestamp", timestamps[i].toString()); + + bw.addMutation(m); + } + } finally { + bw.close(); + } + } + + @Override + protected void tearDown() throws Exception { + if (null != miniCluster) { + miniCluster.stop(); + miniCluster = null; + } + } +} diff --git a/packaging/pom.xml b/packaging/pom.xml index cc12d1b..d11fc1e 100644 --- a/packaging/pom.xml +++ b/packaging/pom.xml @@ -182,6 +182,11 @@ ${project.version} + org.apache.hive + hive-accumulo-handler + ${project.version} + + org.apache.hive.hcatalog hive-hcatalog-streaming ${project.version} diff --git a/pom.xml b/pom.xml index b5a5697..5d4d718 100644 --- a/pom.xml +++ b/pom.xml @@ -31,6 +31,7 @@ + accumulo-handler ant beeline cli @@ -87,6 +88,7 @@ 1.8 + 1.6.0 5.5.0 1.9.1 3.4 @@ -362,6 +364,31 @@ ${commons-exec.version} + org.apache.accumulo + accumulo-core + ${accumulo.version} + + + org.apache.accumulo + accumulo-fate + ${accumulo.version} + + + org.apache.accumulo + accumulo-minicluster + ${accumulo.version} + + + org.apache.accumulo + accumulo-start + ${accumulo.version} + + + org.apache.accumulo + accumulo-trace + ${accumulo.version} + + org.apache.activemq activemq-core ${activemq.version}
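
The test wiring in this patch is the piece most easily lost in the diff: AccumuloTestSetup starts a single-tserver MiniAccumuloCluster and publishes its coordinates into the HiveConf under the AccumuloConnectionParameters keys, which the storage handler later uses to obtain a Connector. Below is a minimal standalone sketch of that flow, not part of the patch itself; it assumes the hive-accumulo-handler and accumulo-minicluster dependencies added in the POMs above are on the classpath, and the class name MiniAccumuloConnectionSketch is purely illustrative.

import java.io.File;
import java.nio.file.Files;

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.minicluster.MiniAccumuloCluster;
import org.apache.accumulo.minicluster.MiniAccumuloConfig;
import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters;
import org.apache.hadoop.hive.conf.HiveConf;

public class MiniAccumuloConnectionSketch {
  public static void main(String[] args) throws Exception {
    // Fresh, empty scratch directory for the mini cluster, standing in for test.tmp.dir.
    File tmpDir = Files.createTempDirectory("accumulo-sketch").toFile();

    // Single-tserver cluster with the same root password the test setup uses.
    MiniAccumuloConfig cfg = new MiniAccumuloConfig(tmpDir, "password");
    cfg.setNumTservers(1);
    MiniAccumuloCluster cluster = new MiniAccumuloCluster(cfg);
    cluster.start();
    try {
      // Hand the cluster's connection details to Hive the same way setupWithHiveConf does.
      HiveConf conf = new HiveConf();
      conf.set(AccumuloConnectionParameters.USER_NAME, "root");
      conf.set(AccumuloConnectionParameters.USER_PASS, "password");
      conf.set(AccumuloConnectionParameters.ZOOKEEPERS, cluster.getZooKeepers());
      conf.set(AccumuloConnectionParameters.INSTANCE_NAME, cluster.getInstanceName());

      // The handler resolves a Connector from exactly these properties.
      Connector conn = new AccumuloConnectionParameters(conf).getConnector();
      System.out.println("connected as " + conn.whoami());
    } finally {
      cluster.stop();
    }
  }
}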