From 2f2c89e3017f4740c5645e672d2cee5b770ffa54 Mon Sep 17 00:00:00 2001 From: Owen O'Malley Date: Fri, 16 Dec 2016 16:11:02 -0800 Subject: [PATCH] HIVE-15419. Separate storage-api to be released independently. --- common/pom.xml | 1 - .../apache/hive/common/util/TestBloomFilter.java | 464 --------------------- .../hadoop/hive/metastore/AggregateStatsCache.java | 2 +- .../hadoop/hive/metastore/MetaStoreDirectSql.java | 2 +- .../hbase/AggrStatsInvalidatorFilter.java | 12 +- .../hive/metastore/hbase/HBaseReadWrite.java | 2 +- .../hadoop/hive/metastore/hbase/HBaseUtils.java | 2 +- .../hive/metastore/TestAggregateStatsCache.java | 2 +- packaging/pom.xml | 1 - pom.xml | 8 +- .../exec/persistence/HybridHashTableContainer.java | 2 +- .../tez/HostAffinitySplitLocationProvider.java | 2 +- storage-api/pom.xml | 74 +++- .../org/apache/hive/common/util/BloomFilter.java | 313 -------------- .../java/org/apache/hive/common/util/Murmur3.java | 335 --------------- .../org/apache/hive/common/util/TestMurmur3.java | 224 ---------- 16 files changed, 75 insertions(+), 1371 deletions(-) delete mode 100644 common/src/test/org/apache/hive/common/util/TestBloomFilter.java delete mode 100644 storage-api/src/java/org/apache/hive/common/util/BloomFilter.java delete mode 100644 storage-api/src/java/org/apache/hive/common/util/Murmur3.java delete mode 100644 storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java diff --git common/pom.xml common/pom.xml index 93275eff8f..fd948f816b 100644 --- common/pom.xml +++ common/pom.xml @@ -42,7 +42,6 @@ org.apache.hive hive-storage-api - ${project.version} diff --git common/src/test/org/apache/hive/common/util/TestBloomFilter.java common/src/test/org/apache/hive/common/util/TestBloomFilter.java deleted file mode 100644 index 63c70504c9..0000000000 --- common/src/test/org/apache/hive/common/util/TestBloomFilter.java +++ /dev/null @@ -1,464 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.common.util; - -import static org.junit.Assert.assertEquals; - -import java.util.Random; - -import org.junit.Test; - -/** - * - */ -public class TestBloomFilter { - private static final int COUNT = 100; - Random rand = new Random(123); - - @Test(expected = IllegalArgumentException.class) - public void testBloomIllegalArg1() { - BloomFilter bf = new BloomFilter(0, 0); - } - - @Test(expected = IllegalArgumentException.class) - public void testBloomIllegalArg2() { - BloomFilter bf = new BloomFilter(0, 0.1); - } - - @Test(expected = IllegalArgumentException.class) - public void testBloomIllegalArg3() { - BloomFilter bf = new BloomFilter(1, 0.0); - } - - @Test(expected = IllegalArgumentException.class) - public void testBloomIllegalArg4() { - BloomFilter bf = new BloomFilter(1, 1.0); - } - - @Test(expected = IllegalArgumentException.class) - public void testBloomIllegalArg5() { - BloomFilter bf = new BloomFilter(-1, -1); - } - - - @Test - public void testBloomNumBits() { - assertEquals(0, BloomFilter.optimalNumOfBits(0, 0)); - assertEquals(0, BloomFilter.optimalNumOfBits(0, 1)); - assertEquals(0, BloomFilter.optimalNumOfBits(1, 1)); - assertEquals(7, BloomFilter.optimalNumOfBits(1, 0.03)); - assertEquals(72, BloomFilter.optimalNumOfBits(10, 0.03)); - assertEquals(729, BloomFilter.optimalNumOfBits(100, 0.03)); - assertEquals(7298, BloomFilter.optimalNumOfBits(1000, 0.03)); - assertEquals(72984, BloomFilter.optimalNumOfBits(10000, 0.03)); - assertEquals(729844, BloomFilter.optimalNumOfBits(100000, 0.03)); - assertEquals(7298440, BloomFilter.optimalNumOfBits(1000000, 0.03)); - assertEquals(6235224, BloomFilter.optimalNumOfBits(1000000, 0.05)); - assertEquals(1870567268, BloomFilter.optimalNumOfBits(300000000, 0.05)); - assertEquals(1437758756, BloomFilter.optimalNumOfBits(300000000, 0.1)); - assertEquals(432808512, BloomFilter.optimalNumOfBits(300000000, 0.5)); - assertEquals(1393332198, BloomFilter.optimalNumOfBits(3000000000L, 0.8)); - assertEquals(657882327, BloomFilter.optimalNumOfBits(3000000000L, 0.9)); - assertEquals(0, BloomFilter.optimalNumOfBits(3000000000L, 1)); - } - - @Test - public void testBloomNumHashFunctions() { - assertEquals(1, BloomFilter.optimalNumOfHashFunctions(-1, -1)); - assertEquals(1, BloomFilter.optimalNumOfHashFunctions(0, 0)); - assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 0)); - assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10, 10)); - assertEquals(7, BloomFilter.optimalNumOfHashFunctions(10, 100)); - assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100, 100)); - assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000, 100)); - assertEquals(1, BloomFilter.optimalNumOfHashFunctions(10000, 100)); - assertEquals(1, BloomFilter.optimalNumOfHashFunctions(100000, 100)); - assertEquals(1, BloomFilter.optimalNumOfHashFunctions(1000000, 100)); - } - - @Test - public void testBloomFilterBytes() { - BloomFilter bf = new BloomFilter(10000); - byte[] val = new byte[]{1, 2, 3}; - byte[] val1 = new byte[]{1, 2, 3, 4}; - byte[] val2 = new byte[]{1, 2, 3, 4, 5}; - byte[] val3 = new byte[]{1, 2, 3, 4, 5, 6}; - - assertEquals(false, bf.test(val)); - assertEquals(false, bf.test(val1)); - assertEquals(false, bf.test(val2)); - assertEquals(false, bf.test(val3)); - bf.add(val); - assertEquals(true, bf.test(val)); - assertEquals(false, bf.test(val1)); - assertEquals(false, bf.test(val2)); - assertEquals(false, bf.test(val3)); - bf.add(val1); - assertEquals(true, bf.test(val)); - assertEquals(true, bf.test(val1)); - assertEquals(false, bf.test(val2)); - assertEquals(false, bf.test(val3)); - bf.add(val2); - assertEquals(true, bf.test(val)); - assertEquals(true, bf.test(val1)); - assertEquals(true, bf.test(val2)); - assertEquals(false, bf.test(val3)); - bf.add(val3); - assertEquals(true, bf.test(val)); - assertEquals(true, bf.test(val1)); - assertEquals(true, bf.test(val2)); - assertEquals(true, bf.test(val3)); - - byte[] randVal = new byte[COUNT]; - for (int i = 0; i < COUNT; i++) { - rand.nextBytes(randVal); - bf.add(randVal); - } - // last value should be present - assertEquals(true, bf.test(randVal)); - // most likely this value should not exist - randVal[0] = 0; - randVal[1] = 0; - randVal[2] = 0; - randVal[3] = 0; - randVal[4] = 0; - assertEquals(false, bf.test(randVal)); - - assertEquals(7800, bf.sizeInBytes()); - } - - @Test - public void testBloomFilterByte() { - BloomFilter bf = new BloomFilter(10000); - byte val = Byte.MIN_VALUE; - byte val1 = 1; - byte val2 = 2; - byte val3 = Byte.MAX_VALUE; - - assertEquals(false, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val); - assertEquals(true, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val1); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val2); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val3); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(true, bf.testLong(val3)); - - byte randVal = 0; - for (int i = 0; i < COUNT; i++) { - randVal = (byte) rand.nextInt(Byte.MAX_VALUE); - bf.addLong(randVal); - } - // last value should be present - assertEquals(true, bf.testLong(randVal)); - // most likely this value should not exist - assertEquals(false, bf.testLong((byte) -120)); - - assertEquals(7800, bf.sizeInBytes()); - } - - @Test - public void testBloomFilterInt() { - BloomFilter bf = new BloomFilter(10000); - int val = Integer.MIN_VALUE; - int val1 = 1; - int val2 = 2; - int val3 = Integer.MAX_VALUE; - - assertEquals(false, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val); - assertEquals(true, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val1); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val2); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val3); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(true, bf.testLong(val3)); - - int randVal = 0; - for (int i = 0; i < COUNT; i++) { - randVal = rand.nextInt(); - bf.addLong(randVal); - } - // last value should be present - assertEquals(true, bf.testLong(randVal)); - // most likely this value should not exist - assertEquals(false, bf.testLong(-120)); - - assertEquals(7800, bf.sizeInBytes()); - } - - @Test - public void testBloomFilterLong() { - BloomFilter bf = new BloomFilter(10000); - long val = Long.MIN_VALUE; - long val1 = 1; - long val2 = 2; - long val3 = Long.MAX_VALUE; - - assertEquals(false, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val); - assertEquals(true, bf.testLong(val)); - assertEquals(false, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val1); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(false, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val2); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(false, bf.testLong(val3)); - bf.addLong(val3); - assertEquals(true, bf.testLong(val)); - assertEquals(true, bf.testLong(val1)); - assertEquals(true, bf.testLong(val2)); - assertEquals(true, bf.testLong(val3)); - - long randVal = 0; - for (int i = 0; i < COUNT; i++) { - randVal = rand.nextLong(); - bf.addLong(randVal); - } - // last value should be present - assertEquals(true, bf.testLong(randVal)); - // most likely this value should not exist - assertEquals(false, bf.testLong(-120)); - - assertEquals(7800, bf.sizeInBytes()); - } - - @Test - public void testBloomFilterFloat() { - BloomFilter bf = new BloomFilter(10000); - float val = Float.MIN_VALUE; - float val1 = 1.1f; - float val2 = 2.2f; - float val3 = Float.MAX_VALUE; - - assertEquals(false, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); - bf.addDouble(val); - assertEquals(true, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); - bf.addDouble(val1); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); - bf.addDouble(val2); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); - bf.addDouble(val3); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(true, bf.testDouble(val3)); - - float randVal = 0; - for (int i = 0; i < COUNT; i++) { - randVal = rand.nextFloat(); - bf.addDouble(randVal); - } - // last value should be present - assertEquals(true, bf.testDouble(randVal)); - // most likely this value should not exist - assertEquals(false, bf.testDouble(-120.2f)); - - assertEquals(7800, bf.sizeInBytes()); - } - - @Test - public void testBloomFilterDouble() { - BloomFilter bf = new BloomFilter(10000); - double val = Double.MIN_VALUE; - double val1 = 1.1d; - double val2 = 2.2d; - double val3 = Double.MAX_VALUE; - - assertEquals(false, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); - bf.addDouble(val); - assertEquals(true, bf.testDouble(val)); - assertEquals(false, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); - bf.addDouble(val1); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(false, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); - bf.addDouble(val2); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(false, bf.testDouble(val3)); - bf.addDouble(val3); - assertEquals(true, bf.testDouble(val)); - assertEquals(true, bf.testDouble(val1)); - assertEquals(true, bf.testDouble(val2)); - assertEquals(true, bf.testDouble(val3)); - - double randVal = 0; - for (int i = 0; i < COUNT; i++) { - randVal = rand.nextDouble(); - bf.addDouble(randVal); - } - // last value should be present - assertEquals(true, bf.testDouble(randVal)); - // most likely this value should not exist - assertEquals(false, bf.testDouble(-120.2d)); - - assertEquals(7800, bf.sizeInBytes()); - } - - @Test - public void testBloomFilterString() { - BloomFilter bf = new BloomFilter(100000); - String val = "bloo"; - String val1 = "bloom fil"; - String val2 = "bloom filter"; - String val3 = "cuckoo filter"; - - assertEquals(false, bf.testString(val)); - assertEquals(false, bf.testString(val1)); - assertEquals(false, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); - bf.addString(val); - assertEquals(true, bf.testString(val)); - assertEquals(false, bf.testString(val1)); - assertEquals(false, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); - bf.addString(val1); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(false, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); - bf.addString(val2); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(false, bf.testString(val3)); - bf.addString(val3); - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(true, bf.testString(val3)); - - long randVal = 0; - for (int i = 0; i < COUNT; i++) { - randVal = rand.nextLong(); - bf.addString(Long.toString(randVal)); - } - // last value should be present - assertEquals(true, bf.testString(Long.toString(randVal))); - // most likely this value should not exist - assertEquals(false, bf.testString(Long.toString(-120))); - - assertEquals(77944, bf.sizeInBytes()); - } - - @Test - public void testMerge() { - BloomFilter bf = new BloomFilter(10000); - String val = "bloo"; - String val1 = "bloom fil"; - String val2 = "bloom filter"; - String val3 = "cuckoo filter"; - bf.addString(val); - bf.addString(val1); - bf.addString(val2); - bf.addString(val3); - - BloomFilter bf2 = new BloomFilter(10000); - String v = "2_bloo"; - String v1 = "2_bloom fil"; - String v2 = "2_bloom filter"; - String v3 = "2_cuckoo filter"; - bf2.addString(v); - bf2.addString(v1); - bf2.addString(v2); - bf2.addString(v3); - - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(true, bf.testString(val3)); - assertEquals(false, bf.testString(v)); - assertEquals(false, bf.testString(v1)); - assertEquals(false, bf.testString(v2)); - assertEquals(false, bf.testString(v3)); - - bf.merge(bf2); - - assertEquals(true, bf.testString(val)); - assertEquals(true, bf.testString(val1)); - assertEquals(true, bf.testString(val2)); - assertEquals(true, bf.testString(val3)); - assertEquals(true, bf.testString(v)); - assertEquals(true, bf.testString(v1)); - assertEquals(true, bf.testString(v2)); - assertEquals(true, bf.testString(v3)); - } -} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/AggregateStatsCache.java metastore/src/java/org/apache/hadoop/hive/metastore/AggregateStatsCache.java index 4ab178c606..aa38cd3d7f 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/AggregateStatsCache.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/AggregateStatsCache.java @@ -24,7 +24,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hive.common.util.BloomFilter; +import org.apache.orc.util.BloomFilter; import java.util.ArrayList; import java.util.HashMap; diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java index dadc6f699b..d122d72a55 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java @@ -71,7 +71,7 @@ import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeVisitor; import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hive.common.util.BloomFilter; +import org.apache.orc.util.BloomFilter; import org.datanucleus.store.rdbms.query.ForwardQueryResult; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggrStatsInvalidatorFilter.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggrStatsInvalidatorFilter.java index 4ca4229acd..de41330c91 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggrStatsInvalidatorFilter.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggrStatsInvalidatorFilter.java @@ -26,7 +26,7 @@ import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.FilterBase; -import org.apache.hive.common.util.BloomFilter; +import org.apache.orc.util.BloomFilter; import java.io.IOException; import java.util.Arrays; @@ -101,10 +101,12 @@ public ReturnCode filterKeyValue(Cell cell) throws IOException { entry.getTableName().equals(fromCol.getTableName())) { if (bloom == null) { // Now, reconstitute the bloom filter and probe it with each of our partition names - bloom = new BloomFilter( - fromCol.getBloomFilter().getBitsList(), - fromCol.getBloomFilter().getNumBits(), - fromCol.getBloomFilter().getNumFuncs()); + HbaseMetastoreProto.AggrStatsBloomFilter.BloomFilter bloomFilter = fromCol.getBloomFilter(); + long bits[] = new long[bloomFilter.getBitsCount()]; + for (int i = 0; i < bits.length; ++i) { + bits[i] = bloomFilter.getBits(i); + } + bloom = new BloomFilter(bits, bloomFilter.getNumFuncs()); } if (bloom.test(entry.getPartName().toByteArray())) { // This is most likely a match, so mark it and quit looking. diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java index e687a69ecb..6704a108cd 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java @@ -61,7 +61,7 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.hbase.PartitionKeyComparator.Operator; -import org.apache.hive.common.util.BloomFilter; +import org.apache.orc.util.BloomFilter; import org.apache.thrift.TBase; import org.apache.thrift.TException; import org.apache.thrift.protocol.TProtocol; diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java index 94087b164e..18441f3215 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java @@ -82,7 +82,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BytesWritable; -import org.apache.hive.common.util.BloomFilter; +import org.apache.orc.util.BloomFilter; import org.apache.hive.common.util.HiveStringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git metastore/src/test/org/apache/hadoop/hive/metastore/TestAggregateStatsCache.java metastore/src/test/org/apache/hadoop/hive/metastore/TestAggregateStatsCache.java index 40700daab8..efdde1f6f3 100644 --- metastore/src/test/org/apache/hadoop/hive/metastore/TestAggregateStatsCache.java +++ metastore/src/test/org/apache/hadoop/hive/metastore/TestAggregateStatsCache.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.AggregateStatsCache.AggrColStats; import org.apache.hadoop.hive.metastore.AggregateStatsCache.Key; -import org.apache.hive.common.util.BloomFilter; +import org.apache.orc.util.BloomFilter; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; diff --git packaging/pom.xml packaging/pom.xml index 76e0cffdcf..7a538563c7 100644 --- packaging/pom.xml +++ packaging/pom.xml @@ -246,7 +246,6 @@ org.apache.hive hive-storage-api - ${project.version} org.apache.hive diff --git pom.xml pom.xml index 177dadffe7..e5db24652d 100644 --- pom.xml +++ pom.xml @@ -183,6 +183,7 @@ 1.0.1 1.7.10 4.0.4 + 2.3.0-SNAPSHOT 0.8.4 0.90.2-incubating 2.2.0 @@ -507,7 +508,12 @@ stax-api ${stax.version} - + + org.apache.hive + hive-storage-api + ${storage-api.version} + + org.apache.orc orc-core ${orc.version} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index 04e89e8e74..90895ce878 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -58,7 +58,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; -import org.apache.hive.common.util.BloomFilter; +import org.apache.orc.util.BloomFilter; import org.apache.hive.common.util.HashCodeUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java index dcb985fbf9..10b8b31c43 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HostAffinitySplitLocationProvider.java @@ -25,7 +25,7 @@ import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.split.SplitLocationProvider; -import org.apache.hive.common.util.Murmur3; +import org.apache.orc.util.Murmur3; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git storage-api/pom.xml storage-api/pom.xml index 4767be64cd..19e43540a4 100644 --- storage-api/pom.xml +++ storage-api/pom.xml @@ -17,31 +17,29 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - org.apache.hive - hive - 2.2.0-SNAPSHOT - ../pom.xml + org.apache + apache + 18 + + org.apache.hive hive-storage-api + 2.3.0-SNAPSHOT jar Hive Storage API - - .. - - - + commons-lang commons-lang - ${commons-lang.version} + 2.6 org.apache.hadoop hadoop-common - ${hadoop.version} + 2.6.0 provided @@ -49,7 +47,11 @@ guava - commmons-logging + com.google.code.findbugs + jsr305 + + + commons-logging commons-logging @@ -65,20 +67,14 @@ org.slf4j slf4j-api - ${slf4j.version} + 1.7.5 - com.google.guava - guava - ${guava.version} - test - - junit junit - ${junit.version} + 4.11 test @@ -91,5 +87,43 @@ ${basedir}/src/test/resources + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + 1.7 + 1.7 + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.4 + + ${project.basedir}/../../site/api + ${project.artifactId} + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 2.9 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.16 + + false + -Xmx2048m + false + + ${test.tmp.dir} + + + + diff --git storage-api/src/java/org/apache/hive/common/util/BloomFilter.java storage-api/src/java/org/apache/hive/common/util/BloomFilter.java deleted file mode 100644 index e60690d97f..0000000000 --- storage-api/src/java/org/apache/hive/common/util/BloomFilter.java +++ /dev/null @@ -1,313 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.common.util; - -import java.util.Arrays; -import java.util.List; - -/** - * BloomFilter is a probabilistic data structure for set membership check. BloomFilters are - * highly space efficient when compared to using a HashSet. Because of the probabilistic nature of - * bloom filter false positive (element not present in bloom filter but test() says true) are - * possible but false negatives are not possible (if element is present then test() will never - * say false). The false positive probability is configurable (default: 5%) depending on which - * storage requirement may increase or decrease. Lower the false positive probability greater - * is the space requirement. - * Bloom filters are sensitive to number of elements that will be inserted in the bloom filter. - * During the creation of bloom filter expected number of entries must be specified. If the number - * of insertions exceed the specified initial number of entries then false positive probability will - * increase accordingly. - * - * Internally, this implementation of bloom filter uses Murmur3 fast non-cryptographic hash - * algorithm. Although Murmur2 is slightly faster than Murmur3 in Java, it suffers from hash - * collisions for specific sequence of repeating bytes. Check the following link for more info - * https://code.google.com/p/smhasher/wiki/MurmurHash2Flaw - */ -public class BloomFilter { - public static final double DEFAULT_FPP = 0.05; - protected BitSet bitSet; - protected int numBits; - protected int numHashFunctions; - - public BloomFilter() { - } - - public BloomFilter(long expectedEntries) { - this(expectedEntries, DEFAULT_FPP); - } - - static void checkArgument(boolean expression, String message) { - if (!expression) { - throw new IllegalArgumentException(message); - } - } - - public BloomFilter(long expectedEntries, double fpp) { - checkArgument(expectedEntries > 0, "expectedEntries should be > 0"); - checkArgument(fpp > 0.0 && fpp < 1.0, "False positive probability should be > 0.0 & < 1.0"); - int nb = optimalNumOfBits(expectedEntries, fpp); - // make 'm' multiple of 64 - this.numBits = nb + (Long.SIZE - (nb % Long.SIZE)); - this.numHashFunctions = optimalNumOfHashFunctions(expectedEntries, numBits); - this.bitSet = new BitSet(numBits); - } - - /** - * A constructor to support rebuilding the BloomFilter from a serialized representation. - * @param bits - * @param numBits - * @param numFuncs - */ - public BloomFilter(List bits, int numBits, int numFuncs) { - super(); - long[] copied = new long[bits.size()]; - for (int i = 0; i < bits.size(); i++) copied[i] = bits.get(i); - bitSet = new BitSet(copied); - this.numBits = numBits; - numHashFunctions = numFuncs; - } - - static int optimalNumOfHashFunctions(long n, long m) { - return Math.max(1, (int) Math.round((double) m / n * Math.log(2))); - } - - static int optimalNumOfBits(long n, double p) { - return (int) (-n * Math.log(p) / (Math.log(2) * Math.log(2))); - } - - public void add(byte[] val) { - if (val == null) { - addBytes(val, -1, -1); - } else { - addBytes(val, 0, val.length); - } - } - - public void addBytes(byte[] val, int offset, int length) { - // We use the trick mentioned in "Less Hashing, Same Performance: Building a Better Bloom Filter" - // by Kirsch et.al. From abstract 'only two hash functions are necessary to effectively - // implement a Bloom filter without any loss in the asymptotic false positive probability' - - // Lets split up 64-bit hashcode into two 32-bit hash codes and employ the technique mentioned - // in the above paper - long hash64 = val == null ? Murmur3.NULL_HASHCODE : - Murmur3.hash64(val, offset, length); - addHash(hash64); - } - - private void addHash(long hash64) { - int hash1 = (int) hash64; - int hash2 = (int) (hash64 >>> 32); - - for (int i = 1; i <= numHashFunctions; i++) { - int combinedHash = hash1 + (i * hash2); - // hashcode should be positive, flip all the bits if it's negative - if (combinedHash < 0) { - combinedHash = ~combinedHash; - } - int pos = combinedHash % numBits; - bitSet.set(pos); - } - } - - public void addString(String val) { - if (val == null) { - add(null); - } else { - add(val.getBytes()); - } - } - - public void addLong(long val) { - addHash(getLongHash(val)); - } - - public void addDouble(double val) { - addLong(Double.doubleToLongBits(val)); - } - - public boolean test(byte[] val) { - if (val == null) { - return testBytes(val, -1, -1); - } - return testBytes(val, 0, val.length); - } - - public boolean testBytes(byte[] val, int offset, int length) { - long hash64 = val == null ? Murmur3.NULL_HASHCODE : - Murmur3.hash64(val, offset, length); - return testHash(hash64); - } - - private boolean testHash(long hash64) { - int hash1 = (int) hash64; - int hash2 = (int) (hash64 >>> 32); - - for (int i = 1; i <= numHashFunctions; i++) { - int combinedHash = hash1 + (i * hash2); - // hashcode should be positive, flip all the bits if it's negative - if (combinedHash < 0) { - combinedHash = ~combinedHash; - } - int pos = combinedHash % numBits; - if (!bitSet.get(pos)) { - return false; - } - } - return true; - } - - public boolean testString(String val) { - if (val == null) { - return test(null); - } else { - return test(val.getBytes()); - } - } - - public boolean testLong(long val) { - return testHash(getLongHash(val)); - } - - // Thomas Wang's integer hash function - // http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm - private long getLongHash(long key) { - key = (~key) + (key << 21); // key = (key << 21) - key - 1; - key = key ^ (key >> 24); - key = (key + (key << 3)) + (key << 8); // key * 265 - key = key ^ (key >> 14); - key = (key + (key << 2)) + (key << 4); // key * 21 - key = key ^ (key >> 28); - key = key + (key << 31); - return key; - } - - public boolean testDouble(double val) { - return testLong(Double.doubleToLongBits(val)); - } - - public long sizeInBytes() { - return getBitSize() / 8; - } - - public int getBitSize() { - return bitSet.getData().length * Long.SIZE; - } - - public int getNumHashFunctions() { - return numHashFunctions; - } - - public long[] getBitSet() { - return bitSet.getData(); - } - - @Override - public String toString() { - return "m: " + numBits + " k: " + numHashFunctions; - } - - /** - * Merge the specified bloom filter with current bloom filter. - * - * @param that - bloom filter to merge - */ - public void merge(BloomFilter that) { - if (this != that && this.numBits == that.numBits && this.numHashFunctions == that.numHashFunctions) { - this.bitSet.putAll(that.bitSet); - } else { - throw new IllegalArgumentException("BloomFilters are not compatible for merging." + - " this - " + this.toString() + " that - " + that.toString()); - } - } - - public void reset() { - this.bitSet.clear(); - } - - /** - * Bare metal bit set implementation. For performance reasons, this implementation does not check - * for index bounds nor expand the bit set size if the specified index is greater than the size. - */ - public class BitSet { - private final long[] data; - - public BitSet(long bits) { - this(new long[(int) Math.ceil((double) bits / (double) Long.SIZE)]); - } - - /** - * Deserialize long array as bit set. - * - * @param data - bit array - */ - public BitSet(long[] data) { - assert data.length > 0 : "data length is zero!"; - this.data = data; - } - - /** - * Sets the bit at specified index. - * - * @param index - position - */ - public void set(int index) { - data[index >>> 6] |= (1L << index); - } - - /** - * Returns true if the bit is set in the specified index. - * - * @param index - position - * @return - value at the bit position - */ - public boolean get(int index) { - return (data[index >>> 6] & (1L << index)) != 0; - } - - /** - * Number of bits - */ - public long bitSize() { - return (long) data.length * Long.SIZE; - } - - public long[] getData() { - return data; - } - - /** - * Combines the two BitArrays using bitwise OR. - */ - public void putAll(BitSet array) { - assert data.length == array.data.length : - "BitArrays must be of equal length (" + data.length + "!= " + array.data.length + ")"; - for (int i = 0; i < data.length; i++) { - data[i] |= array.data[i]; - } - } - - /** - * Clear the bit set. - */ - public void clear() { - Arrays.fill(data, 0); - } - } -} diff --git storage-api/src/java/org/apache/hive/common/util/Murmur3.java storage-api/src/java/org/apache/hive/common/util/Murmur3.java deleted file mode 100644 index 88c3514b57..0000000000 --- storage-api/src/java/org/apache/hive/common/util/Murmur3.java +++ /dev/null @@ -1,335 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.common.util; - -/** - * Murmur3 is successor to Murmur2 fast non-crytographic hash algorithms. - * - * Murmur3 32 and 128 bit variants. - * 32-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94 - * 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255 - * - * This is a public domain code with no copyrights. - * From homepage of MurmurHash (https://code.google.com/p/smhasher/), - * "All MurmurHash versions are public domain software, and the author disclaims all copyright - * to their code." - */ -public class Murmur3 { - // from 64-bit linear congruential generator - public static final long NULL_HASHCODE = 2862933555777941757L; - - // Constants for 32 bit variant - private static final int C1_32 = 0xcc9e2d51; - private static final int C2_32 = 0x1b873593; - private static final int R1_32 = 15; - private static final int R2_32 = 13; - private static final int M_32 = 5; - private static final int N_32 = 0xe6546b64; - - // Constants for 128 bit variant - private static final long C1 = 0x87c37b91114253d5L; - private static final long C2 = 0x4cf5ad432745937fL; - private static final int R1 = 31; - private static final int R2 = 27; - private static final int R3 = 33; - private static final int M = 5; - private static final int N1 = 0x52dce729; - private static final int N2 = 0x38495ab5; - - private static final int DEFAULT_SEED = 104729; - - /** - * Murmur3 32-bit variant. - * - * @param data - input byte array - * @return - hashcode - */ - public static int hash32(byte[] data) { - return hash32(data, data.length, DEFAULT_SEED); - } - - /** - * Murmur3 32-bit variant. - * - * @param data - input byte array - * @param length - length of array - * @param seed - seed. (default 0) - * @return - hashcode - */ - public static int hash32(byte[] data, int length, int seed) { - int hash = seed; - final int nblocks = length >> 2; - - // body - for (int i = 0; i < nblocks; i++) { - int i_4 = i << 2; - int k = (data[i_4] & 0xff) - | ((data[i_4 + 1] & 0xff) << 8) - | ((data[i_4 + 2] & 0xff) << 16) - | ((data[i_4 + 3] & 0xff) << 24); - - // mix functions - k *= C1_32; - k = Integer.rotateLeft(k, R1_32); - k *= C2_32; - hash ^= k; - hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32; - } - - // tail - int idx = nblocks << 2; - int k1 = 0; - switch (length - idx) { - case 3: - k1 ^= data[idx + 2] << 16; - case 2: - k1 ^= data[idx + 1] << 8; - case 1: - k1 ^= data[idx]; - - // mix functions - k1 *= C1_32; - k1 = Integer.rotateLeft(k1, R1_32); - k1 *= C2_32; - hash ^= k1; - } - - // finalization - hash ^= length; - hash ^= (hash >>> 16); - hash *= 0x85ebca6b; - hash ^= (hash >>> 13); - hash *= 0xc2b2ae35; - hash ^= (hash >>> 16); - - return hash; - } - - /** - * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant. - * - * @param data - input byte array - * @return - hashcode - */ - public static long hash64(byte[] data) { - return hash64(data, 0, data.length, DEFAULT_SEED); - } - - public static long hash64(byte[] data, int offset, int length) { - return hash64(data, offset, length, DEFAULT_SEED); - } - - /** - * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant. - * - * @param data - input byte array - * @param length - length of array - * @param seed - seed. (default is 0) - * @return - hashcode - */ - public static long hash64(byte[] data, int offset, int length, int seed) { - long hash = seed; - final int nblocks = length >> 3; - - // body - for (int i = 0; i < nblocks; i++) { - final int i8 = i << 3; - long k = ((long) data[offset + i8] & 0xff) - | (((long) data[offset + i8 + 1] & 0xff) << 8) - | (((long) data[offset + i8 + 2] & 0xff) << 16) - | (((long) data[offset + i8 + 3] & 0xff) << 24) - | (((long) data[offset + i8 + 4] & 0xff) << 32) - | (((long) data[offset + i8 + 5] & 0xff) << 40) - | (((long) data[offset + i8 + 6] & 0xff) << 48) - | (((long) data[offset + i8 + 7] & 0xff) << 56); - - // mix functions - k *= C1; - k = Long.rotateLeft(k, R1); - k *= C2; - hash ^= k; - hash = Long.rotateLeft(hash, R2) * M + N1; - } - - // tail - long k1 = 0; - int tailStart = nblocks << 3; - switch (length - tailStart) { - case 7: - k1 ^= ((long) data[offset + tailStart + 6] & 0xff) << 48; - case 6: - k1 ^= ((long) data[offset + tailStart + 5] & 0xff) << 40; - case 5: - k1 ^= ((long) data[offset + tailStart + 4] & 0xff) << 32; - case 4: - k1 ^= ((long) data[offset + tailStart + 3] & 0xff) << 24; - case 3: - k1 ^= ((long) data[offset + tailStart + 2] & 0xff) << 16; - case 2: - k1 ^= ((long) data[offset + tailStart + 1] & 0xff) << 8; - case 1: - k1 ^= ((long) data[offset + tailStart] & 0xff); - k1 *= C1; - k1 = Long.rotateLeft(k1, R1); - k1 *= C2; - hash ^= k1; - } - - // finalization - hash ^= length; - hash = fmix64(hash); - - return hash; - } - - /** - * Murmur3 128-bit variant. - * - * @param data - input byte array - * @return - hashcode (2 longs) - */ - public static long[] hash128(byte[] data) { - return hash128(data, 0, data.length, DEFAULT_SEED); - } - - /** - * Murmur3 128-bit variant. - * - * @param data - input byte array - * @param offset - the first element of array - * @param length - length of array - * @param seed - seed. (default is 0) - * @return - hashcode (2 longs) - */ - public static long[] hash128(byte[] data, int offset, int length, int seed) { - long h1 = seed; - long h2 = seed; - final int nblocks = length >> 4; - - // body - for (int i = 0; i < nblocks; i++) { - final int i16 = i << 4; - long k1 = ((long) data[offset + i16] & 0xff) - | (((long) data[offset + i16 + 1] & 0xff) << 8) - | (((long) data[offset + i16 + 2] & 0xff) << 16) - | (((long) data[offset + i16 + 3] & 0xff) << 24) - | (((long) data[offset + i16 + 4] & 0xff) << 32) - | (((long) data[offset + i16 + 5] & 0xff) << 40) - | (((long) data[offset + i16 + 6] & 0xff) << 48) - | (((long) data[offset + i16 + 7] & 0xff) << 56); - - long k2 = ((long) data[offset + i16 + 8] & 0xff) - | (((long) data[offset + i16 + 9] & 0xff) << 8) - | (((long) data[offset + i16 + 10] & 0xff) << 16) - | (((long) data[offset + i16 + 11] & 0xff) << 24) - | (((long) data[offset + i16 + 12] & 0xff) << 32) - | (((long) data[offset + i16 + 13] & 0xff) << 40) - | (((long) data[offset + i16 + 14] & 0xff) << 48) - | (((long) data[offset + i16 + 15] & 0xff) << 56); - - // mix functions for k1 - k1 *= C1; - k1 = Long.rotateLeft(k1, R1); - k1 *= C2; - h1 ^= k1; - h1 = Long.rotateLeft(h1, R2); - h1 += h2; - h1 = h1 * M + N1; - - // mix functions for k2 - k2 *= C2; - k2 = Long.rotateLeft(k2, R3); - k2 *= C1; - h2 ^= k2; - h2 = Long.rotateLeft(h2, R1); - h2 += h1; - h2 = h2 * M + N2; - } - - // tail - long k1 = 0; - long k2 = 0; - int tailStart = nblocks << 4; - switch (length - tailStart) { - case 15: - k2 ^= (long) (data[offset + tailStart + 14] & 0xff) << 48; - case 14: - k2 ^= (long) (data[offset + tailStart + 13] & 0xff) << 40; - case 13: - k2 ^= (long) (data[offset + tailStart + 12] & 0xff) << 32; - case 12: - k2 ^= (long) (data[offset + tailStart + 11] & 0xff) << 24; - case 11: - k2 ^= (long) (data[offset + tailStart + 10] & 0xff) << 16; - case 10: - k2 ^= (long) (data[offset + tailStart + 9] & 0xff) << 8; - case 9: - k2 ^= (long) (data[offset + tailStart + 8] & 0xff); - k2 *= C2; - k2 = Long.rotateLeft(k2, R3); - k2 *= C1; - h2 ^= k2; - - case 8: - k1 ^= (long) (data[offset + tailStart + 7] & 0xff) << 56; - case 7: - k1 ^= (long) (data[offset + tailStart + 6] & 0xff) << 48; - case 6: - k1 ^= (long) (data[offset + tailStart + 5] & 0xff) << 40; - case 5: - k1 ^= (long) (data[offset + tailStart + 4] & 0xff) << 32; - case 4: - k1 ^= (long) (data[offset + tailStart + 3] & 0xff) << 24; - case 3: - k1 ^= (long) (data[offset + tailStart + 2] & 0xff) << 16; - case 2: - k1 ^= (long) (data[offset + tailStart + 1] & 0xff) << 8; - case 1: - k1 ^= (long) (data[offset + tailStart] & 0xff); - k1 *= C1; - k1 = Long.rotateLeft(k1, R1); - k1 *= C2; - h1 ^= k1; - } - - // finalization - h1 ^= length; - h2 ^= length; - - h1 += h2; - h2 += h1; - - h1 = fmix64(h1); - h2 = fmix64(h2); - - h1 += h2; - h2 += h1; - - return new long[]{h1, h2}; - } - - private static long fmix64(long h) { - h ^= (h >>> 33); - h *= 0xff51afd7ed558ccdL; - h ^= (h >>> 33); - h *= 0xc4ceb9fe1a85ec53L; - h ^= (h >>> 33); - return h; - } -} diff --git storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java deleted file mode 100644 index 5facc7c4de..0000000000 --- storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java +++ /dev/null @@ -1,224 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.common.util; - -import static org.junit.Assert.assertEquals; - -import com.google.common.hash.HashFunction; -import com.google.common.hash.Hashing; - -import org.junit.Test; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.util.Arrays; -import java.util.Random; - -/** - * Tests for Murmur3 variants. - */ -public class TestMurmur3 { - - @Test - public void testHashCodesM3_32_string() { - String key = "test"; - int seed = 123; - HashFunction hf = Hashing.murmur3_32(seed); - int hc1 = hf.hashBytes(key.getBytes()).asInt(); - int hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed); - assertEquals(hc1, hc2); - - key = "testkey"; - hc1 = hf.hashBytes(key.getBytes()).asInt(); - hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed); - assertEquals(hc1, hc2); - } - - @Test - public void testHashCodesM3_32_ints() { - int seed = 123; - Random rand = new Random(seed); - HashFunction hf = Hashing.murmur3_32(seed); - for (int i = 0; i < 1000; i++) { - int val = rand.nextInt(); - byte[] data = ByteBuffer.allocate(4).putInt(val).array(); - int hc1 = hf.hashBytes(data).asInt(); - int hc2 = Murmur3.hash32(data, data.length, seed); - assertEquals(hc1, hc2); - } - } - - @Test - public void testHashCodesM3_32_longs() { - int seed = 123; - Random rand = new Random(seed); - HashFunction hf = Hashing.murmur3_32(seed); - for (int i = 0; i < 1000; i++) { - long val = rand.nextLong(); - byte[] data = ByteBuffer.allocate(8).putLong(val).array(); - int hc1 = hf.hashBytes(data).asInt(); - int hc2 = Murmur3.hash32(data, data.length, seed); - assertEquals(hc1, hc2); - } - } - - @Test - public void testHashCodesM3_32_double() { - int seed = 123; - Random rand = new Random(seed); - HashFunction hf = Hashing.murmur3_32(seed); - for (int i = 0; i < 1000; i++) { - double val = rand.nextDouble(); - byte[] data = ByteBuffer.allocate(8).putDouble(val).array(); - int hc1 = hf.hashBytes(data).asInt(); - int hc2 = Murmur3.hash32(data, data.length, seed); - assertEquals(hc1, hc2); - } - } - - @Test - public void testHashCodesM3_128_string() { - String key = "test"; - int seed = 123; - HashFunction hf = Hashing.murmur3_128(seed); - // guava stores the hashcodes in little endian order - ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); - buf.put(hf.hashBytes(key.getBytes()).asBytes()); - buf.flip(); - long gl1 = buf.getLong(); - long gl2 = buf.getLong(8); - long[] hc = Murmur3.hash128(key.getBytes(), 0, key.getBytes().length, seed); - long m1 = hc[0]; - long m2 = hc[1]; - assertEquals(gl1, m1); - assertEquals(gl2, m2); - - key = "testkey128_testkey128"; - buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); - buf.put(hf.hashBytes(key.getBytes()).asBytes()); - buf.flip(); - gl1 = buf.getLong(); - gl2 = buf.getLong(8); - byte[] keyBytes = key.getBytes(); - hc = Murmur3.hash128(keyBytes, 0, keyBytes.length, seed); - m1 = hc[0]; - m2 = hc[1]; - assertEquals(gl1, m1); - assertEquals(gl2, m2); - - byte[] offsetKeyBytes = new byte[keyBytes.length + 35]; - Arrays.fill(offsetKeyBytes, (byte) -1); - System.arraycopy(keyBytes, 0, offsetKeyBytes, 35, keyBytes.length); - hc = Murmur3.hash128(offsetKeyBytes, 35, keyBytes.length, seed); - assertEquals(gl1, hc[0]); - assertEquals(gl2, hc[1]); - } - - @Test - public void testHashCodeM3_64() { - byte[] origin = ("It was the best of times, it was the worst of times," + - " it was the age of wisdom, it was the age of foolishness," + - " it was the epoch of belief, it was the epoch of incredulity," + - " it was the season of Light, it was the season of Darkness," + - " it was the spring of hope, it was the winter of despair," + - " we had everything before us, we had nothing before us," + - " we were all going direct to Heaven," + - " we were all going direct the other way.").getBytes(); - long hash = Murmur3.hash64(origin, 0, origin.length); - assertEquals(305830725663368540L, hash); - - byte[] originOffset = new byte[origin.length + 150]; - Arrays.fill(originOffset, (byte) 123); - System.arraycopy(origin, 0, originOffset, 150, origin.length); - hash = Murmur3.hash64(originOffset, 150, origin.length); - assertEquals(305830725663368540L, hash); - } - - @Test - public void testHashCodesM3_128_ints() { - int seed = 123; - Random rand = new Random(seed); - HashFunction hf = Hashing.murmur3_128(seed); - for (int i = 0; i < 1000; i++) { - int val = rand.nextInt(); - byte[] data = ByteBuffer.allocate(4).putInt(val).array(); - // guava stores the hashcodes in little endian order - ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); - buf.put(hf.hashBytes(data).asBytes()); - buf.flip(); - long gl1 = buf.getLong(); - long gl2 = buf.getLong(8); - long[] hc = Murmur3.hash128(data, 0, data.length, seed); - long m1 = hc[0]; - long m2 = hc[1]; - assertEquals(gl1, m1); - assertEquals(gl2, m2); - - byte[] offsetData = new byte[data.length + 50]; - System.arraycopy(data, 0, offsetData, 50, data.length); - hc = Murmur3.hash128(offsetData, 50, data.length, seed); - assertEquals(gl1, hc[0]); - assertEquals(gl2, hc[1]); - } - } - - @Test - public void testHashCodesM3_128_longs() { - int seed = 123; - Random rand = new Random(seed); - HashFunction hf = Hashing.murmur3_128(seed); - for (int i = 0; i < 1000; i++) { - long val = rand.nextLong(); - byte[] data = ByteBuffer.allocate(8).putLong(val).array(); - // guava stores the hashcodes in little endian order - ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); - buf.put(hf.hashBytes(data).asBytes()); - buf.flip(); - long gl1 = buf.getLong(); - long gl2 = buf.getLong(8); - long[] hc = Murmur3.hash128(data, 0, data.length, seed); - long m1 = hc[0]; - long m2 = hc[1]; - assertEquals(gl1, m1); - assertEquals(gl2, m2); - } - } - - @Test - public void testHashCodesM3_128_double() { - int seed = 123; - Random rand = new Random(seed); - HashFunction hf = Hashing.murmur3_128(seed); - for (int i = 0; i < 1000; i++) { - double val = rand.nextDouble(); - byte[] data = ByteBuffer.allocate(8).putDouble(val).array(); - // guava stores the hashcodes in little endian order - ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN); - buf.put(hf.hashBytes(data).asBytes()); - buf.flip(); - long gl1 = buf.getLong(); - long gl2 = buf.getLong(8); - long[] hc = Murmur3.hash128(data, 0, data.length, seed); - long m1 = hc[0]; - long m2 = hc[1]; - assertEquals(gl1, m1); - assertEquals(gl2, m2); - } - } -} -- 2.11.0