commit 1e221824a2a50f1d00d88d16749131b6cdf5d3ca Author: Owen O'Malley Date: Tue Nov 4 14:49:32 2014 -0800 HIVE-8732. Fix column statistics. diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java index 65b5ca8..3235b0e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java @@ -394,7 +394,8 @@ void merge(ColumnStatisticsImpl other) { } else if (str.minimum != null) { if (minimum.compareTo(str.minimum) > 0) { minimum = new Text(str.getMinimum()); - } else if (maximum.compareTo(str.maximum) < 0) { + } + if (maximum.compareTo(str.maximum) < 0) { maximum = new Text(str.getMaximum()); } } @@ -563,7 +564,8 @@ void merge(ColumnStatisticsImpl other) { } else if (dec.minimum != null) { if (minimum.compareTo(dec.minimum) > 0) { minimum = dec.minimum; - } else if (maximum.compareTo(dec.maximum) < 0) { + } + if (maximum.compareTo(dec.maximum) < 0) { maximum = dec.maximum; } if (sum == null || dec.sum == null) { @@ -671,7 +673,8 @@ void merge(ColumnStatisticsImpl other) { } else if (dateStats.minimum != null) { if (minimum > dateStats.minimum) { minimum = dateStats.minimum; - } else if (maximum < dateStats.maximum) { + } + if (maximum < dateStats.maximum) { maximum = dateStats.maximum; } } @@ -767,7 +770,8 @@ void merge(ColumnStatisticsImpl other) { } else if (timestampStats.minimum != null) { if (minimum > timestampStats.minimum) { minimum = timestampStats.minimum; - } else if (maximum < timestampStats.maximum) { + } + if (maximum < timestampStats.maximum) { maximum = timestampStats.maximum; } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java new file mode 100644 index 0000000..ff06ca4 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java @@ 
-0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.orc; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.Text; +import org.junit.Test; + +import java.sql.Timestamp; + +import static junit.framework.Assert.assertEquals; + +/** + * Test ColumnStatisticsImpl for ORC. 
+ */
+public class TestColumnStatistics {
+
+  /** Merges integer statistics and expects the combined range to be 1 to 1000. */
+  @Test
+  public void testLongMerge() throws Exception {
+    final ObjectInspector oi = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
+    final ColumnStatisticsImpl target = ColumnStatisticsImpl.create(oi);
+    final ColumnStatisticsImpl incoming = ColumnStatisticsImpl.create(oi);
+    // The target is fed the same value twice; only the incoming side carries 1 and 1000.
+    target.updateInteger(10);
+    target.updateInteger(10);
+    incoming.updateInteger(1);
+    incoming.updateInteger(1000);
+
+    target.merge(incoming);
+    IntegerColumnStatistics merged = (IntegerColumnStatistics) target;
+    assertEquals(1, merged.getMinimum());
+    assertEquals(1000, merged.getMaximum());
+  }
+
+  /** Merges double statistics and expects the combined range to be 1.0 to 1000.0. */
+  @Test
+  public void testDoubleMerge() throws Exception {
+    final ObjectInspector oi = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
+    final ColumnStatisticsImpl target = ColumnStatisticsImpl.create(oi);
+    final ColumnStatisticsImpl incoming = ColumnStatisticsImpl.create(oi);
+    target.updateDouble(10.0);
+    target.updateDouble(100.0);
+    incoming.updateDouble(1.0);
+    incoming.updateDouble(1000.0);
+
+    target.merge(incoming);
+    DoubleColumnStatistics merged = (DoubleColumnStatistics) target;
+    assertEquals(1.0, merged.getMinimum(), 0.001);
+    assertEquals(1000.0, merged.getMaximum(), 0.001);
+  }
+
+  /** Merges string statistics and expects "anne" as minimum and "erin" as maximum. */
+  @Test
+  public void testStringMerge() throws Exception {
+    final ObjectInspector oi = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    final ColumnStatisticsImpl target = ColumnStatisticsImpl.create(oi);
+    final ColumnStatisticsImpl incoming = ColumnStatisticsImpl.create(oi);
+    target.updateString(new Text("bob"));
+    target.updateString(new Text("david"));
+    target.updateString(new Text("charles"));
+    incoming.updateString(new Text("anne"));
+    incoming.updateString(new Text("erin"));
+
+    target.merge(incoming);
+    StringColumnStatistics merged = (StringColumnStatistics) target;
+    assertEquals("anne", merged.getMinimum());
+    assertEquals("erin", merged.getMaximum());
+  }
+
+  /** Merges date statistics and expects the combined range to be day 10 to day 2000. */
+  @Test
+  public void testDateMerge() throws Exception {
+    final ObjectInspector oi = PrimitiveObjectInspectorFactory.javaDateObjectInspector;
+    final ColumnStatisticsImpl target = ColumnStatisticsImpl.create(oi);
+    final ColumnStatisticsImpl incoming = ColumnStatisticsImpl.create(oi);
+    target.updateDate(new DateWritable(1000));
+    target.updateDate(new DateWritable(100));
+    incoming.updateDate(new DateWritable(10));
+    incoming.updateDate(new DateWritable(2000));
+
+    target.merge(incoming);
+    DateColumnStatistics merged = (DateColumnStatistics) target;
+    assertEquals(new DateWritable(10), merged.getMinimum());
+    assertEquals(new DateWritable(2000), merged.getMaximum());
+  }
+
+  /** Merges timestamp statistics and expects getTime() of 1 and 1000 for the bounds. */
+  @Test
+  public void testTimestampMerge() throws Exception {
+    final ObjectInspector oi = PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
+    final ColumnStatisticsImpl target = ColumnStatisticsImpl.create(oi);
+    final ColumnStatisticsImpl incoming = ColumnStatisticsImpl.create(oi);
+    target.updateTimestamp(new Timestamp(10));
+    target.updateTimestamp(new Timestamp(100));
+    incoming.updateTimestamp(new Timestamp(1));
+    incoming.updateTimestamp(new Timestamp(1000));
+
+    target.merge(incoming);
+    TimestampColumnStatistics merged = (TimestampColumnStatistics) target;
+    assertEquals(1, merged.getMinimum().getTime());
+    assertEquals(1000, merged.getMaximum().getTime());
+  }
+
+  /** Merges decimal statistics and expects the combined range to be 1 to 1000. */
+  @Test
+  public void testDecimalMerge() throws Exception {
+    final ObjectInspector oi = PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector;
+    final ColumnStatisticsImpl target = ColumnStatisticsImpl.create(oi);
+    final ColumnStatisticsImpl incoming = ColumnStatisticsImpl.create(oi);
+    target.updateDecimal(HiveDecimal.create(10));
+    target.updateDecimal(HiveDecimal.create(100));
+    incoming.updateDecimal(HiveDecimal.create(1));
+    incoming.updateDecimal(HiveDecimal.create(1000));
+
+    target.merge(incoming);
+    DecimalColumnStatistics merged = (DecimalColumnStatistics) target;
+    assertEquals(1, merged.getMinimum().longValue());
+    assertEquals(1000, merged.getMaximum().longValue());
+  }
+}