diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index 73cf0f5..67ce151 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -17,7 +17,8 @@ import java.sql.Timestamp; import java.util.ArrayList; -import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils; +import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; +import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -30,7 +31,6 @@ import org.apache.hadoop.io.Writable; import parquet.column.Dictionary; -import parquet.example.data.simple.NanoTime; import parquet.io.api.Binary; import parquet.io.api.Converter; import parquet.io.api.PrimitiveConverter; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java new file mode 100644 index 0000000..85cf5e7 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTime.java @@ -0,0 +1,63 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.parquet.timestamp; + +import java.nio.ByteBuffer; + +import parquet.Preconditions; +import parquet.io.api.Binary; +import parquet.io.api.RecordConsumer; +/** + * Provides a wrapper representing a parquet-timestamp (Julian day number plus nanoseconds + * within that day), with methods to convert to and from its 12-byte binary form. + */ +public class NanoTime { + private final int julianDay; + private final long timeOfDayNanos; + public static NanoTime fromBinary(Binary bytes) { + Preconditions.checkArgument(bytes.length() == 12, "Must be 12 bytes"); + ByteBuffer buf = bytes.toByteBuffer(); + return new NanoTime(buf.getInt(), buf.getLong()); + } + + public NanoTime(int julianDay, long timeOfDayNanos) { + this.julianDay = julianDay; + this.timeOfDayNanos = timeOfDayNanos; + } + + public int getJulianDay() { + return julianDay; + } + + public long getTimeOfDayNanos() { + return timeOfDayNanos; + } + + public Binary toBinary() { + ByteBuffer buf = ByteBuffer.allocate(12); + buf.putInt(julianDay); + buf.putLong(timeOfDayNanos); + buf.flip(); + return Binary.fromByteBuffer(buf); + } + + public void writeValue(RecordConsumer recordConsumer) { + recordConsumer.addBinary(toBinary()); + } + + @Override + public String toString() { + return "NanoTime{julianDay="+julianDay+", timeOfDayNanos="+timeOfDayNanos+"}"; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java new file mode 100644 index 0000000..c647b24 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java @@ -0,0 +1,84 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.parquet.timestamp; + +import java.sql.Timestamp; +import java.util.Calendar; +import java.util.TimeZone; + +import jodd.datetime.JDateTime; + +/** + * Utilities for converting between java.sql.Timestamp and parquet timestamp (NanoTime). + * This utilizes the Jodd library. + */ +public class NanoTimeUtils { + static final long NANOS_PER_SECOND = 1000000000; + static final long SECONDS_PER_MINUTE = 60; + static final long MINUTES_PER_HOUR = 60; + + private static final ThreadLocal parquetTsCalendar = new ThreadLocal(); + + private static Calendar getCalendar() { + //Calendar.getInstance calculates the current-time needlessly, so cache an instance. + if (parquetTsCalendar.get() == null) { + parquetTsCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT"))); + } + return parquetTsCalendar.get(); + } + + public static NanoTime getNanoTime(Timestamp ts) { + + Calendar calendar = getCalendar(); + calendar.setTime(ts); + JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR), + calendar.get(Calendar.MONTH) + 1, //Calendar.MONTH is 0-based; add 1 for the 1-based month passed to JDateTime. 
+ calendar.get(Calendar.DAY_OF_MONTH)); + int days = jDateTime.getJulianDayNumber(); + + long hour = calendar.get(Calendar.HOUR_OF_DAY); + long minute = calendar.get(Calendar.MINUTE); + long second = calendar.get(Calendar.SECOND); + long nanos = ts.getNanos(); + long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_SECOND * SECONDS_PER_MINUTE * minute + + NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR * hour; + return new NanoTime(days, nanosOfDay); + } + + public static Timestamp getTimestamp(NanoTime nt) { + int julianDay = nt.getJulianDay(); + long nanosOfDay = nt.getTimeOfDayNanos(); + + JDateTime jDateTime = new JDateTime((double) julianDay); + Calendar calendar = getCalendar(); + calendar.set(Calendar.YEAR, jDateTime.getYear()); + calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //Calendar.MONTH is 0-based; subtract 1 from JDateTime's 1-based month. + calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay()); + + long remainder = nanosOfDay; + int hour = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR)); + remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR); + int minutes = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE)); + remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE); + int seconds = (int) (remainder / (NANOS_PER_SECOND)); + long nanos = remainder % NANOS_PER_SECOND; + + calendar.set(Calendar.HOUR_OF_DAY, hour); + calendar.set(Calendar.MINUTE, minutes); + calendar.set(Calendar.SECOND, seconds); + Timestamp ts = new Timestamp(calendar.getTimeInMillis()); + ts.setNanos((int) nanos); + return ts; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java deleted file mode 100644 index 06987ad..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/utils/NanoTimeUtils.java +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Licensed under the Apache License, Version 2.0 (the 
"License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.parquet.utils; - -import java.sql.Date; -import java.sql.Timestamp; -import java.util.Calendar; -import java.util.TimeZone; - -import jodd.datetime.JDateTime; -import jodd.datetime.TimeUtil; -import parquet.example.data.simple.NanoTime; - -/** - * Utilities for converting from java.sql.Timestamp to parquet timestamp. - * This utilizes the Jodd library. - */ -public class NanoTimeUtils { - static final long NANOS_PER_SECOND = 1000000000; - static final long SECONDS_PER_MINUTE = 60; - static final long MINUTES_PER_HOUR = 60; - - private static final ThreadLocal parquetTsCalendar = new ThreadLocal(); - - private static Calendar getCalendar() { - //Calendar.getInstance calculates the current-time needlessly, so cache an instance. - if (parquetTsCalendar.get() == null) { - parquetTsCalendar.set(Calendar.getInstance(TimeZone.getTimeZone("GMT"))); - } - return parquetTsCalendar.get(); - } - - public static NanoTime getNanoTime(Timestamp ts) { - - Calendar calendar = getCalendar(); - calendar.setTime(ts); - JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR), - calendar.get(Calendar.MONTH) + 1, //java calendar index starting at 1. 
- calendar.get(Calendar.DAY_OF_MONTH)); - int days = jDateTime.getJulianDayNumber(); - - long hour = calendar.get(Calendar.HOUR_OF_DAY); - long minute = calendar.get(Calendar.MINUTE); - long second = calendar.get(Calendar.SECOND); - long nanos = ts.getNanos(); - long nanosOfDay = nanos + NANOS_PER_SECOND * second + NANOS_PER_SECOND * SECONDS_PER_MINUTE * minute + - NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR * hour; - return new NanoTime(days, nanosOfDay); - } - - public static Timestamp getTimestamp(NanoTime nt) { - int julianDay = nt.getJulianDay(); - long nanosOfDay = nt.getTimeOfDayNanos(); - - JDateTime jDateTime = new JDateTime((double) julianDay); - Calendar calendar = getCalendar(); - calendar.set(Calendar.YEAR, jDateTime.getYear()); - calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calender index starting at 1. - calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay()); - - long remainder = nanosOfDay; - int hour = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR)); - remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE * MINUTES_PER_HOUR); - int minutes = (int) (remainder / (NANOS_PER_SECOND * SECONDS_PER_MINUTE)); - remainder = remainder % (NANOS_PER_SECOND * SECONDS_PER_MINUTE); - int seconds = (int) (remainder / (NANOS_PER_SECOND)); - long nanos = remainder % NANOS_PER_SECOND; - - calendar.set(Calendar.HOUR_OF_DAY, hour); - calendar.set(Calendar.MINUTE, minutes); - calendar.set(Calendar.SECOND, seconds); - Timestamp ts = new Timestamp(calendar.getTimeInMillis()); - ts.setNanos((int) nanos); - return ts; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index 8bb9cb1..c7078ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -15,7 +15,8 @@ 
import java.sql.Timestamp; -import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils; +import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; +import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -29,7 +30,6 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Writable; -import parquet.example.data.simple.NanoTime; import parquet.io.ParquetEncodingException; import parquet.io.api.Binary; import parquet.io.api.RecordConsumer; diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java index f56a643..1776242 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java @@ -21,9 +21,10 @@ import junit.framework.Assert; import junit.framework.TestCase; -import org.apache.hadoop.hive.ql.io.parquet.utils.NanoTimeUtils; +import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; +import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; + -import parquet.example.data.simple.NanoTime; /** * Tests util-libraries used for parquet-timestamp.