diff --git ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java index e3ab3ac..d4c09c0 100644 --- ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java +++ ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java @@ -15578,6 +15578,21 @@ public Builder removeStripeStats(int index) { * optional uint32 rowIndexStride = 8; */ int getRowIndexStride(); + + // optional string writerTimezone = 9; + /** + * optional string writerTimezone = 9; + */ + boolean hasWriterTimezone(); + /** + * optional string writerTimezone = 9; + */ + java.lang.String getWriterTimezone(); + /** + * optional string writerTimezone = 9; + */ + com.google.protobuf.ByteString + getWriterTimezoneBytes(); } /** * Protobuf type {@code orc.proto.Footer} @@ -15682,6 +15697,11 @@ private Footer( rowIndexStride_ = input.readUInt32(); break; } + case 74: { + bitField0_ |= 0x00000010; + writerTimezone_ = input.readBytes(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -15942,6 +15962,49 @@ public int getRowIndexStride() { return rowIndexStride_; } + // optional string writerTimezone = 9; + public static final int WRITERTIMEZONE_FIELD_NUMBER = 9; + private java.lang.Object writerTimezone_; + /** + * optional string writerTimezone = 9; + */ + public boolean hasWriterTimezone() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + /** + * optional string writerTimezone = 9; + */ + public java.lang.String getWriterTimezone() { + java.lang.Object ref = writerTimezone_; + if (ref instanceof java.lang.String) { + return (java.lang.String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + java.lang.String s = bs.toStringUtf8(); + if (bs.isValidUtf8()) { + writerTimezone_ = s; + } + return s; + } + } + /** + * optional string writerTimezone = 9; + */ + public com.google.protobuf.ByteString + getWriterTimezoneBytes() { + java.lang.Object ref = writerTimezone_; + if (ref instanceof java.lang.String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + writerTimezone_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + private void initFields() { headerLength_ = 0L; contentLength_ = 0L; @@ -15951,6 +16014,7 @@ private void initFields() { numberOfRows_ = 0L; statistics_ = java.util.Collections.emptyList(); rowIndexStride_ = 0; + writerTimezone_ = ""; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -15988,6 +16052,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (((bitField0_ & 0x00000008) == 0x00000008)) { output.writeUInt32(8, rowIndexStride_); } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + output.writeBytes(9, getWriterTimezoneBytes()); + } getUnknownFields().writeTo(output); } @@ -16029,6 +16096,10 @@ public int getSerializedSize() { size += com.google.protobuf.CodedOutputStream .computeUInt32Size(8, rowIndexStride_); } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(9, getWriterTimezoneBytes()); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -16181,6 +16252,8 @@ public Builder clear() { } rowIndexStride_ = 0; bitField0_ = (bitField0_ & ~0x00000080); + writerTimezone_ = ""; + bitField0_ = (bitField0_ & ~0x00000100); return this; } @@ 
-16261,6 +16334,10 @@ public Builder clone() { to_bitField0_ |= 0x00000008; } result.rowIndexStride_ = rowIndexStride_; + if (((from_bitField0_ & 0x00000100) == 0x00000100)) { + to_bitField0_ |= 0x00000010; + } + result.writerTimezone_ = writerTimezone_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -16393,6 +16470,11 @@ public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer other) if (other.hasRowIndexStride()) { setRowIndexStride(other.getRowIndexStride()); } + if (other.hasWriterTimezone()) { + bitField0_ |= 0x00000100; + writerTimezone_ = other.writerTimezone_; + onChanged(); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -17512,6 +17594,80 @@ public Builder clearRowIndexStride() { return this; } + // optional string writerTimezone = 9; + private java.lang.Object writerTimezone_ = ""; + /** + * optional string writerTimezone = 9; + */ + public boolean hasWriterTimezone() { + return ((bitField0_ & 0x00000100) == 0x00000100); + } + /** + * optional string writerTimezone = 9; + */ + public java.lang.String getWriterTimezone() { + java.lang.Object ref = writerTimezone_; + if (!(ref instanceof java.lang.String)) { + java.lang.String s = ((com.google.protobuf.ByteString) ref) + .toStringUtf8(); + writerTimezone_ = s; + return s; + } else { + return (java.lang.String) ref; + } + } + /** + * optional string writerTimezone = 9; + */ + public com.google.protobuf.ByteString + getWriterTimezoneBytes() { + java.lang.Object ref = writerTimezone_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8( + (java.lang.String) ref); + writerTimezone_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + /** + * optional string writerTimezone = 9; + */ + public Builder setWriterTimezone( + java.lang.String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000100; + writerTimezone_ = value; + onChanged(); + return this; + } + /** + * optional string writerTimezone = 9; + */ + public Builder clearWriterTimezone() { + bitField0_ = (bitField0_ & ~0x00000100); + writerTimezone_ = getDefaultInstance().getWriterTimezone(); + onChanged(); + return this; + } + /** + * optional string writerTimezone = 9; + */ + public Builder setWriterTimezoneBytes( + com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000100; + writerTimezone_ = value; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:orc.proto.Footer) } @@ -18940,21 +19096,22 @@ public Builder setMagicBytes( "\002 \001(\014\"A\n\020StripeStatistics\022-\n\010colStats\030\001 " + "\003(\0132\033.orc.proto.ColumnStatistics\"<\n\010Meta" + "data\0220\n\013stripeStats\030\001 \003(\0132\033.orc.proto.St" + - "ripeStatistics\"\222\002\n\006Footer\022\024\n\014headerLengt", + "ripeStatistics\"\252\002\n\006Footer\022\024\n\014headerLengt", "h\030\001 \001(\004\022\025\n\rcontentLength\030\002 \001(\004\022-\n\007stripe" + "s\030\003 \003(\0132\034.orc.proto.StripeInformation\022\036\n" + "\005types\030\004 \003(\0132\017.orc.proto.Type\022-\n\010metadat" + "a\030\005 \003(\0132\033.orc.proto.UserMetadataItem\022\024\n\014" + "numberOfRows\030\006 \001(\004\022/\n\nstatistics\030\007 \003(\0132\033" + ".orc.proto.ColumnStatistics\022\026\n\016rowIndexS" + - "tride\030\010 \001(\r\"\305\001\n\nPostScript\022\024\n\014footerLeng" + - "th\030\001 
\001(\004\022/\n\013compression\030\002 \001(\0162\032.orc.prot" + - "o.CompressionKind\022\034\n\024compressionBlockSiz" + - "e\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\026\n\016metadata", - "Length\030\005 \001(\004\022\025\n\rwriterVersion\030\006 \001(\r\022\016\n\005m" + - "agic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000" + - "\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003B\"\n org.a" + - "pache.hadoop.hive.ql.io.orc" + "tride\030\010 \001(\r\022\026\n\016writerTimezone\030\t \001(\t\"\305\001\n\n" + + "PostScript\022\024\n\014footerLength\030\001 \001(\004\022/\n\013comp" + + "ression\030\002 \001(\0162\032.orc.proto.CompressionKin" + + "d\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007versi", + "on\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\r" + + "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017C" + + "ompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SN" + + "APPY\020\002\022\007\n\003LZO\020\003B\"\n org.apache.hadoop.hiv" + + "e.ql.io.orc" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -19092,7 +19249,7 @@ public Builder setMagicBytes( internal_static_orc_proto_Footer_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_orc_proto_Footer_descriptor, - new java.lang.String[] { "HeaderLength", "ContentLength", "Stripes", "Types", "Metadata", "NumberOfRows", "Statistics", "RowIndexStride", }); + new java.lang.String[] { "HeaderLength", "ContentLength", "Stripes", "Types", "Metadata", "NumberOfRows", "Statistics", "RowIndexStride", "WriterTimezone", }); internal_static_orc_proto_PostScript_descriptor = getDescriptor().getMessageTypes().get(22); internal_static_orc_proto_PostScript_fieldAccessorTable = new diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java index 9788c16..50a4f58 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java @@ -77,9 +77,13 @@ public static void main(String[] args) throws Exception { } } + boolean printTimeZone = false; + if (cli.hasOption('t')) { + printTimeZone = true; + } String[] files = cli.getArgs(); if (dumpData) printData(Arrays.asList(files), conf); - else printMetaData(Arrays.asList(files), conf, rowIndexCols); + else printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone); } private static void printData(List files, Configuration conf) throws IOException, @@ -90,13 +94,20 @@ private static void printData(List files, Configuration conf) throws IOE } private static void printMetaData(List files, Configuration conf, - List rowIndexCols) throws IOException { + List rowIndexCols, boolean printTimeZone) throws IOException { for (String filename : files) { System.out.println("Structure for " + filename); Path path = new Path(filename); Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf)); System.out.println("File Version: " + reader.getFileVersion().getName() + " with " + reader.getWriterVersion()); + if (printTimeZone) { + String tz = reader.getWriterTimeZone(); + if (tz == null || tz.isEmpty()) { + tz = "Unknown"; + } + System.out.println("Writer TimeZone: " + tz); + } RecordReaderImpl rows = 
(RecordReaderImpl) reader.rows(); System.out.println("Rows: " + reader.getNumberOfRows()); System.out.println("Compression: " + reader.getCompression()); @@ -278,6 +289,13 @@ static Options createOptions() { .withDescription("Should the data be printed") .create('d')); + // to avoid breaking unit tests (when run in different time zones) for file dump, printing + // of timezone is made optional + result.addOption(OptionBuilder + .withLongOpt("timezone") + .withDescription("Print writer's time zone") + .create('t')); + result.addOption(OptionBuilder .withLongOpt("help") .withDescription("print help message") diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java index f85c21b..86b4bd0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java @@ -139,6 +139,12 @@ OrcFile.WriterVersion getWriterVersion(); /** + * Get the time zone id of the writer of this file. + * @return time zone id + */ + String getWriterTimeZone(); + + /** * Options for creating a RecordReader. */ public static class Options { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java index 03f8085..711c216 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java @@ -199,6 +199,11 @@ public long getContentLength() { } @Override + public String getWriterTimeZone() { + return footer.getWriterTimezone(); + } + + @Override public int getRowIndexStride() { return footer.getRowIndexStride(); } @@ -533,7 +538,7 @@ public RecordReader rowsOptions(Options options) throws IOException { } return new RecordReaderImpl(this.getStripes(), fileSystem, path, options, footer.getTypesList(), codec, bufferSize, - footer.getRowIndexStride(), conf); + footer.getRowIndexStride(), conf, footer.getWriterTimezone()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 458ad21..f23ebf4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -26,12 +26,15 @@ import java.nio.ByteBuffer; import java.sql.Date; import java.sql.Timestamp; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.TimeZone; import java.util.TreeMap; import org.apache.commons.lang.StringUtils; @@ -117,7 +120,7 @@ // an array about which row groups aren't skipped private boolean[] includedRowGroups = null; private final Configuration conf; - + private final String writerTimeZoneId; private final ByteBufferAllocatorPool pool = new ByteBufferAllocatorPool(); private final ZeroCopyReaderShim zcr; @@ -265,8 +268,8 @@ static int findColumns(String[] columnNames, CompressionCodec codec, int bufferSize, long strideRate, - Configuration conf - ) throws IOException { + Configuration conf, + String writerTimeZoneId) throws IOException { this.path = path; this.file = fileSystem.open(path); this.codec = codec; @@ -274,6 +277,7 @@ static int findColumns(String[] columnNames, this.bufferSize = bufferSize; this.included = options.getInclude(); this.conf = conf; + this.writerTimeZoneId = writerTimeZoneId; this.sarg = 
options.getSearchArgument(); if (sarg != null) { sargLeaves = sarg.getLeaves(); @@ -319,7 +323,7 @@ static int findColumns(String[] columnNames, firstRow = skippedRows; totalRowCount = rows; - reader = createTreeReader(path, 0, types, included, conf); + reader = createTreeReader(path, 0, types, included, conf, writerTimeZoneId); indexes = new OrcProto.RowIndex[types.size()]; bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()]; rowIndexStride = strideRate; @@ -1042,10 +1046,28 @@ void skipRows(long items) throws IOException { private static class TimestampTreeReader extends TreeReader{ private IntegerReader data = null; private IntegerReader nanos = null; - private final LongColumnVector nanoVector = new LongColumnVector(); + private TimeZone writerTimeZone; + private TimeZone readerTimeZone; + private long base_timestamp; - TimestampTreeReader(Path path, int columnId, Configuration conf) { + TimestampTreeReader(Path path, int columnId, Configuration conf, String writerTimeZoneId) + throws IOException { super(path, columnId, conf); + this.readerTimeZone = TimeZone.getDefault(); + // to make sure new readers read old files in the same way + if (writerTimeZoneId == null || writerTimeZoneId.isEmpty()) { + this.writerTimeZone = readerTimeZone; + } else { + this.writerTimeZone = TimeZone.getTimeZone(writerTimeZoneId); + } + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + sdf.setTimeZone(writerTimeZone); + try { + this.base_timestamp = + sdf.parse(WriterImpl.BASE_TIMESTAMP_STRING).getTime() / WriterImpl.MILLIS_PER_SECOND; + } catch (ParseException e) { + throw new IOException("Unable to create base timestamp", e); + } } @Override @@ -1087,9 +1109,7 @@ Object next(Object previous) throws IOException { } else { result = (TimestampWritable) previous; } - Timestamp ts = new Timestamp(0); - long millis = (data.next() + WriterImpl.BASE_TIMESTAMP) * - WriterImpl.MILLIS_PER_SECOND; + long millis = (data.next() + base_timestamp) * WriterImpl.MILLIS_PER_SECOND; int newNanos = parseNanos(nanos.next()); // fix the rounding when we divided by 1000. if (millis >= 0) { @@ -1097,7 +1117,12 @@ Object next(Object previous) throws IOException { } else { millis -= newNanos / 1000000; } - ts.setTime(millis); + // Adjust for the time zone difference between the reader and the writer. This + // also accounts for daylight saving time. The
offset will be 0 when a new reader + // reads an old file, since the read and write time zones will then be the same. + long offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis); + long adjustedMillis = millis + offset; + Timestamp ts = new Timestamp(adjustedMillis); ts.setNanos(newNanos); result.set(ts); } @@ -1869,8 +1894,9 @@ Object nextVector(Object previousVector, long batchSize) throws IOException { private final String[] fieldNames; StructTreeReader(Path path, int columnId, - List<OrcProto.Type> types, - boolean[] included, Configuration conf) throws IOException { + List<OrcProto.Type> types, + boolean[] included, Configuration conf, + String writerTimeZoneId) throws IOException { super(path, columnId, conf); OrcProto.Type type = types.get(columnId); int fieldCount = type.getFieldNamesCount(); @@ -1879,7 +1905,8 @@ Object nextVector(Object previousVector, long batchSize) throws IOException { for(int i=0; i < fieldCount; ++i) { int subtype = type.getSubtypes(i); if (included == null || included[subtype]) { - this.fields[i] = createTreeReader(path, subtype, types, included, conf); + this.fields[i] = createTreeReader(path, subtype, types, included, conf, + writerTimeZoneId); } this.fieldNames[i] = type.getFieldNames(i); } @@ -1971,8 +1998,10 @@ void skipRows(long items) throws IOException { private RunLengthByteReader tags; UnionTreeReader(Path path, int columnId, - List<OrcProto.Type> types, - boolean[] included, Configuration conf) throws IOException { + List<OrcProto.Type> types, + boolean[] included, + Configuration conf, + String writerTimeZoneId) throws IOException { super(path, columnId, conf); OrcProto.Type type = types.get(columnId); int fieldCount = type.getSubtypesCount(); @@ -1980,7 +2009,8 @@ void skipRows(long items) throws IOException { for(int i=0; i < fieldCount; ++i) { int subtype = type.getSubtypes(i); if (included == null || included[subtype]) { - this.fields[i] = createTreeReader(path, subtype, types, included, conf); + this.fields[i] = createTreeReader(path, subtype, types, included, conf, + writerTimeZoneId); } } } @@ -2050,12 +2080,14 @@ void skipRows(long items) throws IOException { private IntegerReader lengths = null; ListTreeReader(Path path, int columnId, - List<OrcProto.Type> types, - boolean[] included, Configuration conf) throws IOException { + List<OrcProto.Type> types, + boolean[] included, + Configuration conf, + String writerTimeZoneId) throws IOException { super(path, columnId, conf); OrcProto.Type type = types.get(columnId); elementReader = createTreeReader(path, type.getSubtypes(0), types, - included, conf); + included, conf, writerTimeZoneId); } @Override @@ -2140,20 +2172,24 @@ void skipRows(long items) throws IOException { private IntegerReader lengths = null; MapTreeReader(Path path, - int columnId, - List<OrcProto.Type> types, - boolean[] included, Configuration conf) throws IOException { + int columnId, + List<OrcProto.Type> types, + boolean[] included, + Configuration conf, + String writerTimeZoneId) throws IOException { super(path, columnId, conf); OrcProto.Type type = types.get(columnId); int keyColumn = type.getSubtypes(0); int valueColumn = type.getSubtypes(1); if (included == null || included[keyColumn]) { - keyReader = createTreeReader(path, keyColumn, types, included, conf); + keyReader = createTreeReader(path, keyColumn, types, included, conf, + writerTimeZoneId); } else { keyReader = null; } if (included == null || included[valueColumn]) { - valueReader = createTreeReader(path, valueColumn, types, included, conf); + valueReader = createTreeReader(path, valueColumn, types, included, conf, + writerTimeZoneId); } else { valueReader = null; } @@
-2233,11 +2269,11 @@ void skipRows(long items) throws IOException { } private static TreeReader createTreeReader(Path path, - int columnId, - List types, - boolean[] included, - Configuration conf - ) throws IOException { + int columnId, + List types, + boolean[] included, + Configuration conf, + String writerTimeZoneId) throws IOException { OrcProto.Type type = types.get(columnId); switch (type.getKind()) { case BOOLEAN: @@ -2269,7 +2305,7 @@ private static TreeReader createTreeReader(Path path, case BINARY: return new BinaryTreeReader(path, columnId, conf); case TIMESTAMP: - return new TimestampTreeReader(path, columnId, conf); + return new TimestampTreeReader(path, columnId, conf, writerTimeZoneId); case DATE: return new DateTreeReader(path, columnId, conf); case DECIMAL: @@ -2277,13 +2313,13 @@ private static TreeReader createTreeReader(Path path, int scale = type.hasScale()? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE; return new DecimalTreeReader(path, columnId, precision, scale, conf); case STRUCT: - return new StructTreeReader(path, columnId, types, included, conf); + return new StructTreeReader(path, columnId, types, included, conf, writerTimeZoneId); case LIST: - return new ListTreeReader(path, columnId, types, included, conf); + return new ListTreeReader(path, columnId, types, included, conf, writerTimeZoneId); case MAP: - return new MapTreeReader(path, columnId, types, included, conf); + return new MapTreeReader(path, columnId, types, included, conf, writerTimeZoneId); case UNION: - return new UnionTreeReader(path, columnId, types, included, conf); + return new UnionTreeReader(path, columnId, types, included, conf, writerTimeZoneId); default: throw new IllegalArgumentException("Unsupported type " + type.getKind()); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java index 79dc5a1..466f740 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java @@ -26,10 +26,10 @@ import java.nio.ByteBuffer; import java.sql.Timestamp; import java.util.ArrayList; -import java.util.Arrays; import java.util.EnumSet; import java.util.List; import java.util.Map; +import java.util.TimeZone; import java.util.TreeMap; import org.apache.commons.logging.Log; @@ -152,6 +152,7 @@ private final OrcFile.CompressionStrategy compressionStrategy; private final boolean[] bloomFilterColumns; private final double bloomFilterFpp; + private final String writerTimeZone; WriterImpl(FileSystem fs, Path path, @@ -218,6 +219,7 @@ public Writer getWriter() { throw new IllegalArgumentException("Row stride must be at least " + MIN_ROW_INDEX_STRIDE); } + this.writerTimeZone = TimeZone.getDefault().getID(); // ensure that we are able to handle callbacks before we register ourselves memoryManager.addWriter(path, stripeSize, this); @@ -1511,13 +1513,13 @@ void recordPosition(PositionRecorder recorder) throws IOException { } static final int MILLIS_PER_SECOND = 1000; - static final long BASE_TIMESTAMP = - Timestamp.valueOf("2015-01-01 00:00:00").getTime() / MILLIS_PER_SECOND; + static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00"; private static class TimestampTreeWriter extends TreeWriter { private final IntegerWriter seconds; private final IntegerWriter nanos; private final boolean isDirectV2; + private final long base_timestamp; TimestampTreeWriter(int columnId, ObjectInspector inspector, @@ -1530,6 +1532,8 @@ void recordPosition(PositionRecorder 
recorder) throws IOException { this.nanos = createIntegerWriter(writer.createStream(id, OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer); recordPosition(rowIndexPosition); + // for unit tests to set different time zones + this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND; } @Override @@ -1550,7 +1554,7 @@ void write(Object obj) throws IOException { ((TimestampObjectInspector) inspector). getPrimitiveJavaObject(obj); indexStatistics.updateTimestamp(val); - seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP); + seconds.write((val.getTime() / MILLIS_PER_SECOND) - base_timestamp); nanos.write(formatNanos(val.getNanos())); if (createBloomFilter) { bloomFilter.addLong(val.getTime()); @@ -2340,6 +2344,7 @@ private int writeFooter(long bodyLength) throws IOException { builder.setHeaderLength(headerLength); builder.setNumberOfRows(rowCount); builder.setRowIndexStride(rowIndexStride); + builder.setWriterTimezone(writerTimeZone); // populate raw data size rawDataSize = computeRawDataSize(); // serialize the types diff --git ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto index 6d5f482..9cad63f 100644 --- ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto +++ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto @@ -190,6 +190,7 @@ message Footer { optional uint64 numberOfRows = 6; repeated ColumnStatistics statistics = 7; optional uint32 rowIndexStride = 8; + optional string writerTimezone = 9; } enum CompressionKind { diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java new file mode 100644 index 0000000..70c2b0e --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java @@ -0,0 +1,428 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.io.orc; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertNotNull; + +import java.io.File; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.TimeZone; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hive.common.util.HiveTestUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import com.google.common.collect.Lists; + +/** + * + */ +@RunWith(Parameterized.class) +public class TestOrcTimezone1 { + Path workDir = new Path(System.getProperty("test.tmp.dir", + "target" + File.separator + "test" + File.separator + "tmp")); + Configuration conf; + FileSystem fs; + Path testFilePath; + String writerTimeZone; + String readerTimeZone; + static TimeZone defaultTimeZone = TimeZone.getDefault(); + + public TestOrcTimezone1(String writerTZ, String readerTZ) { + this.writerTimeZone = writerTZ; + this.readerTimeZone = readerTZ; + } + + @Parameterized.Parameters + public static Collection data() { + List result = Arrays.asList(new Object[][]{ + {"GMT-12:00", "GMT+14:00"}, + {"Asia/Novokuznetsk", "America/Jamaica"}, + /* no difference in DST */ + {"America/Los_Angeles", "America/Los_Angeles"}, /* same timezone both with DST */ + {"Europe/Berlin", "Europe/Berlin"}, /* same as above but europe */ + {"America/Phoenix", "Asia/Kolkata"} /* Writer no DST, Reader no DST */, + {"Europe/Berlin", "America/Los_Angeles"} /* Writer DST, Reader DST */, + {"Europe/Berlin", "America/Chicago"} /* Writer DST, Reader DST */, + + /* with DST difference */ + {"Europe/Berlin", "UTC"}, + {"UTC", "Europe/Berlin"} /* Writer no DST, Reader DST */, + 
{"America/Los_Angeles", "Asia/Kolkata"} /* Writer DST, Reader no DST */, + {"Europe/Berlin", "Asia/Kolkata"} /* Writer DST, Reader no DST */, + + // {"Asia/Saigon", "Pacific/Enderbury"}, + }); + return result; + } + + @Rule + public TestName testCaseName = new TestName(); + + @Before + public void openFileSystem() throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + testFilePath = new Path(workDir, "TestOrcFile." + + testCaseName.getMethodName() + ".orc"); + fs.delete(testFilePath, false); + } + + @After + public void restoreTimeZone() { + TimeZone.setDefault(defaultTimeZone); + } + + @Test + public void testTimestampWriter() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector(Timestamp.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone)); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000)); + assertEquals(writerTimeZone, TimeZone.getDefault().getID()); + List ts = Lists.newArrayList(); + ts.add("2003-01-01 01:00:00.000000222"); + ts.add("1996-08-02 09:00:00.723100809"); + ts.add("1999-01-01 02:00:00.999999999"); + ts.add("1995-01-01 03:00:00.688888888"); + ts.add("2002-01-01 04:00:00.1"); + ts.add("2010-03-02 05:00:00.000009001"); + ts.add("2005-01-01 06:00:00.000002229"); + ts.add("2006-01-01 07:00:00.900203003"); + ts.add("2003-01-01 08:00:00.800000007"); + ts.add("1998-11-02 10:00:00.857340643"); + ts.add("2008-10-02 11:00:00.0"); + ts.add("9999-01-01 00:00:00.000999"); + for (String t : ts) { + writer.addRow(Timestamp.valueOf(t)); + } + writer.close(); + + TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone)); + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + assertEquals(readerTimeZone, TimeZone.getDefault().getID()); + RecordReader rows = reader.rows(null); + int idx = 0; + while (rows.hasNext()) { + Object row = rows.next(null); + Timestamp got = ((TimestampWritable) row).getTimestamp(); + assertEquals(ts.get(idx++), got.toString()); + } + rows.close(); + } + + @Test + public void testReadFormat_0_11() throws Exception { + TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone)); + Path oldFilePath = + new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc")); + Reader reader = OrcFile.createReader(oldFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + + int stripeCount = 0; + int rowCount = 0; + long currentOffset = -1; + for(StripeInformation stripe : reader.getStripes()) { + stripeCount += 1; + rowCount += stripe.getNumberOfRows(); + if (currentOffset < 0) { + currentOffset = stripe.getOffset() + stripe.getIndexLength() + + stripe.getDataLength() + stripe.getFooterLength(); + } else { + assertEquals(currentOffset, stripe.getOffset()); + currentOffset += stripe.getIndexLength() + stripe.getDataLength() + + stripe.getFooterLength(); + } + } + assertEquals(reader.getNumberOfRows(), rowCount); + assertEquals(2, stripeCount); + + // check the stats + ColumnStatistics[] stats = reader.getStatistics(); + assertEquals(7500, stats[1].getNumberOfValues()); + assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getFalseCount()); + assertEquals(3750, ((BooleanColumnStatistics) stats[1]).getTrueCount()); + assertEquals("count: 7500 hasNull: false true: 3750", stats[1].toString()); + + assertEquals(2048, 
((IntegerColumnStatistics) stats[3]).getMaximum()); + assertEquals(1024, ((IntegerColumnStatistics) stats[3]).getMinimum()); + assertEquals(true, ((IntegerColumnStatistics) stats[3]).isSumDefined()); + assertEquals(11520000, ((IntegerColumnStatistics) stats[3]).getSum()); + assertEquals("count: 7500 hasNull: false min: 1024 max: 2048 sum: 11520000", + stats[3].toString()); + + assertEquals(Long.MAX_VALUE, + ((IntegerColumnStatistics) stats[5]).getMaximum()); + assertEquals(Long.MAX_VALUE, + ((IntegerColumnStatistics) stats[5]).getMinimum()); + assertEquals(false, ((IntegerColumnStatistics) stats[5]).isSumDefined()); + assertEquals( + "count: 7500 hasNull: false min: 9223372036854775807 max: 9223372036854775807", + stats[5].toString()); + + assertEquals(-15.0, ((DoubleColumnStatistics) stats[7]).getMinimum()); + assertEquals(-5.0, ((DoubleColumnStatistics) stats[7]).getMaximum()); + assertEquals(-75000.0, ((DoubleColumnStatistics) stats[7]).getSum(), + 0.00001); + assertEquals("count: 7500 hasNull: false min: -15.0 max: -5.0 sum: -75000.0", + stats[7].toString()); + + assertEquals("count: 7500 hasNull: false min: bye max: hi sum: 0", stats[9].toString()); + + // check the inspectors + StructObjectInspector readerInspector = (StructObjectInspector) reader + .getObjectInspector(); + assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory()); + assertEquals("struct>>,list:array>," + + "map:map>,ts:timestamp," + + "decimal1:decimal(38,18)>", readerInspector.getTypeName()); + List fields = readerInspector + .getAllStructFieldRefs(); + BooleanObjectInspector bo = (BooleanObjectInspector) readerInspector + .getStructFieldRef("boolean1").getFieldObjectInspector(); + ByteObjectInspector by = (ByteObjectInspector) readerInspector + .getStructFieldRef("byte1").getFieldObjectInspector(); + ShortObjectInspector sh = (ShortObjectInspector) readerInspector + .getStructFieldRef("short1").getFieldObjectInspector(); + IntObjectInspector in = (IntObjectInspector) readerInspector + .getStructFieldRef("int1").getFieldObjectInspector(); + LongObjectInspector lo = (LongObjectInspector) readerInspector + .getStructFieldRef("long1").getFieldObjectInspector(); + FloatObjectInspector fl = (FloatObjectInspector) readerInspector + .getStructFieldRef("float1").getFieldObjectInspector(); + DoubleObjectInspector dbl = (DoubleObjectInspector) readerInspector + .getStructFieldRef("double1").getFieldObjectInspector(); + BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector + .getStructFieldRef("bytes1").getFieldObjectInspector(); + StringObjectInspector st = (StringObjectInspector) readerInspector + .getStructFieldRef("string1").getFieldObjectInspector(); + StructObjectInspector mid = (StructObjectInspector) readerInspector + .getStructFieldRef("middle").getFieldObjectInspector(); + List midFields = mid.getAllStructFieldRefs(); + ListObjectInspector midli = (ListObjectInspector) midFields.get(0) + .getFieldObjectInspector(); + StructObjectInspector inner = (StructObjectInspector) midli + .getListElementObjectInspector(); + List inFields = inner.getAllStructFieldRefs(); + ListObjectInspector li = (ListObjectInspector) readerInspector + .getStructFieldRef("list").getFieldObjectInspector(); + MapObjectInspector ma = (MapObjectInspector) readerInspector + .getStructFieldRef("map").getFieldObjectInspector(); + TimestampObjectInspector tso = (TimestampObjectInspector) readerInspector + .getStructFieldRef("ts").getFieldObjectInspector(); + HiveDecimalObjectInspector dco = (HiveDecimalObjectInspector) 
readerInspector + .getStructFieldRef("decimal1").getFieldObjectInspector(); + StringObjectInspector mk = (StringObjectInspector) ma + .getMapKeyObjectInspector(); + RecordReader rows = reader.rows(); + Object row = rows.next(null); + assertNotNull(row); + // check the contents of the first row + assertEquals(false, + bo.get(readerInspector.getStructFieldData(row, fields.get(0)))); + assertEquals(1, + by.get(readerInspector.getStructFieldData(row, fields.get(1)))); + assertEquals(1024, + sh.get(readerInspector.getStructFieldData(row, fields.get(2)))); + assertEquals(65536, + in.get(readerInspector.getStructFieldData(row, fields.get(3)))); + assertEquals(Long.MAX_VALUE, + lo.get(readerInspector.getStructFieldData(row, fields.get(4)))); + assertEquals(1.0, + fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001); + assertEquals(-15.0, + dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), + 0.00001); + assertEquals(bytes(0, 1, 2, 3, 4), + bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, + fields.get(7)))); + assertEquals("hi", st.getPrimitiveJavaObject(readerInspector + .getStructFieldData(row, fields.get(8)))); + List midRow = midli.getList(mid.getStructFieldData( + readerInspector.getStructFieldData(row, fields.get(9)), + midFields.get(0))); + assertNotNull(midRow); + assertEquals(2, midRow.size()); + assertEquals(1, + in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0)))); + assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData( + midRow.get(0), inFields.get(1)))); + assertEquals(2, + in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0)))); + assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData( + midRow.get(1), inFields.get(1)))); + List list = li.getList(readerInspector.getStructFieldData(row, + fields.get(10))); + assertEquals(2, list.size()); + assertEquals(3, + in.get(inner.getStructFieldData(list.get(0), inFields.get(0)))); + assertEquals("good", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(0), inFields.get(1)))); + assertEquals(4, + in.get(inner.getStructFieldData(list.get(1), inFields.get(0)))); + assertEquals("bad", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(1), inFields.get(1)))); + Map map = ma.getMap(readerInspector.getStructFieldData(row, + fields.get(11))); + assertEquals(0, map.size()); + assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"), + tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(12)))); + assertEquals(HiveDecimal.create("12345678.6547456"), + dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(13)))); + + // check the contents of second row + assertEquals(true, rows.hasNext()); + rows.seekToRow(7499); + row = rows.next(null); + assertEquals(true, + bo.get(readerInspector.getStructFieldData(row, fields.get(0)))); + assertEquals(100, + by.get(readerInspector.getStructFieldData(row, fields.get(1)))); + assertEquals(2048, + sh.get(readerInspector.getStructFieldData(row, fields.get(2)))); + assertEquals(65536, + in.get(readerInspector.getStructFieldData(row, fields.get(3)))); + assertEquals(Long.MAX_VALUE, + lo.get(readerInspector.getStructFieldData(row, fields.get(4)))); + assertEquals(2.0, + fl.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001); + assertEquals(-5.0, + dbl.get(readerInspector.getStructFieldData(row, fields.get(6))), + 0.00001); + assertEquals(bytes(), bi.getPrimitiveWritableObject(readerInspector + .getStructFieldData(row, 
fields.get(7)))); + assertEquals("bye", st.getPrimitiveJavaObject(readerInspector + .getStructFieldData(row, fields.get(8)))); + midRow = midli.getList(mid.getStructFieldData( + readerInspector.getStructFieldData(row, fields.get(9)), + midFields.get(0))); + assertNotNull(midRow); + assertEquals(2, midRow.size()); + assertEquals(1, + in.get(inner.getStructFieldData(midRow.get(0), inFields.get(0)))); + assertEquals("bye", st.getPrimitiveJavaObject(inner.getStructFieldData( + midRow.get(0), inFields.get(1)))); + assertEquals(2, + in.get(inner.getStructFieldData(midRow.get(1), inFields.get(0)))); + assertEquals("sigh", st.getPrimitiveJavaObject(inner.getStructFieldData( + midRow.get(1), inFields.get(1)))); + list = li.getList(readerInspector.getStructFieldData(row, fields.get(10))); + assertEquals(3, list.size()); + assertEquals(100000000, + in.get(inner.getStructFieldData(list.get(0), inFields.get(0)))); + assertEquals("cat", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(0), inFields.get(1)))); + assertEquals(-100000, + in.get(inner.getStructFieldData(list.get(1), inFields.get(0)))); + assertEquals("in", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(1), inFields.get(1)))); + assertEquals(1234, + in.get(inner.getStructFieldData(list.get(2), inFields.get(0)))); + assertEquals("hat", st.getPrimitiveJavaObject(inner.getStructFieldData( + list.get(2), inFields.get(1)))); + map = ma.getMap(readerInspector.getStructFieldData(row, fields.get(11))); + assertEquals(2, map.size()); + boolean[] found = new boolean[2]; + for(Object key : map.keySet()) { + String str = mk.getPrimitiveJavaObject(key); + if (str.equals("chani")) { + assertEquals(false, found[0]); + assertEquals(5, + in.get(inner.getStructFieldData(map.get(key), inFields.get(0)))); + assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData( + map.get(key), inFields.get(1)))); + found[0] = true; + } else if (str.equals("mauddib")) { + assertEquals(false, found[1]); + assertEquals(1, + in.get(inner.getStructFieldData(map.get(key), inFields.get(0)))); + assertEquals(str, st.getPrimitiveJavaObject(inner.getStructFieldData( + map.get(key), inFields.get(1)))); + found[1] = true; + } else { + throw new IllegalArgumentException("Unknown key " + str); + } + } + assertEquals(true, found[0]); + assertEquals(true, found[1]); + assertEquals(Timestamp.valueOf("2000-03-12 15:00:01"), + tso.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(12)))); + assertEquals(HiveDecimal.create("12345678.6547457"), + dco.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, + fields.get(13)))); + + // handle the close up + assertEquals(false, rows.hasNext()); + rows.close(); + } + + private static BytesWritable bytes(int... items) { + BytesWritable result = new BytesWritable(); + result.setSize(items.length); + for(int i=0; i < items.length; ++i) { + result.getBytes()[i] = (byte) items[i]; + } + return result; + } + +} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java new file mode 100644 index 0000000..387e08e --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone2.java @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.io.orc; + +import static junit.framework.Assert.assertEquals; + +import java.io.File; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Random; +import java.util.TimeZone; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import com.google.common.collect.Lists; + +/** + * + */ +@RunWith(Parameterized.class) +public class TestOrcTimezone2 { + Path workDir = new Path(System.getProperty("test.tmp.dir", + "target" + File.separator + "test" + File.separator + "tmp")); + Configuration conf; + FileSystem fs; + Path testFilePath; + String writerTimeZone; + String readerTimeZone; + static TimeZone defaultTimeZone = TimeZone.getDefault(); + + public TestOrcTimezone2(String writerTZ, String readerTZ) { + this.writerTimeZone = writerTZ; + this.readerTimeZone = readerTZ; + } + + @Parameterized.Parameters + public static Collection data() { + String[] allTimeZones = TimeZone.getAvailableIDs(); + Random rand = new Random(123); + int len = allTimeZones.length; + int n = 400; + Object[][] data = new Object[n][]; + for (int i = 0; i < n; i++) { + int wIdx = rand.nextInt(len); + int rIdx = rand.nextInt(len); + data[i] = new Object[2]; + data[i][0] = allTimeZones[wIdx]; + data[i][1] = allTimeZones[rIdx]; + } + return Arrays.asList(data); + } + + @Rule + public TestName testCaseName = new TestName(); + + @Before + public void openFileSystem() throws Exception { + conf = new Configuration(); + fs = FileSystem.getLocal(conf); + testFilePath = new Path(workDir, "TestOrcFile." 
+ + testCaseName.getMethodName() + ".orc"); + fs.delete(testFilePath, false); + } + + @After + public void restoreTimeZone() { + TimeZone.setDefault(defaultTimeZone); + } + + @Test + public void testTimestampWriter() throws Exception { + ObjectInspector inspector; + synchronized (TestOrcFile.class) { + inspector = ObjectInspectorFactory.getReflectionObjectInspector(Timestamp.class, + ObjectInspectorFactory.ObjectInspectorOptions.JAVA); + } + + TimeZone.setDefault(TimeZone.getTimeZone(writerTimeZone)); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000)); + assertEquals(writerTimeZone, TimeZone.getDefault().getID()); + List ts = Lists.newArrayList(); + ts.add("2003-01-01 01:00:00.000000222"); + ts.add("1999-01-01 02:00:00.999999999"); + ts.add("1995-01-01 03:00:00.688888888"); + ts.add("2002-01-01 04:00:00.1"); + ts.add("2010-03-02 05:00:00.000009001"); + ts.add("2005-01-01 06:00:00.000002229"); + ts.add("2006-01-01 07:00:00.900203003"); + ts.add("2003-01-01 08:00:00.800000007"); + ts.add("1996-08-02 09:00:00.723100809"); + ts.add("1998-11-02 10:00:00.857340643"); + ts.add("2008-10-02 11:00:00.0"); + ts.add("9999-01-01 00:00:00.000999"); + for (String t : ts) { + writer.addRow(Timestamp.valueOf(t)); + } + writer.close(); + + TimeZone.setDefault(TimeZone.getTimeZone(readerTimeZone)); + Reader reader = OrcFile.createReader(testFilePath, + OrcFile.readerOptions(conf).filesystem(fs)); + assertEquals(readerTimeZone, TimeZone.getDefault().getID()); + RecordReader rows = reader.rows(null); + int idx = 0; + while (rows.hasNext()) { + Object row = rows.next(null); + Timestamp got = ((TimestampWritable) row).getTimestamp(); + assertEquals(ts.get(idx++), got.toString()); + } + rows.close(); + } +}
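
Note (illustration only, not part of the patch): the reader-side change amounts to rebuilding the millisecond value from the stored seconds using a base timestamp parsed in the writer's time zone, then shifting it by the difference between the two zones' UTC offsets at that instant. The following standalone Java sketch shows that arithmetic under stated assumptions; the class and method names are hypothetical, and it omits the nanosecond rounding fix and ORC stream decoding that RecordReaderImpl performs.

import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.TimeZone;

class TimezoneAdjustSketch {
  static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00";
  static final int MILLIS_PER_SECOND = 1000;

  // storedSeconds mirrors what TimestampTreeWriter writes:
  // (value millis / 1000) - base, where base was computed in the writer's
  // default time zone.
  static Timestamp adjust(long storedSeconds, int nanos,
                          TimeZone writerTimeZone, TimeZone readerTimeZone)
      throws ParseException {
    // Parse the base timestamp in the writer's time zone, as the reader does.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    sdf.setTimeZone(writerTimeZone);
    long baseSeconds = sdf.parse(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND;
    long millis = (storedSeconds + baseSeconds) * MILLIS_PER_SECOND;
    // Same compensation as TimestampTreeReader.next(): the difference of the
    // two zones' UTC offsets at this instant, which also covers DST.
    long offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis);
    Timestamp ts = new Timestamp(millis + offset);
    ts.setNanos(nanos);
    return ts;
  }
}

For example, the first timestamp in TestOrcTimezone1 ("2003-01-01 01:00:00.000000222"), written with the default zone "GMT-12:00" and read back with the default zone "GMT+14:00" (the first parameter pair), is intended to round-trip to the same wall-clock string after this adjustment, which is exactly what the test asserts.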