diff --git ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
index e3ab3ac..d4c09c0 100644
--- ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
+++ ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
@@ -15578,6 +15578,21 @@ public Builder removeStripeStats(int index) {
* optional uint32 rowIndexStride = 8;
*/
int getRowIndexStride();
+
+ // optional string writerTimezone = 9;
+ /**
+ * optional string writerTimezone = 9;
+ */
+ boolean hasWriterTimezone();
+ /**
+ * optional string writerTimezone = 9;
+ */
+ java.lang.String getWriterTimezone();
+ /**
+ * optional string writerTimezone = 9;
+ */
+ com.google.protobuf.ByteString
+ getWriterTimezoneBytes();
}
/**
* Protobuf type {@code orc.proto.Footer}
@@ -15682,6 +15697,11 @@ private Footer(
rowIndexStride_ = input.readUInt32();
break;
}
+ case 74: {
+ bitField0_ |= 0x00000010;
+ writerTimezone_ = input.readBytes();
+ break;
+ }
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -15942,6 +15962,49 @@ public int getRowIndexStride() {
return rowIndexStride_;
}
+ // optional string writerTimezone = 9;
+ public static final int WRITERTIMEZONE_FIELD_NUMBER = 9;
+ private java.lang.Object writerTimezone_;
+ /**
+ * optional string writerTimezone = 9;
+ */
+ public boolean hasWriterTimezone() {
+ return ((bitField0_ & 0x00000010) == 0x00000010);
+ }
+ /**
+ * optional string writerTimezone = 9;
+ */
+ public java.lang.String getWriterTimezone() {
+ java.lang.Object ref = writerTimezone_;
+ if (ref instanceof java.lang.String) {
+ return (java.lang.String) ref;
+ } else {
+ com.google.protobuf.ByteString bs =
+ (com.google.protobuf.ByteString) ref;
+ java.lang.String s = bs.toStringUtf8();
+ if (bs.isValidUtf8()) {
+ writerTimezone_ = s;
+ }
+ return s;
+ }
+ }
+ /**
+ * optional string writerTimezone = 9;
+ */
+ public com.google.protobuf.ByteString
+ getWriterTimezoneBytes() {
+ java.lang.Object ref = writerTimezone_;
+ if (ref instanceof java.lang.String) {
+ com.google.protobuf.ByteString b =
+ com.google.protobuf.ByteString.copyFromUtf8(
+ (java.lang.String) ref);
+ writerTimezone_ = b;
+ return b;
+ } else {
+ return (com.google.protobuf.ByteString) ref;
+ }
+ }
+
private void initFields() {
headerLength_ = 0L;
contentLength_ = 0L;
@@ -15951,6 +16014,7 @@ private void initFields() {
numberOfRows_ = 0L;
statistics_ = java.util.Collections.emptyList();
rowIndexStride_ = 0;
+ writerTimezone_ = "";
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -15988,6 +16052,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output)
if (((bitField0_ & 0x00000008) == 0x00000008)) {
output.writeUInt32(8, rowIndexStride_);
}
+ if (((bitField0_ & 0x00000010) == 0x00000010)) {
+ output.writeBytes(9, getWriterTimezoneBytes());
+ }
getUnknownFields().writeTo(output);
}
@@ -16029,6 +16096,10 @@ public int getSerializedSize() {
size += com.google.protobuf.CodedOutputStream
.computeUInt32Size(8, rowIndexStride_);
}
+ if (((bitField0_ & 0x00000010) == 0x00000010)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeBytesSize(9, getWriterTimezoneBytes());
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -16181,6 +16252,8 @@ public Builder clear() {
}
rowIndexStride_ = 0;
bitField0_ = (bitField0_ & ~0x00000080);
+ writerTimezone_ = "";
+ bitField0_ = (bitField0_ & ~0x00000100);
return this;
}
@@ -16261,6 +16334,10 @@ public Builder clone() {
to_bitField0_ |= 0x00000008;
}
result.rowIndexStride_ = rowIndexStride_;
+ if (((from_bitField0_ & 0x00000100) == 0x00000100)) {
+ to_bitField0_ |= 0x00000010;
+ }
+ result.writerTimezone_ = writerTimezone_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@@ -16393,6 +16470,11 @@ public Builder mergeFrom(org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer other)
if (other.hasRowIndexStride()) {
setRowIndexStride(other.getRowIndexStride());
}
+ if (other.hasWriterTimezone()) {
+ bitField0_ |= 0x00000100;
+ writerTimezone_ = other.writerTimezone_;
+ onChanged();
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -17512,6 +17594,80 @@ public Builder clearRowIndexStride() {
return this;
}
+ // optional string writerTimezone = 9;
+ private java.lang.Object writerTimezone_ = "";
+ /**
+ * optional string writerTimezone = 9;
+ */
+ public boolean hasWriterTimezone() {
+ return ((bitField0_ & 0x00000100) == 0x00000100);
+ }
+ /**
+ * optional string writerTimezone = 9;
+ */
+ public java.lang.String getWriterTimezone() {
+ java.lang.Object ref = writerTimezone_;
+ if (!(ref instanceof java.lang.String)) {
+ java.lang.String s = ((com.google.protobuf.ByteString) ref)
+ .toStringUtf8();
+ writerTimezone_ = s;
+ return s;
+ } else {
+ return (java.lang.String) ref;
+ }
+ }
+ /**
+ * optional string writerTimezone = 9;
+ */
+ public com.google.protobuf.ByteString
+ getWriterTimezoneBytes() {
+ java.lang.Object ref = writerTimezone_;
+ if (ref instanceof String) {
+ com.google.protobuf.ByteString b =
+ com.google.protobuf.ByteString.copyFromUtf8(
+ (java.lang.String) ref);
+ writerTimezone_ = b;
+ return b;
+ } else {
+ return (com.google.protobuf.ByteString) ref;
+ }
+ }
+ /**
+ * optional string writerTimezone = 9;
+ */
+ public Builder setWriterTimezone(
+ java.lang.String value) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ bitField0_ |= 0x00000100;
+ writerTimezone_ = value;
+ onChanged();
+ return this;
+ }
+ /**
+ * optional string writerTimezone = 9;
+ */
+ public Builder clearWriterTimezone() {
+ bitField0_ = (bitField0_ & ~0x00000100);
+ writerTimezone_ = getDefaultInstance().getWriterTimezone();
+ onChanged();
+ return this;
+ }
+ /**
+ * optional string writerTimezone = 9;
+ */
+ public Builder setWriterTimezoneBytes(
+ com.google.protobuf.ByteString value) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ bitField0_ |= 0x00000100;
+ writerTimezone_ = value;
+ onChanged();
+ return this;
+ }
+
// @@protoc_insertion_point(builder_scope:orc.proto.Footer)
}
@@ -18940,21 +19096,22 @@ public Builder setMagicBytes(
"\002 \001(\014\"A\n\020StripeStatistics\022-\n\010colStats\030\001 " +
"\003(\0132\033.orc.proto.ColumnStatistics\"<\n\010Meta" +
"data\0220\n\013stripeStats\030\001 \003(\0132\033.orc.proto.St" +
- "ripeStatistics\"\222\002\n\006Footer\022\024\n\014headerLengt",
+ "ripeStatistics\"\252\002\n\006Footer\022\024\n\014headerLengt",
"h\030\001 \001(\004\022\025\n\rcontentLength\030\002 \001(\004\022-\n\007stripe" +
"s\030\003 \003(\0132\034.orc.proto.StripeInformation\022\036\n" +
"\005types\030\004 \003(\0132\017.orc.proto.Type\022-\n\010metadat" +
"a\030\005 \003(\0132\033.orc.proto.UserMetadataItem\022\024\n\014" +
"numberOfRows\030\006 \001(\004\022/\n\nstatistics\030\007 \003(\0132\033" +
".orc.proto.ColumnStatistics\022\026\n\016rowIndexS" +
- "tride\030\010 \001(\r\"\305\001\n\nPostScript\022\024\n\014footerLeng" +
- "th\030\001 \001(\004\022/\n\013compression\030\002 \001(\0162\032.orc.prot" +
- "o.CompressionKind\022\034\n\024compressionBlockSiz" +
- "e\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\026\n\016metadata",
- "Length\030\005 \001(\004\022\025\n\rwriterVersion\030\006 \001(\r\022\016\n\005m" +
- "agic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000" +
- "\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003B\"\n org.a" +
- "pache.hadoop.hive.ql.io.orc"
+ "tride\030\010 \001(\r\022\026\n\016writerTimezone\030\t \001(\t\"\305\001\n\n" +
+ "PostScript\022\024\n\014footerLength\030\001 \001(\004\022/\n\013comp" +
+ "ression\030\002 \001(\0162\032.orc.proto.CompressionKin" +
+ "d\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007versi",
+ "on\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\025\n\r" +
+ "writerVersion\030\006 \001(\r\022\016\n\005magic\030\300> \001(\t*:\n\017C" +
+ "ompressionKind\022\010\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SN" +
+ "APPY\020\002\022\007\n\003LZO\020\003B\"\n org.apache.hadoop.hiv" +
+ "e.ql.io.orc"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -19092,7 +19249,7 @@ public Builder setMagicBytes(
internal_static_orc_proto_Footer_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_orc_proto_Footer_descriptor,
- new java.lang.String[] { "HeaderLength", "ContentLength", "Stripes", "Types", "Metadata", "NumberOfRows", "Statistics", "RowIndexStride", });
+ new java.lang.String[] { "HeaderLength", "ContentLength", "Stripes", "Types", "Metadata", "NumberOfRows", "Statistics", "RowIndexStride", "WriterTimezone", });
internal_static_orc_proto_PostScript_descriptor =
getDescriptor().getMessageTypes().get(22);
internal_static_orc_proto_PostScript_fieldAccessorTable = new
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
index 9788c16..50a4f58 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/FileDump.java
@@ -77,9 +77,13 @@ public static void main(String[] args) throws Exception {
}
}
+ boolean printTimeZone = false;
+ if (cli.hasOption('t')) {
+ printTimeZone = true;
+ }
String[] files = cli.getArgs();
if (dumpData) printData(Arrays.asList(files), conf);
- else printMetaData(Arrays.asList(files), conf, rowIndexCols);
+ else printMetaData(Arrays.asList(files), conf, rowIndexCols, printTimeZone);
}
private static void printData(List<String> files, Configuration conf) throws IOException,
@@ -90,13 +94,20 @@ private static void printData(List files, Configuration conf) throws IOE
}
private static void printMetaData(List<String> files, Configuration conf,
- List<Integer> rowIndexCols) throws IOException {
+ List<Integer> rowIndexCols, boolean printTimeZone) throws IOException {
for (String filename : files) {
System.out.println("Structure for " + filename);
Path path = new Path(filename);
Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
System.out.println("File Version: " + reader.getFileVersion().getName() +
" with " + reader.getWriterVersion());
+ if (printTimeZone) {
+ String tz = reader.getWriterTimeZone();
+ if (tz == null || tz.isEmpty()) {
+ tz = "Unknown";
+ }
+ System.out.println("Writer TimeZone: " + tz);
+ }
RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
System.out.println("Rows: " + reader.getNumberOfRows());
System.out.println("Compression: " + reader.getCompression());
@@ -278,6 +289,13 @@ static Options createOptions() {
.withDescription("Should the data be printed")
.create('d'));
+ // printing the writer's time zone is made optional so that file dump unit tests
+ // do not break when they are run in different time zones
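+ // example usage (hypothetical file path): hive --orcfiledump -t /path/to/file.orc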
+ result.addOption(OptionBuilder
+ .withLongOpt("timezone")
+ .withDescription("Print writer's time zone")
+ .create('t'));
+
result.addOption(OptionBuilder
.withLongOpt("help")
.withDescription("print help message")
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
index f85c21b..86b4bd0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java
@@ -139,6 +139,12 @@
OrcFile.WriterVersion getWriterVersion();
/**
+ * Get the time zone id of the writer of this file.
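+ * Files that do not record a writer time zone return an empty string.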
+ * @return time zone id
+ */
+ String getWriterTimeZone();
+
+ /**
* Options for creating a RecordReader.
*/
public static class Options {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 03f8085..711c216 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -199,6 +199,11 @@ public long getContentLength() {
}
@Override
+ public String getWriterTimeZone() {
+ return footer.getWriterTimezone();
+ }
+
+ @Override
public int getRowIndexStride() {
return footer.getRowIndexStride();
}
@@ -533,7 +538,7 @@ public RecordReader rowsOptions(Options options) throws IOException {
}
return new RecordReaderImpl(this.getStripes(), fileSystem, path,
options, footer.getTypesList(), codec, bufferSize,
- footer.getRowIndexStride(), conf);
+ footer.getRowIndexStride(), conf, footer.getWriterTimezone());
}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 458ad21..f23ebf4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -26,12 +26,15 @@
import java.nio.ByteBuffer;
import java.sql.Date;
import java.sql.Timestamp;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.TimeZone;
import java.util.TreeMap;
import org.apache.commons.lang.StringUtils;
@@ -117,7 +120,7 @@
// an array about which row groups aren't skipped
private boolean[] includedRowGroups = null;
private final Configuration conf;
-
+ private final String writerTimeZoneId;
private final ByteBufferAllocatorPool pool = new ByteBufferAllocatorPool();
private final ZeroCopyReaderShim zcr;
@@ -265,8 +268,8 @@ static int findColumns(String[] columnNames,
CompressionCodec codec,
int bufferSize,
long strideRate,
- Configuration conf
- ) throws IOException {
+ Configuration conf,
+ String writerTimeZoneId) throws IOException {
this.path = path;
this.file = fileSystem.open(path);
this.codec = codec;
@@ -274,6 +277,7 @@ static int findColumns(String[] columnNames,
this.bufferSize = bufferSize;
this.included = options.getInclude();
this.conf = conf;
+ this.writerTimeZoneId = writerTimeZoneId;
this.sarg = options.getSearchArgument();
if (sarg != null) {
sargLeaves = sarg.getLeaves();
@@ -319,7 +323,7 @@ static int findColumns(String[] columnNames,
firstRow = skippedRows;
totalRowCount = rows;
- reader = createTreeReader(path, 0, types, included, conf);
+ reader = createTreeReader(path, 0, types, included, conf, writerTimeZoneId);
indexes = new OrcProto.RowIndex[types.size()];
bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()];
rowIndexStride = strideRate;
@@ -1042,10 +1046,28 @@ void skipRows(long items) throws IOException {
private static class TimestampTreeReader extends TreeReader{
private IntegerReader data = null;
private IntegerReader nanos = null;
- private final LongColumnVector nanoVector = new LongColumnVector();
+ private TimeZone writerTimeZone;
+ private TimeZone readerTimeZone;
+ private long base_timestamp;
- TimestampTreeReader(Path path, int columnId, Configuration conf) {
+ TimestampTreeReader(Path path, int columnId, Configuration conf, String writerTimeZoneId)
+ throws IOException {
super(path, columnId, conf);
+ this.readerTimeZone = TimeZone.getDefault();
+ // old files do not record a writer time zone; fall back to the reader's
+ // time zone so that new readers read old files the same way as before
+ if (writerTimeZoneId == null || writerTimeZoneId.isEmpty()) {
+ this.writerTimeZone = readerTimeZone;
+ } else {
+ this.writerTimeZone = TimeZone.getTimeZone(writerTimeZoneId);
+ }
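+ // parse the fixed base timestamp string in the writer's time zone so that the
+ // seconds offsets stored in the file convert back to the writer's original instants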
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ sdf.setTimeZone(writerTimeZone);
+ try {
+ this.base_timestamp =
+ sdf.parse(WriterImpl.BASE_TIMESTAMP_STRING).getTime() / WriterImpl.MILLIS_PER_SECOND;
+ } catch (ParseException e) {
+ throw new IOException("Unable to create base timestamp", e);
+ }
}
@Override
@@ -1087,9 +1109,7 @@ Object next(Object previous) throws IOException {
} else {
result = (TimestampWritable) previous;
}
- Timestamp ts = new Timestamp(0);
- long millis = (data.next() + WriterImpl.BASE_TIMESTAMP) *
- WriterImpl.MILLIS_PER_SECOND;
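+ // the data stream holds seconds relative to the base timestamp; nanos are read from the secondary stream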
+ long millis = (data.next() + base_timestamp) * WriterImpl.MILLIS_PER_SECOND;
int newNanos = parseNanos(nanos.next());
// fix the rounding when we divided by 1000.
if (millis >= 0) {
@@ -1097,7 +1117,12 @@ Object next(Object previous) throws IOException {
} else {
millis -= newNanos / 1000000;
}
- ts.setTime(millis);
+ // Adjust for the time zone difference between writer and reader. This also
+ // accounts for daylight saving time. The offset is 0 when a new reader reads
+ // an old file, because the read and write time zones are then the same.
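+ // Example (hypothetical zones): a value written as 2015-01-01 12:00:00 in GMT-8
+ // and read in GMT+1 gets offset = (-8h) - (+1h) = -9h, so it still displays as
+ // 2015-01-01 12:00:00 in the reader's time zone.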
+ long offset = writerTimeZone.getOffset(millis) - readerTimeZone.getOffset(millis);
+ long adjustedMillis = millis + offset;
+ Timestamp ts = new Timestamp(adjustedMillis);
ts.setNanos(newNanos);
result.set(ts);
}
@@ -1869,8 +1894,9 @@ Object nextVector(Object previousVector, long batchSize) throws IOException {
private final String[] fieldNames;
StructTreeReader(Path path, int columnId,
- List<OrcProto.Type> types,
- boolean[] included, Configuration conf) throws IOException {
+ List<OrcProto.Type> types,
+ boolean[] included, Configuration conf,
+ String writerTimeZoneId) throws IOException {
super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
int fieldCount = type.getFieldNamesCount();
@@ -1879,7 +1905,8 @@ Object nextVector(Object previousVector, long batchSize) throws IOException {
for(int i=0; i < fieldCount; ++i) {
int subtype = type.getSubtypes(i);
if (included == null || included[subtype]) {
- this.fields[i] = createTreeReader(path, subtype, types, included, conf);
+ this.fields[i] = createTreeReader(path, subtype, types, included, conf,
+ writerTimeZoneId);
}
this.fieldNames[i] = type.getFieldNames(i);
}
@@ -1971,8 +1998,10 @@ void skipRows(long items) throws IOException {
private RunLengthByteReader tags;
UnionTreeReader(Path path, int columnId,
- List<OrcProto.Type> types,
- boolean[] included, Configuration conf) throws IOException {
+ List<OrcProto.Type> types,
+ boolean[] included,
+ Configuration conf,
+ String writerTimeZoneId) throws IOException {
super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
int fieldCount = type.getSubtypesCount();
@@ -1980,7 +2009,8 @@ void skipRows(long items) throws IOException {
for(int i=0; i < fieldCount; ++i) {
int subtype = type.getSubtypes(i);
if (included == null || included[subtype]) {
- this.fields[i] = createTreeReader(path, subtype, types, included, conf);
+ this.fields[i] = createTreeReader(path, subtype, types, included, conf,
+ writerTimeZoneId);
}
}
}
@@ -2050,12 +2080,14 @@ void skipRows(long items) throws IOException {
private IntegerReader lengths = null;
ListTreeReader(Path path, int columnId,
- List<OrcProto.Type> types,
- boolean[] included, Configuration conf) throws IOException {
+ List<OrcProto.Type> types,
+ boolean[] included,
+ Configuration conf,
+ String writerTimeZoneId) throws IOException {
super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
elementReader = createTreeReader(path, type.getSubtypes(0), types,
- included, conf);
+ included, conf, writerTimeZoneId);
}
@Override
@@ -2140,20 +2172,24 @@ void skipRows(long items) throws IOException {
private IntegerReader lengths = null;
MapTreeReader(Path path,
- int columnId,
- List<OrcProto.Type> types,
- boolean[] included, Configuration conf) throws IOException {
+ int columnId,
+ List<OrcProto.Type> types,
+ boolean[] included,
+ Configuration conf,
+ String writerTimeZoneId) throws IOException {
super(path, columnId, conf);
OrcProto.Type type = types.get(columnId);
int keyColumn = type.getSubtypes(0);
int valueColumn = type.getSubtypes(1);
if (included == null || included[keyColumn]) {
- keyReader = createTreeReader(path, keyColumn, types, included, conf);
+ keyReader = createTreeReader(path, keyColumn, types, included, conf,
+ writerTimeZoneId);
} else {
keyReader = null;
}
if (included == null || included[valueColumn]) {
- valueReader = createTreeReader(path, valueColumn, types, included, conf);
+ valueReader = createTreeReader(path, valueColumn, types, included, conf,
+ writerTimeZoneId);
} else {
valueReader = null;
}
@@ -2233,11 +2269,11 @@ void skipRows(long items) throws IOException {
}
private static TreeReader createTreeReader(Path path,
- int columnId,
- List<OrcProto.Type> types,
- boolean[] included,
- Configuration conf
- ) throws IOException {
+ int columnId,
+ List<OrcProto.Type> types,
+ boolean[] included,
+ Configuration conf,
+ String writerTimeZoneId) throws IOException {
OrcProto.Type type = types.get(columnId);
switch (type.getKind()) {
case BOOLEAN:
@@ -2269,7 +2305,7 @@ private static TreeReader createTreeReader(Path path,
case BINARY:
return new BinaryTreeReader(path, columnId, conf);
case TIMESTAMP:
- return new TimestampTreeReader(path, columnId, conf);
+ return new TimestampTreeReader(path, columnId, conf, writerTimeZoneId);
case DATE:
return new DateTreeReader(path, columnId, conf);
case DECIMAL:
@@ -2277,13 +2313,13 @@ private static TreeReader createTreeReader(Path path,
int scale = type.hasScale()? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE;
return new DecimalTreeReader(path, columnId, precision, scale, conf);
case STRUCT:
- return new StructTreeReader(path, columnId, types, included, conf);
+ return new StructTreeReader(path, columnId, types, included, conf, writerTimeZoneId);
case LIST:
- return new ListTreeReader(path, columnId, types, included, conf);
+ return new ListTreeReader(path, columnId, types, included, conf, writerTimeZoneId);
case MAP:
- return new MapTreeReader(path, columnId, types, included, conf);
+ return new MapTreeReader(path, columnId, types, included, conf, writerTimeZoneId);
case UNION:
- return new UnionTreeReader(path, columnId, types, included, conf);
+ return new UnionTreeReader(path, columnId, types, included, conf, writerTimeZoneId);
default:
throw new IllegalArgumentException("Unsupported type " +
type.getKind());
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 79dc5a1..466f740 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -26,10 +26,10 @@
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
+import java.util.TimeZone;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
@@ -152,6 +152,7 @@
private final OrcFile.CompressionStrategy compressionStrategy;
private final boolean[] bloomFilterColumns;
private final double bloomFilterFpp;
+ private final String writerTimeZone;
WriterImpl(FileSystem fs,
Path path,
@@ -218,6 +219,7 @@ public Writer getWriter() {
throw new IllegalArgumentException("Row stride must be at least " +
MIN_ROW_INDEX_STRIDE);
}
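+ // record the JVM's default time zone ID so that readers can adjust timestamps
+ // written in this zone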
+ this.writerTimeZone = TimeZone.getDefault().getID();
// ensure that we are able to handle callbacks before we register ourselves
memoryManager.addWriter(path, stripeSize, this);
@@ -1511,13 +1513,13 @@ void recordPosition(PositionRecorder recorder) throws IOException {
}
static final int MILLIS_PER_SECOND = 1000;
- static final long BASE_TIMESTAMP =
- Timestamp.valueOf("2015-01-01 00:00:00").getTime() / MILLIS_PER_SECOND;
+ static final String BASE_TIMESTAMP_STRING = "2015-01-01 00:00:00";
private static class TimestampTreeWriter extends TreeWriter {
private final IntegerWriter seconds;
private final IntegerWriter nanos;
private final boolean isDirectV2;
+ private final long base_timestamp;
TimestampTreeWriter(int columnId,
ObjectInspector inspector,
@@ -1530,6 +1532,8 @@ void recordPosition(PositionRecorder recorder) throws IOException {
this.nanos = createIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.SECONDARY), false, isDirectV2, writer);
recordPosition(rowIndexPosition);
+ // compute the base timestamp using the current default time zone; unit tests
+ // change the default time zone to simulate writers in different zones
+ this.base_timestamp = Timestamp.valueOf(BASE_TIMESTAMP_STRING).getTime() / MILLIS_PER_SECOND;
}
@Override
@@ -1550,7 +1554,7 @@ void write(Object obj) throws IOException {
((TimestampObjectInspector) inspector).
getPrimitiveJavaObject(obj);
indexStatistics.updateTimestamp(val);
- seconds.write((val.getTime() / MILLIS_PER_SECOND) - BASE_TIMESTAMP);
+ seconds.write((val.getTime() / MILLIS_PER_SECOND) - base_timestamp);
nanos.write(formatNanos(val.getNanos()));
if (createBloomFilter) {
bloomFilter.addLong(val.getTime());
@@ -2340,6 +2344,7 @@ private int writeFooter(long bodyLength) throws IOException {
builder.setHeaderLength(headerLength);
builder.setNumberOfRows(rowCount);
builder.setRowIndexStride(rowIndexStride);
+ builder.setWriterTimezone(writerTimeZone);
// populate raw data size
rawDataSize = computeRawDataSize();
// serialize the types
diff --git ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
index 6d5f482..9cad63f 100644
--- ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
+++ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
@@ -190,6 +190,7 @@ message Footer {
optional uint64 numberOfRows = 6;
repeated ColumnStatistics statistics = 7;
optional uint32 rowIndexStride = 8;
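+ // time zone in which the file was written, as a Java TimeZone ID (for example "America/Los_Angeles")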
+ optional string writerTimezone = 9;
}
enum CompressionKind {
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java
new file mode 100644
index 0000000..70c2b0e
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcTimezone1.java
@@ -0,0 +1,428 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNotNull;
+
+import java.io.File;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hive.common.util.HiveTestUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Tests that ORC timestamp values are read back correctly when the writer and
+ * reader run in different time zones.
+ */
+@RunWith(Parameterized.class)
+public class TestOrcTimezone1 {
+ Path workDir = new Path(System.getProperty("test.tmp.dir",
+ "target" + File.separator + "test" + File.separator + "tmp"));
+ Configuration conf;
+ FileSystem fs;
+ Path testFilePath;
+ String writerTimeZone;
+ String readerTimeZone;
+ static TimeZone defaultTimeZone = TimeZone.getDefault();
+
+ public TestOrcTimezone1(String writerTZ, String readerTZ) {
+ this.writerTimeZone = writerTZ;
+ this.readerTimeZone = readerTZ;
+ }
+
+ @Parameterized.Parameters
+ public static Collection