diff --git common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java index ed83871..b2af41b 100644 --- common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java +++ common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java @@ -36,32 +36,29 @@ import java.time.format.TextStyle; import java.time.temporal.ChronoField; import java.time.temporal.TemporalAccessor; +import java.util.Date; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * This is the internal type for Timestamp with time zone. - * A wrapper of ZonedDateTime which automatically convert the Zone to UTC. * The full qualified input format of Timestamp with time zone is * "yyyy-MM-dd HH:mm:ss[.SSS...] zoneid/zoneoffset", where the time and zone parts are optional. * If time part is absent, a default '00:00:00.0' will be used. * If zone part is absent, the system time zone will be used. - * All timestamp with time zone will be converted and stored as UTC retaining the instant. - * E.g. "2017-04-14 18:00:00 Asia/Shanghai" will be converted to - * "2017-04-14 10:00:00.0 Z". */ public class TimestampTZ implements Comparable<TimestampTZ> { - private static final DateTimeFormatter formatter; - private static final ZoneId UTC = ZoneOffset.UTC; - private static final ZonedDateTime EPOCH = ZonedDateTime.ofInstant(Instant.EPOCH, UTC); + private static final Logger LOG = LoggerFactory.getLogger(TimestampTZ.class); + + private static final ZonedDateTime EPOCH = ZonedDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC); private static final LocalTime DEFAULT_LOCAL_TIME = LocalTime.of(0, 0); private static final Pattern SINGLE_DIGIT_PATTERN = Pattern.compile("[\\+-]\\d:\\d\\d"); - private static final Logger LOG = LoggerFactory.getLogger(TimestampTZ.class); private static final ThreadLocal<SimpleDateFormat> CONVERT_FORMATTER = ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")); + private static final DateTimeFormatter formatter; static { DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder(); // Date part @@ -88,13 +85,18 @@ public TimestampTZ(ZonedDateTime zonedDateTime) { setZonedDateTime(zonedDateTime); } - public TimestampTZ(long seconds, int nanos) { - set(seconds, nanos); + public TimestampTZ(long seconds, int nanos, ZoneId timeZone) { + set(seconds, nanos, timeZone); } - public void set(long seconds, int nanos) { + /** + * Obtains an Instant from the given seconds since the epoch of 1970-01-01T00:00:00Z and the + * nanosecond fraction of a second, then creates a zoned date-time for that same instant in + * the given time-zone. + */ + public void set(long seconds, int nanos, ZoneId timeZone) { Instant instant = Instant.ofEpochSecond(seconds, nanos); - setZonedDateTime(ZonedDateTime.ofInstant(instant, UTC)); + setZonedDateTime(ZonedDateTime.ofInstant(instant, timeZone)); } public ZonedDateTime getZonedDateTime() { @@ -102,7 +104,7 @@ public ZonedDateTime getZonedDateTime() { } public void setZonedDateTime(ZonedDateTime zonedDateTime) { - this.zonedDateTime = zonedDateTime != null ? zonedDateTime.withZoneSameInstant(UTC) : EPOCH; + this.zonedDateTime = zonedDateTime != null ?
zonedDateTime : EPOCH; } @Override @@ -137,6 +139,10 @@ public int getNanos() { } public static TimestampTZ parse(String s) { + return parse(s, null); + } + + public static TimestampTZ parse(String s, ZoneId defaultTimeZone) { // need to handle offset with single digital hour, see JDK-8066806 s = handleSingleDigitHourOffset(s); ZonedDateTime zonedDateTime; @@ -158,13 +164,18 @@ try { zoneId = ZoneId.from(accessor); } catch (DateTimeException e2) { - // TODO: in future this may come from user specified zone (via set time zone command) - zoneId = ZoneId.systemDefault(); + if (defaultTimeZone == null) { + throw new DateTimeException("Time Zone not available"); + } + zoneId = defaultTimeZone; } zonedDateTime = ZonedDateTime.of(localDate, localTime, zoneId); } - return new TimestampTZ(zonedDateTime); + if (defaultTimeZone == null) { + return new TimestampTZ(zonedDateTime); + } + return new TimestampTZ(zonedDateTime.withZoneSameInstant(defaultTimeZone)); } private static String handleSingleDigitHourOffset(String s) { @@ -176,9 +187,10 @@ return s; } - public static TimestampTZ parseOrNull(String s) { + + public static TimestampTZ parseOrNull(String s, ZoneId defaultTimeZone) { try { - return parse(s); + return parse(s, defaultTimeZone); } catch (DateTimeParseException e) { if (LOG.isDebugEnabled()) { LOG.debug("Invalid string " + s + " for TIMESTAMP WITH TIME ZONE", e); @@ -189,9 +201,8 @@ public static TimestampTZ parseOrNull(String s) { // Converts Date to TimestampTZ. The conversion is done text-wise since // Date/Timestamp should be treated as description of date/time. - public static TimestampTZ convert(java.util.Date date) { + public static TimestampTZ convert(Date date, ZoneId defaultTimeZone) { String s = date instanceof Timestamp ? date.toString() : CONVERT_FORMATTER.get().format(date); - // TODO: in future this may come from user specified zone (via set time zone command) - return parse(s + " " + ZoneId.systemDefault().getId()); + return parse(s, defaultTimeZone); } } diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f5e5974..56b3f1d 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -53,6 +53,8 @@ import java.net.URL; import java.net.URLDecoder; import java.net.URLEncoder; +import java.time.DateTimeException; +import java.time.ZoneId; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -1022,6 +1024,12 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal // whether session is running in silent mode or not HIVESESSIONSILENT("hive.session.silent", false, ""), + HIVE_SESSION_TIME_ZONE("hive.session.time.zone", "LOCAL", + "Sets the time-zone for displaying and interpreting timestamps. If this value is set to\n" + + "LOCAL, is not specified, or is not a valid time-zone, the system default time-zone will be\n" + + "used instead. Time-zone IDs can be specified as region-based zone IDs (based on IANA time-zone data),\n" + + "abbreviated zone IDs, or offset IDs."), + HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false, "Whether to log Hive query, query plan, runtime statistics etc."), @@ -4300,6 +4308,24 @@ private static String getSQLStdAuthDefaultWhiteListPattern() { } /** * Obtains the current time-zone ID.
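+ * The ID is taken from hive.session.time.zone and resolved with ZoneId.of(String); as the
+ * method body below shows, the value "LOCAL", an empty value, or an unparseable value falls
+ * back to ZoneId.systemDefault(). A sketch of the mapping, with illustrative values:
+ *   "Asia/Shanghai" resolves to ZoneId.of("Asia/Shanghai") (region-based ID)
+ *   "GMT-07:00" resolves to ZoneId.of("GMT-07:00") (offset ID)
+ *   "LOCAL", "", or "not-a-zone" resolves to ZoneId.systemDefault()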
+ */ + public ZoneId getUserTimeZone() { + String timeZoneStr = getVar(ConfVars.HIVE_SESSION_TIME_ZONE); + if (timeZoneStr == null || timeZoneStr.trim().isEmpty() || + timeZoneStr.trim().toLowerCase().equals("local")) { + // default + return ZoneId.systemDefault(); + } + try { + return ZoneId.of(timeZoneStr); + } catch (DateTimeException e1) { + // default + return ZoneId.systemDefault(); + } + } + + /** * @param paramList list of parameter strings * @return list of parameter strings with "." replaced by "\." */ diff --git common/src/test/org/apache/hadoop/hive/common/type/TestTimestampTZ.java common/src/test/org/apache/hadoop/hive/common/type/TestTimestampTZ.java index 0cef77a..3df39f5 100644 --- common/src/test/org/apache/hadoop/hive/common/type/TestTimestampTZ.java +++ common/src/test/org/apache/hadoop/hive/common/type/TestTimestampTZ.java @@ -22,6 +22,7 @@ import org.junit.Test; import java.sql.Timestamp; +import java.time.ZoneId; import java.time.format.DateTimeParseException; import java.util.TimeZone; @@ -64,13 +65,13 @@ public void testFromToInstant() { TimestampTZ tstz = TimestampTZ.parse(s1); long seconds = tstz.getEpochSecond(); int nanos = tstz.getNanos(); - Assert.assertEquals(tstz, new TimestampTZ(seconds, nanos)); + Assert.assertEquals(tstz, new TimestampTZ(seconds, nanos, ZoneId.of("UTC"))); nanos += 123000000; - Assert.assertEquals("2017-04-14 18:00:00.123 Z", new TimestampTZ(seconds, nanos).toString()); + Assert.assertEquals("2017-04-14 18:00:00.123 Z", new TimestampTZ(seconds, nanos, ZoneId.of("UTC")).toString()); seconds -= 3; - Assert.assertEquals("2017-04-14 17:59:57.123 Z", new TimestampTZ(seconds, nanos).toString()); + Assert.assertEquals("2017-04-14 17:59:57.123 Z", new TimestampTZ(seconds, nanos, ZoneId.of("UTC")).toString()); } @Test @@ -108,10 +109,26 @@ public void testConvertFromTimestamp() { try { // Use system zone when converting from timestamp to timestamptz String s = "2017-06-12 23:12:56.34"; - TimeZone.setDefault(TimeZone.getTimeZone("Europe/London")); - TimestampTZ tstz1 = TimestampTZ.convert(Timestamp.valueOf(s)); - TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")); - TimestampTZ tstz2 = TimestampTZ.convert(Timestamp.valueOf(s)); + TimestampTZ tstz1 = TimestampTZ.convert( + Timestamp.valueOf(s), + TimeZone.getTimeZone("Europe/London").toZoneId()); + TimestampTZ tstz2 = TimestampTZ.convert( + Timestamp.valueOf(s), + TimeZone.getTimeZone("America/Los_Angeles").toZoneId()); + Assert.assertTrue(tstz1.compareTo(tstz2) < 0); + } finally { + TimeZone.setDefault(defaultZone); + } + } + + @Test + public void testConvertFromTimestamp2() { + TimeZone defaultZone = TimeZone.getDefault(); + try { + // Parsing the same local timestamp with two explicit zone IDs must yield different instants + String s = "2017-06-12 23:12:56.34"; + TimestampTZ tstz1 = TimestampTZ.parse(s + " " + TimeZone.getTimeZone("Europe/London").getID()); + TimestampTZ tstz2 = TimestampTZ.parse(s + " " + TimeZone.getTimeZone("America/Los_Angeles").getID()); Assert.assertTrue(tstz1.compareTo(tstz2) < 0); } finally { TimeZone.setDefault(defaultZone); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java index 8902f6c..9a186fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java @@ -29,6 +29,7 @@ import java.lang.reflect.Field; import java.net.URI; import java.sql.Timestamp; +import java.time.ZoneId; import
java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -46,6 +47,8 @@ import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; @@ -322,7 +325,13 @@ public void write(Kryo kryo, Output output, TimestampTZ object) { public TimestampTZ read(Kryo kryo, Input input, Class<TimestampTZ> type) { long seconds = input.readLong(); int nanos = input.readInt(); - return new TimestampTZ(seconds, nanos); + ZoneId timeZone; + try { + timeZone = Hive.get().getConf().getUserTimeZone(); + } catch (HiveException e) { + throw new RuntimeException(e); + } + return new TimestampTZ(seconds, nanos, timeZone); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index 517ce31..48a5dfb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -961,9 +961,9 @@ private static ExprNodeDesc evaluateFunction(GenericUDF udf, List TypeInfo typeInfo = poi.getTypeInfo(); o = poi.getPrimitiveJavaObject(o); if (typeInfo.getTypeName().contains(serdeConstants.DECIMAL_TYPE_NAME) - || typeInfo.getTypeName() - .contains(serdeConstants.VARCHAR_TYPE_NAME) - || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME)) { + || typeInfo.getTypeName().contains(serdeConstants.VARCHAR_TYPE_NAME) + || typeInfo.getTypeName().contains(serdeConstants.CHAR_TYPE_NAME) + || typeInfo.getTypeName().contains(serdeConstants.TIMESTAMPTZ_TYPE_NAME)) { return new ExprNodeConstantDesc(typeInfo, o); } } else if (udf instanceof GenericUDFStruct diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveType.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveType.java new file mode 100644 index 0000000..31a628b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveType.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.sql.type.AbstractSqlType; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * Hive-specific type. + * + * TODO: Created to represent timestamp with time-zone type. + * It can be removed once the type exists in Calcite.
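+ *
+ * Because this type reports SqlTypeName.NULL to Calcite, translators cannot dispatch on the
+ * SqlTypeName alone. A sketch of the check they perform instead (as in the ExprNodeConverter
+ * and TypeConverter changes in this patch; the variable name is illustrative):
+ *
+ *   if (type instanceof HiveType
+ *       && ((HiveType) type).getTypeClass() == TimestampTZ.class) {
+ *     // treat the value as TIMESTAMP WITH TIME ZONE
+ *   }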
+ */ +public class HiveType extends AbstractSqlType { + private final Class clazz; + + public HiveType(Class clazz) { + super(SqlTypeName.NULL, true, null); + this.clazz = clazz; + computeDigest(); + } + + protected void generateTypeString(StringBuilder sb, boolean withDetail) { + sb.append("HiveType("); + sb.append(clazz); + sb.append(")"); + } + + public Class getTypeClass() { + return clazz; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java index f974cc9..311365c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java @@ -48,8 +48,10 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveType; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.RexVisitor; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.Schema; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -265,8 +267,12 @@ public ExprNodeDesc visitLiteral(RexLiteral literal) { case INTERVAL_MINUTE_SECOND: case INTERVAL_SECOND: return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, null); + case NULL: case OTHER: default: + if (lType instanceof HiveType && ((HiveType) lType).getTypeClass() == TimestampTZ.class) { + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTZTypeInfo, null); + } return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, null); } } else { @@ -334,8 +340,12 @@ public ExprNodeDesc visitLiteral(RexLiteral literal) { return new ExprNodeConstantDesc(TypeInfoFactory.intervalDayTimeTypeInfo, new HiveIntervalDayTime(secsBd)); } + case NULL: case OTHER: default: + if (lType instanceof HiveType && ((HiveType) lType).getTypeClass() == TimestampTZ.class) { + return new ExprNodeConstantDesc(TypeInfoFactory.timestampTZTypeInfo, literal.getValue3()); + } return new ExprNodeConstantDesc(TypeInfoFactory.voidTypeInfo, literal.getValue3()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 7665f56..84ad2b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -87,6 +87,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToTimestampTZ; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnixTimeStamp; @@ -382,7 +383,7 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c || (udf instanceof GenericUDFToDecimal) || (udf instanceof GenericUDFToDate) // Calcite 
can not specify the scale for timestamp. As a result, all // the millisecond part will be lost - || (udf instanceof GenericUDFTimestamp) + || (udf instanceof GenericUDFTimestamp) || (udf instanceof GenericUDFToTimestampTZ) || (udf instanceof GenericUDFToBinary) || castExprUsingUDFBridge(udf)) { castExpr = cluster.getRexBuilder().makeAbstractCast( TypeConverter.convert(func.getTypeInfo(), cluster.getTypeFactory()), diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index c6b34d4..2cbef00 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -181,6 +181,9 @@ private static FunctionInfo handleExplicitCast(SqlOperator op, RelDataType dt) castUDF = FunctionRegistry.getFunctionInfo("double"); } else if (castType.equals(TypeInfoFactory.timestampTypeInfo)) { castUDF = FunctionRegistry.getFunctionInfo("timestamp"); + } else if (castType.equals(TypeInfoFactory.timestampTZTypeInfo)) { + castUDF = handleCastForParameterizedType(castType, + FunctionRegistry.getFunctionInfo("timestamp with time zone")); } else if (castType.equals(TypeInfoFactory.dateTypeInfo)) { castUDF = FunctionRegistry.getFunctionInfo("date"); } else if (castType instanceof DecimalTypeInfo) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java index 2df7588..df55be8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java @@ -36,10 +36,12 @@ import org.apache.calcite.util.ConversionUtil; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveType; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter.HiveToken; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.RowResolver; @@ -201,7 +203,7 @@ public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtF convertedType = dtFactory.createSqlType(SqlTypeName.TIMESTAMP); break; case TIMESTAMPTZ: - convertedType = dtFactory.createSqlType(SqlTypeName.OTHER); + convertedType = new HiveType(TimestampTZ.class); break; case INTERVAL_YEAR_MONTH: convertedType = dtFactory.createSqlIntervalType( @@ -359,8 +361,12 @@ public static TypeInfo convertPrimitiveType(RelDataType rType) { return TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); else return TypeInfoFactory.getCharTypeInfo(charLength); + case NULL: case OTHER: default: + if (rType instanceof HiveType && ((HiveType) rType).getTypeClass() == TimestampTZ.class) { + return TypeInfoFactory.timestampTZTypeInfo; + } return TypeInfoFactory.voidTypeInfo; } @@ -389,6 +395,12 @@ public static HiveToken hiveToken(RelDataType calciteType) { .getPrecision()), 
String.valueOf(calciteType.getScale())); } break; + case NULL: + if (calciteType instanceof HiveType && ((HiveType) calciteType).getTypeClass() == TimestampTZ.class) { + ht = new HiveToken(HiveParser.TOK_TIMESTAMPTZ, "TOK_TIMESTAMPTZ"); + break; + } + // fall-through default: ht = calciteToHiveTypeNameMap.get(calciteType.getSqlTypeName().getName()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 5b7fc25..a7a243a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -144,6 +144,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; @@ -222,10 +223,21 @@ public static String getTypeName(ASTNode node) throws SemanticException { VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(node); typeName = varcharTypeInfo.getQualifiedName(); break; + case HiveParser.TOK_TIMESTAMPTZ: + HiveConf conf; + try { + conf = Hive.get().getConf(); + } catch (HiveException e) { + throw new SemanticException(e); + } + TimestampTZTypeInfo timestampTZTypeInfo = TypeInfoFactory.getTimestampTZTypeInfo( + conf.getUserTimeZone()); + typeName = timestampTZTypeInfo.getQualifiedName(); + break; case HiveParser.TOK_DECIMAL: - DecimalTypeInfo decTypeInfo = ParseUtils.getDecimalTypeTypeInfo(node); - typeName = decTypeInfo.getQualifiedName(); - break; + DecimalTypeInfo decTypeInfo = ParseUtils.getDecimalTypeTypeInfo(node); + typeName = decTypeInfo.getQualifiedName(); + break; default: typeName = TokenToTypeName.get(token); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 632b9c6..4de83c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -52,6 +52,8 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.lib.ExpressionWalker; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; @@ -85,6 +87,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -893,7 +896,7 @@ protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, F protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, boolean isFunction, ArrayList<ExprNodeDesc> children, TypeCheckCtx ctx) - throws SemanticException,
UDFArgumentException { + throws HiveException, SemanticException, UDFArgumentException { // return the child directly if the conversion is redundant. if (isRedundantConversionFunction(expr, isFunction, children)) { assert (children.size() == 1); @@ -1001,6 +1004,13 @@ protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, ((SettableUDF)genericUDF).setTypeInfo(varcharTypeInfo); } break; + case HiveParser.TOK_TIMESTAMPTZ: + TimestampTZTypeInfo timestampTZTypeInfo = new TimestampTZTypeInfo(); + timestampTZTypeInfo.setTimeZone(Hive.get().getConf().getUserTimeZone()); + if (genericUDF != null) { + ((SettableUDF)genericUDF).setTypeInfo(timestampTZTypeInfo); + } + break; case HiveParser.TOK_DECIMAL: DecimalTypeInfo decTypeInfo = ParseUtils.getDecimalTypeTypeInfo(funcNameNode); if (genericUDF != null) { @@ -1362,6 +1372,9 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, } catch (UDFArgumentException e) { throw new SemanticException(ErrorMsg.INVALID_ARGUMENT.getMsg(expr, e .getMessage()), e); + } catch (HiveException e) { + throw new SemanticException(ErrorMsg.INVALID_ARGUMENT.getMsg(expr, e + .getMessage()), e); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java index 1458211..b83e90a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java @@ -29,6 +29,8 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -69,6 +71,9 @@ private static final String[] PASSWORD_STRINGS = new String[] {"password", "paswd", "pswd"}; + private static final Pattern TIME_ZONE_PATTERN = + Pattern.compile("^time(\\s)+zone\\s", Pattern.CASE_INSENSITIVE); + public static boolean getBoolean(String value) { if (value.equals("on") || value.equals("true")) { return true; @@ -383,6 +388,12 @@ public CommandProcessorResponse run(String command) { return createProcessorSuccessResponse(); } + // Special handling for time-zone + Matcher matcher = TIME_ZONE_PATTERN.matcher(nwcmd); + if (matcher.find()) { + nwcmd = HiveConf.ConfVars.HIVE_SESSION_TIME_ZONE.varname + "=" + nwcmd.substring(matcher.end()); + } + String[] part = new String[2]; int eqIndex = nwcmd.indexOf('='); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampTZ.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampTZ.java index e96012b..087dbc9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampTZ.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToTimestampTZ.java @@ -21,10 +21,15 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampTZObjectInspector; +import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampTZConverter; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * Convert from string to TIMESTAMP WITH TIME ZONE. @@ -36,11 +41,12 @@ "Examples of ZoneId and ZoneOffset are Asia/Shanghai and GMT+08:00. " + "The time and zone parts are optional. If time is absent, '00:00:00.0' will be used. " + "If zone is absent, the system time zone will be used.") -public class GenericUDFToTimestampTZ extends GenericUDF { +public class GenericUDFToTimestampTZ extends GenericUDF implements SettableUDF { private transient PrimitiveObjectInspector argumentOI; private transient PrimitiveObjectInspectorConverter.TimestampTZConverter converter; + private TimestampTZTypeInfo typeInfo; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -67,9 +73,10 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen throw new UDFArgumentException( "The function CAST as TIMESTAMP WITH TIME ZONE takes only primitive types"); } - converter = new PrimitiveObjectInspectorConverter.TimestampTZConverter(argumentOI, - PrimitiveObjectInspectorFactory.writableTimestampTZObjectInspector); - return PrimitiveObjectInspectorFactory.writableTimestampTZObjectInspector; + SettableTimestampTZObjectInspector outputOI = (SettableTimestampTZObjectInspector) + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo); + converter = new TimestampTZConverter(argumentOI, outputOI); + return outputOI; } @Override @@ -84,6 +91,23 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { @Override public String getDisplayString(String[] children) { assert (children.length == 1); - return "CAST(" + children[0] + " AS TIMESTAMP WITH TIME ZONE)"; + StringBuilder sb = new StringBuilder(); + sb.append("CAST( "); + sb.append(children[0]); + sb.append(" AS "); + sb.append(typeInfo.getTypeName()); + sb.append(")"); + return sb.toString(); } + + @Override + public TypeInfo getTypeInfo() { + return typeInfo; + } + + @Override + public void setTypeInfo(TypeInfo typeInfo) throws UDFArgumentException { + this.typeInfo = (TimestampTZTypeInfo) typeInfo; + } + } diff --git ql/src/test/queries/clientpositive/timestamptz_2.q ql/src/test/queries/clientpositive/timestamptz_2.q index a335f52..88ec26b 100644 --- ql/src/test/queries/clientpositive/timestamptz_2.q +++ ql/src/test/queries/clientpositive/timestamptz_2.q @@ -1,4 +1,5 @@ set hive.fetch.task.conversion=more; +set time zone UTC; drop table tstz2; diff --git ql/src/test/queries/clientpositive/timezone.q ql/src/test/queries/clientpositive/timezone.q new file mode 100644 index 0000000..fd6c551 --- /dev/null +++ ql/src/test/queries/clientpositive/timezone.q @@ -0,0 +1,66 @@ +drop table `date_test`; +drop table `timestamp_test`; +drop table `timestamptz_test`; + +create table `date_test` (`mydate1` date); + +insert into `date_test` VALUES + ('2011-01-01 01:01:01.123'), + ('2011-01-01 01:01:01.123 Europe/Rome'), + ('2011-01-01 01:01:01.123 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912'), + ('2011-01-01 01:01:01.12345678912 Europe/Rome'), + ('2011-01-01 01:01:01.12345678912 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912 xyz'); + +create table `timestamp_test` (`mydate1` timestamp); + +insert into `timestamp_test` VALUES + ('2011-01-01 01:01:01.123'), + ('2011-01-01 01:01:01.123 Europe/Rome'), + ('2011-01-01 01:01:01.123 
GMT-05:00'), + ('2011-01-01 01:01:01.12345678912'), + ('2011-01-01 01:01:01.12345678912 Europe/Rome'), + ('2011-01-01 01:01:01.12345678912 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912 xyz'); + +create table `timestamptz_test` (`mydate1` timestamp with time zone); + +insert into `timestamptz_test` VALUES + ('2011-01-01 01:01:01.123'), + ('2011-01-01 01:01:01.123 Europe/Rome'), + ('2011-01-01 01:01:01.123 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912'), + ('2011-01-01 01:01:01.12345678912 Europe/Rome'), + ('2011-01-01 01:01:01.12345678912 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912 xyz'); + +select * from `date_test`; +select * from `timestamp_test`; +select * from `timestamptz_test`; + +set time zone Europe/Rome; + +select * from `date_test`; +select * from `timestamp_test`; +select * from `timestamptz_test`; + +set hive.session.time.zone=America/Los_Angeles; + +select * from `date_test`; +select * from `timestamp_test`; +select * from `timestamptz_test`; + +set time zone GMT-07:00; + +select * from `date_test`; +select * from `timestamp_test`; +select * from `timestamptz_test`; + +select extract(year from `mydate1`) from `timestamptz_test`; +select extract(quarter from `mydate1`) from `timestamptz_test`; +select extract(month from `mydate1`) from `timestamptz_test`; +select extract(day from `mydate1`) from `timestamptz_test`; +select extract(hour from `mydate1`) from `timestamptz_test`; +select extract(minute from `mydate1`) from `timestamptz_test`; +select extract(second from `mydate1`) from `timestamptz_test`; diff --git ql/src/test/results/clientpositive/timestamptz_2.q.out ql/src/test/results/clientpositive/timestamptz_2.q.out index 2666735..7cbdd83 100644 --- ql/src/test/results/clientpositive/timestamptz_2.q.out +++ ql/src/test/results/clientpositive/timestamptz_2.q.out @@ -29,8 +29,8 @@ POSTHOOK: query: select * from tstz2 where t='2005-01-02 19:01:00 GMT-07:00' POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz2 #### A masked pattern was here #### -2005-01-03 02:01:00.0 Z -2005-01-03 02:01:00.0 Z +2005-01-03 02:01:00.0 UTC +2005-01-03 02:01:00.0 UTC PREHOOK: query: select * from tstz2 where t>'2013-06-03 02:01:00.30547 GMT+01:00' PREHOOK: type: QUERY PREHOOK: Input: default@tstz2 @@ -39,7 +39,7 @@ POSTHOOK: query: select * from tstz2 where t>'2013-06-03 02:01:00.30547 GMT+01:0 POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz2 #### A masked pattern was here #### -2016-01-03 04:26:34.0123 Z +2016-01-03 04:26:34.0123 UTC PREHOOK: query: select min(t),max(t) from tstz2 PREHOOK: type: QUERY PREHOOK: Input: default@tstz2 @@ -48,7 +48,7 @@ POSTHOOK: query: select min(t),max(t) from tstz2 POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz2 #### A masked pattern was here #### -2005-01-03 02:01:00.0 Z 2016-01-03 04:26:34.0123 Z +2005-01-03 02:01:00.0 UTC 2016-01-03 04:26:34.0123 UTC PREHOOK: query: select t from tstz2 group by t order by t PREHOOK: type: QUERY PREHOOK: Input: default@tstz2 @@ -57,10 +57,10 @@ POSTHOOK: query: select t from tstz2 group by t order by t POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz2 #### A masked pattern was here #### -2005-01-03 02:01:00.0 Z -2005-04-03 10:01:00.04067 Z -2013-06-03 01:01:00.30547 Z -2016-01-03 04:26:34.0123 Z +2005-01-03 02:01:00.0 UTC +2005-04-03 10:01:00.04067 UTC +2013-06-03 01:01:00.30547 UTC +2016-01-03 04:26:34.0123 UTC PREHOOK: query: select * from tstz2 a join tstz2 b on a.t=b.t order by a.t PREHOOK: type: QUERY PREHOOK: Input: default@tstz2 @@ -69,10 +69,10 @@ POSTHOOK: query: select * from tstz2 a join tstz2 b 
on a.t=b.t order by a.t POSTHOOK: type: QUERY POSTHOOK: Input: default@tstz2 #### A masked pattern was here #### -2005-01-03 02:01:00.0 Z 2005-01-03 02:01:00.0 Z -2005-01-03 02:01:00.0 Z 2005-01-03 02:01:00.0 Z -2005-01-03 02:01:00.0 Z 2005-01-03 02:01:00.0 Z -2005-01-03 02:01:00.0 Z 2005-01-03 02:01:00.0 Z -2005-04-03 10:01:00.04067 Z 2005-04-03 10:01:00.04067 Z -2013-06-03 01:01:00.30547 Z 2013-06-03 01:01:00.30547 Z -2016-01-03 04:26:34.0123 Z 2016-01-03 04:26:34.0123 Z +2005-01-03 02:01:00.0 UTC 2005-01-03 02:01:00.0 UTC +2005-01-03 02:01:00.0 UTC 2005-01-03 02:01:00.0 UTC +2005-01-03 02:01:00.0 UTC 2005-01-03 02:01:00.0 UTC +2005-01-03 02:01:00.0 UTC 2005-01-03 02:01:00.0 UTC +2005-04-03 10:01:00.04067 UTC 2005-04-03 10:01:00.04067 UTC +2013-06-03 01:01:00.30547 UTC 2013-06-03 01:01:00.30547 UTC +2016-01-03 04:26:34.0123 UTC 2016-01-03 04:26:34.0123 UTC diff --git ql/src/test/results/clientpositive/timezone.q.out ql/src/test/results/clientpositive/timezone.q.out new file mode 100644 index 0000000..7c8bfbd --- /dev/null +++ ql/src/test/results/clientpositive/timezone.q.out @@ -0,0 +1,384 @@ +PREHOOK: query: drop table `date_test` +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table `date_test` +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table `timestamp_test` +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table `timestamp_test` +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table `timestamptz_test` +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table `timestamptz_test` +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table `date_test` (`mydate1` date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_test +POSTHOOK: query: create table `date_test` (`mydate1` date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_test +PREHOOK: query: insert into `date_test` VALUES + ('2011-01-01 01:01:01.123'), + ('2011-01-01 01:01:01.123 Europe/Rome'), + ('2011-01-01 01:01:01.123 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912'), + ('2011-01-01 01:01:01.12345678912 Europe/Rome'), + ('2011-01-01 01:01:01.12345678912 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912 xyz') +PREHOOK: type: QUERY +PREHOOK: Output: default@date_test +POSTHOOK: query: insert into `date_test` VALUES + ('2011-01-01 01:01:01.123'), + ('2011-01-01 01:01:01.123 Europe/Rome'), + ('2011-01-01 01:01:01.123 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912'), + ('2011-01-01 01:01:01.12345678912 Europe/Rome'), + ('2011-01-01 01:01:01.12345678912 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912 xyz') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@date_test +POSTHOOK: Lineage: date_test.mydate1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table `timestamp_test` (`mydate1` timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@timestamp_test +POSTHOOK: query: create table `timestamp_test` (`mydate1` timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamp_test +PREHOOK: query: insert into `timestamp_test` VALUES + ('2011-01-01 01:01:01.123'), + ('2011-01-01 01:01:01.123 Europe/Rome'), + ('2011-01-01 01:01:01.123 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912'), + ('2011-01-01 01:01:01.12345678912 Europe/Rome'), + ('2011-01-01 01:01:01.12345678912 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912 xyz') +PREHOOK: type: QUERY +PREHOOK: 
Output: default@timestamp_test +POSTHOOK: query: insert into `timestamp_test` VALUES + ('2011-01-01 01:01:01.123'), + ('2011-01-01 01:01:01.123 Europe/Rome'), + ('2011-01-01 01:01:01.123 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912'), + ('2011-01-01 01:01:01.12345678912 Europe/Rome'), + ('2011-01-01 01:01:01.12345678912 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912 xyz') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@timestamp_test +POSTHOOK: Lineage: timestamp_test.mydate1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table `timestamptz_test` (`mydate1` timestamp with time zone) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@timestamptz_test +POSTHOOK: query: create table `timestamptz_test` (`mydate1` timestamp with time zone) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamptz_test +PREHOOK: query: insert into `timestamptz_test` VALUES + ('2011-01-01 01:01:01.123'), + ('2011-01-01 01:01:01.123 Europe/Rome'), + ('2011-01-01 01:01:01.123 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912'), + ('2011-01-01 01:01:01.12345678912 Europe/Rome'), + ('2011-01-01 01:01:01.12345678912 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912 xyz') +PREHOOK: type: QUERY +PREHOOK: Output: default@timestamptz_test +POSTHOOK: query: insert into `timestamptz_test` VALUES + ('2011-01-01 01:01:01.123'), + ('2011-01-01 01:01:01.123 Europe/Rome'), + ('2011-01-01 01:01:01.123 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912'), + ('2011-01-01 01:01:01.12345678912 Europe/Rome'), + ('2011-01-01 01:01:01.12345678912 GMT-05:00'), + ('2011-01-01 01:01:01.12345678912 xyz') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@timestamptz_test +POSTHOOK: Lineage: timestamptz_test.mydate1 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from `date_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@date_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `date_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_test +#### A masked pattern was here #### +2011-01-01 +2010-12-31 +2010-12-31 +2011-01-01 +2010-12-31 +2010-12-31 +NULL +PREHOOK: query: select * from `timestamp_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `timestamp_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_test +#### A masked pattern was here #### +2011-01-01 01:01:01.123 +2010-12-31 16:01:01.123 +2010-12-31 22:01:01.123 +2011-01-01 01:01:01.123456789 +2010-12-31 16:01:01.123456789 +2010-12-31 22:01:01.123456789 +NULL +PREHOOK: query: select * from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +2011-01-01 01:01:01.123 US/Pacific +2010-12-31 16:01:01.123 US/Pacific +2010-12-31 22:01:01.123 US/Pacific +2011-01-01 01:01:01.123456789 US/Pacific +2010-12-31 16:01:01.123456789 US/Pacific +2010-12-31 22:01:01.123456789 US/Pacific +NULL +PREHOOK: query: select * from `date_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@date_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `date_test` +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@date_test +#### A masked pattern was here #### +2011-01-01 +2010-12-31 +2010-12-31 +2011-01-01 +2010-12-31 +2010-12-31 +NULL +PREHOOK: query: select * from `timestamp_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `timestamp_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_test +#### A masked pattern was here #### +2011-01-01 01:01:01.123 +2010-12-31 16:01:01.123 +2010-12-31 22:01:01.123 +2011-01-01 01:01:01.123456789 +2010-12-31 16:01:01.123456789 +2010-12-31 22:01:01.123456789 +NULL +PREHOOK: query: select * from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +2011-01-01 10:01:01.123 Europe/Rome +2011-01-01 01:01:01.123 Europe/Rome +2011-01-01 07:01:01.123 Europe/Rome +2011-01-01 10:01:01.123456789 Europe/Rome +2011-01-01 01:01:01.123456789 Europe/Rome +2011-01-01 07:01:01.123456789 Europe/Rome +NULL +PREHOOK: query: select * from `date_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@date_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `date_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_test +#### A masked pattern was here #### +2011-01-01 +2010-12-31 +2010-12-31 +2011-01-01 +2010-12-31 +2010-12-31 +NULL +PREHOOK: query: select * from `timestamp_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `timestamp_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_test +#### A masked pattern was here #### +2011-01-01 01:01:01.123 +2010-12-31 16:01:01.123 +2010-12-31 22:01:01.123 +2011-01-01 01:01:01.123456789 +2010-12-31 16:01:01.123456789 +2010-12-31 22:01:01.123456789 +NULL +PREHOOK: query: select * from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +2011-01-01 01:01:01.123 America/Los_Angeles +2010-12-31 16:01:01.123 America/Los_Angeles +2010-12-31 22:01:01.123 America/Los_Angeles +2011-01-01 01:01:01.123456789 America/Los_Angeles +2010-12-31 16:01:01.123456789 America/Los_Angeles +2010-12-31 22:01:01.123456789 America/Los_Angeles +NULL +PREHOOK: query: select * from `date_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@date_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `date_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_test +#### A masked pattern was here #### +2011-01-01 +2010-12-31 +2010-12-31 +2011-01-01 +2010-12-31 +2010-12-31 +NULL +PREHOOK: query: select * from `timestamp_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_test +#### A masked pattern was here #### +POSTHOOK: query: select * from `timestamp_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_test +#### A masked pattern was here #### +2011-01-01 01:01:01.123 +2010-12-31 16:01:01.123 +2010-12-31 22:01:01.123 +2011-01-01 01:01:01.123456789 +2010-12-31 16:01:01.123456789 +2010-12-31 22:01:01.123456789 +NULL +PREHOOK: query: select * from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test 
+#### A masked pattern was here #### +POSTHOOK: query: select * from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +2011-01-01 02:01:01.123 GMT-07:00 +2010-12-31 17:01:01.123 GMT-07:00 +2010-12-31 23:01:01.123 GMT-07:00 +2011-01-01 02:01:01.123456789 GMT-07:00 +2010-12-31 17:01:01.123456789 GMT-07:00 +2010-12-31 23:01:01.123456789 GMT-07:00 +NULL +PREHOOK: query: select extract(year from `mydate1`) from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select extract(year from `mydate1`) from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +2011 +2010 +2010 +2011 +2010 +2010 +NULL +PREHOOK: query: select extract(quarter from `mydate1`) from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select extract(quarter from `mydate1`) from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +1 +4 +4 +1 +4 +4 +NULL +PREHOOK: query: select extract(month from `mydate1`) from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select extract(month from `mydate1`) from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +1 +12 +12 +1 +12 +12 +NULL +PREHOOK: query: select extract(day from `mydate1`) from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select extract(day from `mydate1`) from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +1 +31 +31 +1 +31 +31 +NULL +PREHOOK: query: select extract(hour from `mydate1`) from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select extract(hour from `mydate1`) from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +2 +17 +23 +2 +17 +23 +NULL +PREHOOK: query: select extract(minute from `mydate1`) from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select extract(minute from `mydate1`) from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +1 +1 +1 +1 +1 +1 +NULL +PREHOOK: query: select extract(second from `mydate1`) from `timestamptz_test` +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +POSTHOOK: query: select extract(second from `mydate1`) from `timestamptz_test` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamptz_test +#### A masked pattern was here #### +1 +1 +1 +1 +1 +1 +NULL diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java index f333ae9..bfc50db 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java @@ -83,6 +83,7 
@@ import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -429,7 +430,7 @@ static Object deserialize(InputByteBuffer buffer, TypeInfo type, for (int i = 0; i < data.length; i++) { data[i] = buffer.read(invert); } - tstz.fromBinarySortable(data, 0); + tstz.fromBinarySortable(data, 0, ((TimestampTZTypeInfo) type).timeZone()); return tstz; case INTERVAL_YEAR_MONTH: { HiveIntervalYearMonthWritable i = reuse == null ? new HiveIntervalYearMonthWritable() diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampTZWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampTZWritable.java index 8c3f8f6..a74c6f7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampTZWritable.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampTZWritable.java @@ -27,12 +27,16 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.time.ZoneId; import java.util.Arrays; /** * Writable for TimestampTZ. Copied from TimestampWritable. * After we replace {@link java.sql.Timestamp} with {@link java.time.LocalDateTime} for Timestamp, * it'll need a new Writable. + * All timestamps with time zone are serialized as UTC, retaining the instant. + * E.g. "2017-04-14 18:00:00 Asia/Shanghai" is serialized as + * "2017-04-14 10:00:00.0 Z". */ public class TimestampTZWritable implements WritableComparable<TimestampTZWritable> { @@ -48,6 +52,7 @@ public static final int BINARY_SORTABLE_LENGTH = 11; private TimestampTZ timestampTZ = new TimestampTZ(); + private ZoneId timeZone; /** * true if data is stored in timestamptz field rather than byte arrays.
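* The serialized bytes always encode the instant in UTC (see the class javadoc above); the
* ZoneId added in this patch is applied only when populateTimestampTZ() materializes the
* value. A sketch of the intended flow, with an illustrative zone:
*   writable.set(bytes, offset, ZoneId.of("Asia/Shanghai"));
*   TimestampTZ t = writable.getTimestampTZ(); // same instant, rendered in Asia/Shanghai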
@@ -64,26 +69,27 @@ private int offset; public TimestampTZWritable() { - bytesEmpty = false; - currentBytes = internalBytes; - offset = 0; + this.bytesEmpty = false; + this.currentBytes = internalBytes; + this.offset = 0; } - public TimestampTZWritable(byte[] bytes, int offset) { - set(bytes, offset); + public TimestampTZWritable(byte[] bytes, int offset, ZoneId timeZone) { + set(bytes, offset, timeZone); } public TimestampTZWritable(TimestampTZWritable other) { - this(other.getBytes(), 0); + this(other.getBytes(), 0, other.getTimestampTZ().getZonedDateTime().getZone()); } public TimestampTZWritable(TimestampTZ tstz) { set(tstz); } - public void set(byte[] bytes, int offset) { + public void set(byte[] bytes, int offset, ZoneId timeZone) { externalBytes = bytes; this.offset = offset; + this.timeZone = timeZone; bytesEmpty = false; timestampTZEmpty = true; currentBytes = externalBytes; @@ -95,6 +101,7 @@ public void set(TimestampTZ tstz) { return; } timestampTZ = tstz; + timeZone = timestampTZ.getZonedDateTime().getZone(); bytesEmpty = true; timestampTZEmpty = false; } @@ -103,12 +110,24 @@ public void set(TimestampTZWritable t) { if (t.bytesEmpty) { set(t.getTimestampTZ()); } else if (t.currentBytes == t.externalBytes) { - set(t.currentBytes, t.offset); + set(t.currentBytes, t.offset, t.timeZone); } else { - set(t.currentBytes, 0); + set(t.currentBytes, 0, t.timeZone); } } + public void setTimeZone(ZoneId timeZone) { + if (timestampTZ != null) { + timestampTZ.setZonedDateTime( + timestampTZ.getZonedDateTime().withZoneSameInstant(timeZone)); + } + this.timeZone = timeZone; + } + + public ZoneId getTimeZone() { + return timeZone; + } + public TimestampTZ getTimestampTZ() { populateTimestampTZ(); return timestampTZ; @@ -176,9 +195,12 @@ private void populateBytes() { private void populateTimestampTZ() { if (timestampTZEmpty) { - long seconds = getSeconds(); - int nanos = getNanos(); - timestampTZ.set(seconds, nanos); + if (bytesEmpty) { + throw new IllegalStateException("Bytes are empty"); + } + long seconds = getSeconds(currentBytes, offset); + int nanos = hasDecimalOrSecondVInt(currentBytes[offset]) ? getNanos(currentBytes, offset + 4) : 0; + timestampTZ.set(seconds, nanos, timeZone); timestampTZEmpty = false; } } @@ -272,11 +294,11 @@ public void readFields(DataInput dataInput) throws IOException { return b; } - public void fromBinarySortable(byte[] bytes, int binSortOffset) { + public void fromBinarySortable(byte[] bytes, int binSortOffset, ZoneId timeZone) { // Flip the sign bit (and unused bits of the high-order byte) of the seven-byte long back. long seconds = readSevenByteLong(bytes, binSortOffset) ^ SEVEN_BYTE_LONG_SIGN_FLIP; int nanos = bytesToInt(bytes, binSortOffset + 7); - timestampTZ.set(seconds, nanos); + timestampTZ.set(seconds, nanos, timeZone); timestampTZEmpty = false; bytesEmpty = true; } @@ -315,10 +337,10 @@ private static boolean setNanosBytes(int nanos, byte[] b, int offset, boolean ha return decimal != 0; } - public static void setTimestampTZ(TimestampTZ t, byte[] bytes, int offset) { + public static void setTimestampTZ(TimestampTZ t, byte[] bytes, int offset, ZoneId timeZone) { long seconds = getSeconds(bytes, offset); int nanos = hasDecimalOrSecondVInt(bytes[offset]) ? 
getNanos(bytes, offset + 4) : 0; - t.set(seconds, nanos); + t.set(seconds, nanos, timeZone); } public static int getTotalLength(byte[] bytes, int offset) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySerDeParameters.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySerDeParameters.java index ee4bb34..0b015c9 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySerDeParameters.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySerDeParameters.java @@ -23,8 +23,6 @@ import java.util.List; import java.util.Properties; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.classification.InterfaceAudience.Public; import org.apache.hadoop.hive.common.classification.InterfaceStability.Stable; @@ -33,11 +31,14 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.Text; import org.apache.hive.common.util.HiveStringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * SerDeParameters. @@ -88,7 +89,7 @@ public LazySerDeParameters(Configuration job, Properties tbl, String serdeName) lastColumnTakesRest = (lastColumnTakesRestString != null && lastColumnTakesRestString .equalsIgnoreCase("true")); - extractColumnInfo(); + extractColumnInfo(job); // Create the LazyObject for storing the rows rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); @@ -133,7 +134,7 @@ public LazySerDeParameters(Configuration job, Properties tbl, String serdeName) * Extracts and set column names and column types from the table properties * @throws SerDeException */ - public void extractColumnInfo() throws SerDeException { + public void extractColumnInfo(Configuration conf) throws SerDeException { // Read the configuration parameters String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS); // NOTE: if "columns.types" is missing, all columns will be of String type @@ -160,6 +161,16 @@ public void extractColumnInfo() throws SerDeException { } columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + // Insert time-zone for timestamp type + if (conf != null) { + final TimestampTZTypeInfo tsTZTypeInfo = new TimestampTZTypeInfo( + conf.get(ConfVars.HIVE_SESSION_TIME_ZONE.varname)); + for (int i = 0; i < columnTypes.size(); i++) { + if (columnTypes.get(i) instanceof TimestampTZTypeInfo) { + columnTypes.set(i, tsTZTypeInfo); + } + } + } if (columnNames.size() != columnTypes.size()) { throw new SerDeException(serdeName + ": columns has " + columnNames.size() diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestampTZ.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestampTZ.java index df5c586..3f37bbd 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestampTZ.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestampTZ.java @@ -17,18 +17,20 @@ */ package org.apache.hadoop.hive.serde2.lazy; +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.time.ZoneId; +import 
java.time.format.DateTimeParseException; + import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.TimestampTZWritable; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyTimestampTZObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.io.OutputStream; -import java.io.UnsupportedEncodingException; -import java.time.format.DateTimeParseException; - /** * LazyPrimitive for TimestampTZ. Similar to LazyTimestamp. */ @@ -37,13 +39,21 @@ private static final Logger LOG = LoggerFactory.getLogger(LazyTimestampTZ.class); + private ZoneId timeZone; + public LazyTimestampTZ(LazyTimestampTZObjectInspector lazyTimestampTZObjectInspector) { super(lazyTimestampTZObjectInspector); + TimestampTZTypeInfo typeInfo = (TimestampTZTypeInfo) oi.getTypeInfo(); + if (typeInfo == null) { + throw new RuntimeException("TimestampTZ type used without type params"); + } + timeZone = typeInfo.timeZone(); data = new TimestampTZWritable(); } public LazyTimestampTZ(LazyTimestampTZ copy) { super(copy); + timeZone = copy.timeZone; data = new TimestampTZWritable(copy.data); } @@ -63,7 +73,7 @@ public void init(ByteArrayRef bytes, int start, int length) { logExceptionMessage(bytes, start, length, serdeConstants.TIMESTAMPTZ_TYPE_NAME.toUpperCase()); } else { - t = TimestampTZ.parse(s); + t = TimestampTZ.parse(s, timeZone); isNull = false; } } catch (UnsupportedEncodingException e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java index 6d1ee1e..8f02bff 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; @@ -71,8 +72,6 @@ new LazyDateObjectInspector(); public static final LazyTimestampObjectInspector LAZY_TIMESTAMP_OBJECT_INSPECTOR = new LazyTimestampObjectInspector(); - public static final LazyTimestampTZObjectInspector LAZY_TIMESTAMPTZ_OBJECT_INSPECTOR = - new LazyTimestampTZObjectInspector(); public static final LazyHiveIntervalYearMonthObjectInspector LAZY_INTERVAL_YEAR_MONTH_OBJECT_INSPECTOR = new LazyHiveIntervalYearMonthObjectInspector(); public static final LazyHiveIntervalDayTimeObjectInspector LAZY_INTERVAL_DAY_TIME_OBJECT_INSPECTOR = @@ -113,8 +112,6 @@ private LazyPrimitiveObjectInspectorFactory() { LAZY_DATE_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME), LAZY_TIMESTAMP_OBJECT_INSPECTOR); - cachedPrimitiveLazyObjectInspectors.put(TypeInfoFactory.getPrimitiveTypeInfo( - serdeConstants.TIMESTAMPTZ_TYPE_NAME), LAZY_TIMESTAMPTZ_OBJECT_INSPECTOR); 
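(Aside, not part of the patch: the singleton registration removed just above can no longer work because timestamptz now carries a time-zone parameter, so its lazy inspector is built per TimestampTZTypeInfo by the case TIMESTAMPTZ branch added near the end of this file's switch. A minimal sketch of the new lookup path, assuming the existing three-argument getLazyObjectInspector overload; the zone id is only illustrative:

    // import java.time.ZoneId;
    TimestampTZTypeInfo typeInfo =
        TypeInfoFactory.getTimestampTZTypeInfo(ZoneId.of("Asia/Shanghai"));
    // no name-keyed cache hit; built via the parameterized case TIMESTAMPTZ
    AbstractPrimitiveLazyObjectInspector<?> oi =
        LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(typeInfo, false, (byte) 0);
    // oi is a LazyTimestampTZObjectInspector bound to Asia/Shanghai
)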
cachedPrimitiveLazyObjectInspectors.put(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME), LAZY_INTERVAL_YEAR_MONTH_OBJECT_INSPECTOR); cachedPrimitiveLazyObjectInspectors.put(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME), @@ -175,6 +172,9 @@ private LazyPrimitiveObjectInspectorFactory() { case DECIMAL: poi = new LazyHiveDecimalObjectInspector((DecimalTypeInfo)typeInfo); break; + case TIMESTAMPTZ: + poi = new LazyTimestampTZObjectInspector((TimestampTZTypeInfo)typeInfo); + break; default: throw new RuntimeException( "Primitve type " + typeInfo.getPrimitiveCategory() + " should not take parameters"); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyTimestampTZObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyTimestampTZObjectInspector.java index 7336385..276646d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyTimestampTZObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyTimestampTZObjectInspector.java @@ -21,19 +21,28 @@ import org.apache.hadoop.hive.serde2.io.TimestampTZWritable; import org.apache.hadoop.hive.serde2.lazy.LazyTimestampTZ; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampTZObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; public class LazyTimestampTZObjectInspector extends AbstractPrimitiveLazyObjectInspector implements TimestampTZObjectInspector { - public LazyTimestampTZObjectInspector() { - super(TypeInfoFactory.timestampTZTypeInfo); + protected LazyTimestampTZObjectInspector(TimestampTZTypeInfo typeInfo) { + super(typeInfo); } @Override public TimestampTZ getPrimitiveJavaObject(Object o) { - return o == null ? 
null : ((LazyTimestampTZ) o).getWritableObject().getTimestampTZ(); + if (o == null) { + return null; + } + + TimestampTZ t = ((LazyTimestampTZ) o).getWritableObject().getTimestampTZ(); + TimestampTZTypeInfo timestampTZTypeInfo = (TimestampTZTypeInfo) typeInfo; + if (!t.getZonedDateTime().getZone().equals(timestampTZTypeInfo.timeZone())) { + t.setZonedDateTime(t.getZonedDateTime().withZoneSameInstant(timestampTZTypeInfo.timeZone())); + } + return t; } @Override diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestampTZ.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestampTZ.java index 6d9ca6e..b79147d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestampTZ.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestampTZ.java @@ -17,20 +17,27 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; +import java.time.ZoneId; + import org.apache.hadoop.hive.serde2.io.TimestampTZWritable; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampTZObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; public class LazyBinaryTimestampTZ extends LazyBinaryPrimitive { + private ZoneId timeZone; + public LazyBinaryTimestampTZ(WritableTimestampTZObjectInspector oi) { super(oi); - data = new TimestampTZWritable(); + TimestampTZTypeInfo typeInfo = (TimestampTZTypeInfo) oi.getTypeInfo(); + this.timeZone = typeInfo.timeZone(); + this.data = new TimestampTZWritable(); } @Override public void init(ByteArrayRef bytes, int start, int length) { - data.set(bytes.getData(), start); + data.set(bytes.getData(), start, timeZone); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java index ca96e33..f9d4238 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java @@ -43,7 +43,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampTZObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; /** * ObjectInspectorConverters. 
@@ -125,7 +124,8 @@ private static Converter getConverter(PrimitiveObjectInspector inputOI, inputOI, (SettableTimestampObjectInspector) outputOI); case TIMESTAMPTZ: - return new PrimitiveObjectInspectorConverter.TimestampTZConverter(inputOI, + return new PrimitiveObjectInspectorConverter.TimestampTZConverter( + inputOI, (SettableTimestampTZObjectInspector) outputOI); case INTERVAL_YEAR_MONTH: return new PrimitiveObjectInspectorConverter.HiveIntervalYearMonthConverter( diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaTimestampTZObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaTimestampTZObjectInspector.java index 32b9c69..db911c8 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaTimestampTZObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaTimestampTZObjectInspector.java @@ -19,18 +19,22 @@ import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.serde2.io.TimestampTZWritable; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; public class JavaTimestampTZObjectInspector extends AbstractPrimitiveJavaObjectInspector implements SettableTimestampTZObjectInspector { - JavaTimestampTZObjectInspector() { - super(TypeInfoFactory.timestampTZTypeInfo); + public JavaTimestampTZObjectInspector() { + } + + public JavaTimestampTZObjectInspector(TimestampTZTypeInfo typeInfo) { + super(typeInfo); } @Override public Object set(Object o, byte[] bytes, int offset) { - TimestampTZWritable.setTimestampTZ((TimestampTZ) o, bytes, offset); + TimestampTZWritable.setTimestampTZ( + (TimestampTZ) o, bytes, offset, ((TimestampTZTypeInfo) typeInfo).timeZone()); return o; } @@ -39,7 +43,8 @@ public Object set(Object o, TimestampTZ t) { if (t == null) { return null; } - ((TimestampTZ) o).set(t.getEpochSecond(), t.getNanos()); + ((TimestampTZ) o).setZonedDateTime( + t.getZonedDateTime().withZoneSameInstant(((TimestampTZTypeInfo) typeInfo).timeZone())); return o; } @@ -48,29 +53,51 @@ public Object set(Object o, TimestampTZWritable t) { if (t == null) { return null; } - ((TimestampTZ) o).set(t.getSeconds(), t.getNanos()); + ((TimestampTZ) o).setZonedDateTime( + t.getTimestampTZ().getZonedDateTime().withZoneSameInstant(((TimestampTZTypeInfo) typeInfo).timeZone())); return o; } @Override public Object create(byte[] bytes, int offset) { TimestampTZ t = new TimestampTZ(); - TimestampTZWritable.setTimestampTZ(t, bytes, offset); + TimestampTZWritable.setTimestampTZ( + t, bytes, offset, ((TimestampTZTypeInfo) typeInfo).timeZone()); return t; } @Override public Object create(TimestampTZ t) { - return new TimestampTZ(t.getZonedDateTime()); + return t; } @Override public TimestampTZWritable getPrimitiveWritableObject(Object o) { - return o == null ? null : new TimestampTZWritable((TimestampTZ) o); + if (o == null) { + return null; + } + + TimestampTZ t = (TimestampTZ) o; + TimestampTZTypeInfo timestampTZTypeInfo = (TimestampTZTypeInfo) typeInfo; + if (!t.getZonedDateTime().getZone().equals(timestampTZTypeInfo.timeZone())) { + t.setZonedDateTime( + t.getZonedDateTime().withZoneSameInstant(timestampTZTypeInfo.timeZone())); + } + return new TimestampTZWritable(t); } @Override public TimestampTZ getPrimitiveJavaObject(Object o) { - return o == null ? 
null : (TimestampTZ) o; + if (o == null) { + return null; + } + + TimestampTZ t = (TimestampTZ) o; + TimestampTZTypeInfo timestampTZTypeInfo = (TimestampTZTypeInfo) typeInfo; + if (!t.getZonedDateTime().getZone().equals(timestampTZTypeInfo.timeZone())) { + t.setZonedDateTime( + t.getZonedDateTime().withZoneSameInstant(timestampTZTypeInfo.timeZone())); + } + return t; } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java index d4b7a32..4cbbd71 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java @@ -20,6 +20,7 @@ import java.sql.Date; import java.sql.Timestamp; +import java.time.ZoneId; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -28,12 +29,11 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; import org.apache.hadoop.hive.serde2.lazy.LazyLong; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; @@ -297,12 +297,15 @@ public Object convert(Object input) { final PrimitiveObjectInspector inputOI; final SettableTimestampTZObjectInspector outputOI; final Object r; + final ZoneId timeZone; - public TimestampTZConverter(PrimitiveObjectInspector inputOI, + public TimestampTZConverter( + PrimitiveObjectInspector inputOI, SettableTimestampTZObjectInspector outputOI) { this.inputOI = inputOI; this.outputOI = outputOI; - r = outputOI.create(new TimestampTZ()); + this.r = outputOI.create(new TimestampTZ()); + this.timeZone = ((TimestampTZTypeInfo) outputOI.getTypeInfo()).timeZone(); } @Override @@ -311,7 +314,7 @@ public Object convert(Object input) { return null; } - return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestampTZ(input, inputOI)); + return outputOI.set(r, PrimitiveObjectInspectorUtils.getTimestampTZ(input, inputOI, timeZone)); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java index 2425c30..8211ff4 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import 
org.apache.hadoop.io.BooleanWritable; @@ -89,7 +90,7 @@ public static final WritableTimestampObjectInspector writableTimestampObjectInspector = new WritableTimestampObjectInspector(); public static final WritableTimestampTZObjectInspector writableTimestampTZObjectInspector = - new WritableTimestampTZObjectInspector(); + new WritableTimestampTZObjectInspector(TypeInfoFactory.timestampTZTypeInfo); public static final WritableHiveIntervalYearMonthObjectInspector writableHiveIntervalYearMonthObjectInspector = new WritableHiveIntervalYearMonthObjectInspector(); public static final WritableHiveIntervalDayTimeObjectInspector writableHiveIntervalDayTimeObjectInspector = @@ -127,8 +128,7 @@ writableDateObjectInspector); cachedPrimitiveWritableInspectorCache.put(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME), writableTimestampObjectInspector); - cachedPrimitiveWritableInspectorCache.put(TypeInfoFactory.getPrimitiveTypeInfo( - serdeConstants.TIMESTAMPTZ_TYPE_NAME), writableTimestampTZObjectInspector); + cachedPrimitiveWritableInspectorCache.put(TypeInfoFactory.timestampTZTypeInfo, writableTimestampTZObjectInspector); cachedPrimitiveWritableInspectorCache.put(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME), writableHiveIntervalYearMonthObjectInspector); cachedPrimitiveWritableInspectorCache.put(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME), @@ -154,8 +154,7 @@ primitiveCategoryToWritableOI.put(PrimitiveCategory.VOID, writableVoidObjectInspector); primitiveCategoryToWritableOI.put(PrimitiveCategory.DATE, writableDateObjectInspector); primitiveCategoryToWritableOI.put(PrimitiveCategory.TIMESTAMP, writableTimestampObjectInspector); - primitiveCategoryToWritableOI.put(PrimitiveCategory.TIMESTAMPTZ, - writableTimestampTZObjectInspector); + primitiveCategoryToWritableOI.put(PrimitiveCategory.TIMESTAMPTZ, writableTimestampTZObjectInspector); primitiveCategoryToWritableOI.put(PrimitiveCategory.INTERVAL_YEAR_MONTH, writableHiveIntervalYearMonthObjectInspector); primitiveCategoryToWritableOI.put(PrimitiveCategory.INTERVAL_DAY_TIME, writableHiveIntervalDayTimeObjectInspector); primitiveCategoryToWritableOI.put(PrimitiveCategory.BINARY, writableBinaryObjectInspector); @@ -189,7 +188,7 @@ public static final JavaTimestampObjectInspector javaTimestampObjectInspector = new JavaTimestampObjectInspector(); public static final JavaTimestampTZObjectInspector javaTimestampTZObjectInspector = - new JavaTimestampTZObjectInspector(); + new JavaTimestampTZObjectInspector(TypeInfoFactory.timestampTZTypeInfo); public static final JavaHiveIntervalYearMonthObjectInspector javaHiveIntervalYearMonthObjectInspector = new JavaHiveIntervalYearMonthObjectInspector(); public static final JavaHiveIntervalDayTimeObjectInspector javaHiveIntervalDayTimeObjectInspector = @@ -227,8 +226,7 @@ javaDateObjectInspector); cachedPrimitiveJavaInspectorCache.put(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME), javaTimestampObjectInspector); - cachedPrimitiveJavaInspectorCache.put(TypeInfoFactory.timestampTZTypeInfo, - javaTimestampTZObjectInspector); + cachedPrimitiveJavaInspectorCache.put(TypeInfoFactory.timestampTZTypeInfo, javaTimestampTZObjectInspector); cachedPrimitiveJavaInspectorCache.put(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME), javaHiveIntervalYearMonthObjectInspector); 
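(Aside, not part of the patch: after the two cache changes above, only the system-default-zone instance, TypeInfoFactory.timestampTZTypeInfo, stays pre-registered; a timestamptz qualified with any other zone misses the cache and is handled by the case TIMESTAMPTZ branches added further down in this file. A short sketch of the writable path, with an illustrative zone id:

    // import java.time.ZoneId;
    TimestampTZTypeInfo tz =
        TypeInfoFactory.getTimestampTZTypeInfo(ZoneId.of("Europe/Madrid"));
    // cache miss unless Europe/Madrid happens to be the system default zone;
    // then constructed as new WritableTimestampTZObjectInspector(tz)
    AbstractPrimitiveWritableObjectInspector oi =
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(tz);
)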
cachedPrimitiveJavaInspectorCache.put(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME), @@ -297,6 +295,9 @@ public static AbstractPrimitiveWritableObjectInspector getPrimitiveWritableObjec case VARCHAR: result = new WritableHiveVarcharObjectInspector((VarcharTypeInfo)typeInfo); break; + case TIMESTAMPTZ: + result = new WritableTimestampTZObjectInspector((TimestampTZTypeInfo)typeInfo); + break; case DECIMAL: result = new WritableHiveDecimalObjectInspector((DecimalTypeInfo)typeInfo); break; @@ -349,7 +350,7 @@ public static ConstantObjectInspector getPrimitiveWritableConstantObjectInspecto case TIMESTAMP: return new WritableConstantTimestampObjectInspector((TimestampWritable)value); case TIMESTAMPTZ: - return new WritableConstantTimestampTZObjectInspector((TimestampTZWritable) value); + return new WritableConstantTimestampTZObjectInspector((TimestampTZTypeInfo)typeInfo, (TimestampTZWritable) value); case INTERVAL_YEAR_MONTH: return new WritableConstantHiveIntervalYearMonthObjectInspector((HiveIntervalYearMonthWritable) value); case INTERVAL_DAY_TIME: @@ -402,6 +403,9 @@ public static AbstractPrimitiveJavaObjectInspector getPrimitiveJavaObjectInspect case VARCHAR: result = new JavaHiveVarcharObjectInspector((VarcharTypeInfo)typeInfo); break; + case TIMESTAMPTZ: + result = new JavaTimestampTZObjectInspector((TimestampTZTypeInfo)typeInfo); + break; case DECIMAL: result = new JavaHiveDecimalObjectInspector((DecimalTypeInfo)typeInfo); break; diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index 886c298..d14de63 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -24,6 +24,8 @@ import java.nio.charset.CharacterCodingException; import java.sql.Date; import java.sql.Timestamp; +import java.time.DateTimeException; +import java.time.ZoneId; import java.util.HashMap; import java.util.Map; @@ -1051,16 +1053,26 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) { try { result = Date.valueOf(s); } catch (IllegalArgumentException e) { - result = null; + Timestamp ts = getTimestampFromString(s); + if (ts != null) { + result = new Date(ts.getTime()); + } else { + result = null; + } } break; case CHAR: case VARCHAR: { + String val = getString(o, oi).trim(); try { - String val = getString(o, oi).trim(); result = Date.valueOf(val); } catch (IllegalArgumentException e) { - result = null; + Timestamp ts = getTimestampFromString(val); + if (ts != null) { + result = new Date(ts.getTime()); + } else { + result = null; + } } break; } @@ -1164,7 +1176,44 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector inputOI, return result; } - public static TimestampTZ getTimestampTZ(Object o, PrimitiveObjectInspector oi) { + static Timestamp getTimestampFromString(String s) { + Timestamp result; + s = s.trim(); + s = trimNanoTimestamp(s); + + int firstSpace = s.indexOf(' '); + if (firstSpace < 0) { + s = s.concat(" 00:00:00"); + } + try { + result = Timestamp.valueOf(s); + } catch (IllegalArgumentException e) { + // Let's try to parse it as timestamp with time zone and transform + try { + result = Timestamp.from(TimestampTZ.parse(s).getZonedDateTime().toInstant()); + } catch (DateTimeException e2) { + result = 
null; + } + } + return result; + } + + private static String trimNanoTimestamp(String s) { + int firstSpace = s.indexOf(' '); + // Throw away extra if more than 9 decimal places + int periodIdx = s.indexOf("."); + if (periodIdx != -1) { + int secondSpace = firstSpace < 0 ? -1 : s.indexOf(' ', firstSpace + 1); + int maxLength = secondSpace == -1 ? s.length() : secondSpace; + if (maxLength - periodIdx > 9) { + s = s.substring(0, periodIdx + 10).concat(s.substring(maxLength, s.length())); + } + } + return s; + } + + public static TimestampTZ getTimestampTZ(Object o, PrimitiveObjectInspector oi, + ZoneId timeZone) { if (o == null) { return null; } @@ -1172,25 +1221,23 @@ public static TimestampTZ getTimestampTZ(Object o, PrimitiveObjectInspector oi) case VOID: { return null; } - // The resulting timestamp with time zone will have TZ in UTC - // instead of the original TZ in the string representation. case STRING: { StringObjectInspector soi = (StringObjectInspector) oi; String s = soi.getPrimitiveJavaObject(o).trim(); - return TimestampTZ.parseOrNull(s); + return TimestampTZ.parseOrNull(trimNanoTimestamp(s), timeZone); } case CHAR: case VARCHAR: { String s = getString(o, oi).trim(); - return TimestampTZ.parseOrNull(s); + return TimestampTZ.parseOrNull(trimNanoTimestamp(s), timeZone); } case DATE: { Date date = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).get(); - return TimestampTZ.convert(date); + return TimestampTZ.convert(date, timeZone); } case TIMESTAMP: { Timestamp ts = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).getTimestamp(); - return TimestampTZ.convert(ts); + return TimestampTZ.convert(ts, timeZone); } case TIMESTAMPTZ: { return ((TimestampTZObjectInspector) oi).getPrimitiveWritableObject(o).getTimestampTZ(); @@ -1201,28 +1248,6 @@ public static TimestampTZ getTimestampTZ(Object o, PrimitiveObjectInspector oi) } } - static Timestamp getTimestampFromString(String s) { - Timestamp result; - s = s.trim(); - - // Throw away extra if more than 9 decimal places - int periodIdx = s.indexOf("."); - if (periodIdx != -1) { - if (s.length() - periodIdx > 9) { - s = s.substring(0, periodIdx + 10); - } - } - if (s.indexOf(' ') < 0) { - s = s.concat(" 00:00:00"); - } - try { - result = Timestamp.valueOf(s); - } catch (IllegalArgumentException e) { - result = null; - } - return result; - } - public static HiveIntervalYearMonth getHiveIntervalYearMonth(Object o, PrimitiveObjectInspector oi) { if (o == null) { return null; diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantTimestampTZObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantTimestampTZObjectInspector.java index 5805ce8..f3824be 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantTimestampTZObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantTimestampTZObjectInspector.java @@ -19,13 +19,17 @@ import org.apache.hadoop.hive.serde2.io.TimestampTZWritable; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; public class WritableConstantTimestampTZObjectInspector extends WritableTimestampTZObjectInspector implements ConstantObjectInspector { private TimestampTZWritable value; - public WritableConstantTimestampTZObjectInspector(TimestampTZWritable value) { + public WritableConstantTimestampTZObjectInspector( + 
TimestampTZTypeInfo typeInfo, + TimestampTZWritable value) { + super(typeInfo); this.value = value; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableTimestampTZObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableTimestampTZObjectInspector.java index 0b622c1..ac3be5e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableTimestampTZObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableTimestampTZObjectInspector.java @@ -19,23 +19,47 @@ import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.serde2.io.TimestampTZWritable; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TimestampTZTypeInfo; public class WritableTimestampTZObjectInspector extends AbstractPrimitiveWritableObjectInspector implements SettableTimestampTZObjectInspector { public WritableTimestampTZObjectInspector() { - super(TypeInfoFactory.timestampTZTypeInfo); + } + + public WritableTimestampTZObjectInspector(TimestampTZTypeInfo typeInfo) { + super(typeInfo); } @Override public TimestampTZWritable getPrimitiveWritableObject(Object o) { - return o == null ? null : (TimestampTZWritable) o; + if (o == null) { + return null; + } + TimestampTZWritable t = (TimestampTZWritable) o; + TimestampTZTypeInfo timestampTZTypeInfo = (TimestampTZTypeInfo) typeInfo; + if (!t.getTimestampTZ().getZonedDateTime().getZone().equals(timestampTZTypeInfo.timeZone())) { + t.setTimeZone(timestampTZTypeInfo.timeZone()); + } + return t; + } + + @Override + public TimestampTZ getPrimitiveJavaObject(Object o) { + if (o == null) { + return null; + } + TimestampTZWritable t = (TimestampTZWritable) o; + TimestampTZTypeInfo timestampTZTypeInfo = (TimestampTZTypeInfo) typeInfo; + if (!t.getTimestampTZ().getZonedDateTime().getZone().equals(timestampTZTypeInfo.timeZone())) { + t.setTimeZone(timestampTZTypeInfo.timeZone()); + } + return t.getTimestampTZ(); } @Override public Object set(Object o, byte[] bytes, int offset) { - ((TimestampTZWritable) o).set(bytes, offset); + ((TimestampTZWritable) o).set(bytes, offset, ((TimestampTZTypeInfo) typeInfo).timeZone()); return o; } @@ -44,6 +68,10 @@ public Object set(Object o, TimestampTZ t) { if (t == null) { return null; } + TimestampTZTypeInfo timestampTZTypeInfo = (TimestampTZTypeInfo) typeInfo; + if (!t.getZonedDateTime().getZone().equals(timestampTZTypeInfo.timeZone())) { + t.setZonedDateTime(t.getZonedDateTime().withZoneSameInstant(timestampTZTypeInfo.timeZone())); + } ((TimestampTZWritable) o).set(t); return o; } @@ -53,13 +81,18 @@ public Object set(Object o, TimestampTZWritable t) { if (t == null) { return null; } + TimestampTZTypeInfo timestampTZTypeInfo = (TimestampTZTypeInfo) typeInfo; + if (!t.getTimestampTZ().getZonedDateTime().getZone().equals(timestampTZTypeInfo.timeZone())) { + t.getTimestampTZ().setZonedDateTime( + t.getTimestampTZ().getZonedDateTime().withZoneSameInstant(timestampTZTypeInfo.timeZone())); + } ((TimestampTZWritable) o).set(t); return o; } @Override public Object create(byte[] bytes, int offset) { - return new TimestampTZWritable(bytes, offset); + return new TimestampTZWritable(bytes, offset, ((TimestampTZTypeInfo) typeInfo).timeZone()); } @Override @@ -68,11 +101,6 @@ public Object create(TimestampTZ t) { } @Override - public TimestampTZ getPrimitiveJavaObject(Object o) { - return o == null ? 
null : ((TimestampTZWritable) o).getTimestampTZ(); - } - - @Override public Object copyObject(Object o) { return o == null ? null : new TimestampTZWritable((TimestampTZWritable) o); }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TimestampTZTypeInfo.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TimestampTZTypeInfo.java
new file mode 100644
index 0000000..fa099ea
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TimestampTZTypeInfo.java
@@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.typeinfo; + +import java.time.DateTimeException; +import java.time.ZoneId; +import java.util.Objects; + +import org.apache.hadoop.hive.serde.serdeConstants; + +public class TimestampTZTypeInfo extends PrimitiveTypeInfo { + private static final long serialVersionUID = 1L; + + private ZoneId timeZone; + + public TimestampTZTypeInfo() { + super(serdeConstants.TIMESTAMPTZ_TYPE_NAME); + } + + public TimestampTZTypeInfo(String timeZoneStr) { + super(serdeConstants.TIMESTAMPTZ_TYPE_NAME); + if (timeZoneStr == null || timeZoneStr.trim().isEmpty() || + timeZoneStr.trim().toLowerCase().equals("local")) { + // default; return so that ZoneId.of is never called with a null or "local" value + this.timeZone = ZoneId.systemDefault(); + return; + } + try { + this.timeZone = ZoneId.of(timeZoneStr); + } catch (DateTimeException e1) { + // default + this.timeZone = ZoneId.systemDefault(); + } + } + + @Override + public String getTypeName() { + return serdeConstants.TIMESTAMPTZ_TYPE_NAME; + } + + @Override + public void setTypeName(String typeName) { + // No need to set type name, it should always be timestamptz + return; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + TimestampTZTypeInfo dti = (TimestampTZTypeInfo) other; + + return this.timeZone().equals(dti.timeZone()); + } + + /** + * Generate the hashCode for this TypeInfo.
+ */ + @Override + public int hashCode() { + return Objects.hash(typeName, timeZone); + } + + @Override + public String toString() { + return getQualifiedName(); + } + + @Override + public String getQualifiedName() { + return getQualifiedName(timeZone); + } + + public static String getQualifiedName(ZoneId timeZone) { + StringBuilder sb = new StringBuilder(serdeConstants.TIMESTAMPTZ_TYPE_NAME); + sb.append("('"); + sb.append(timeZone); + sb.append("')"); + return sb.toString(); + } + + public ZoneId timeZone() { + return timeZone; + } + + public ZoneId getTimeZone() { + return timeZone; + } + + public void setTimeZone(ZoneId timeZone) { + this.timeZone = timeZone; + } + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java index b0e0bf2..b73d6ee 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.serde2.typeinfo; +import java.time.ZoneId; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ConcurrentHashMap; @@ -55,8 +56,6 @@ private TypeInfoFactory() { public static final PrimitiveTypeInfo shortTypeInfo = new PrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME); public static final PrimitiveTypeInfo dateTypeInfo = new PrimitiveTypeInfo(serdeConstants.DATE_TYPE_NAME); public static final PrimitiveTypeInfo timestampTypeInfo = new PrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME); - public static final PrimitiveTypeInfo timestampTZTypeInfo = - new PrimitiveTypeInfo(serdeConstants.TIMESTAMPTZ_TYPE_NAME); public static final PrimitiveTypeInfo intervalYearMonthTypeInfo = new PrimitiveTypeInfo(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME); public static final PrimitiveTypeInfo intervalDayTimeTypeInfo = new PrimitiveTypeInfo(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME); public static final PrimitiveTypeInfo binaryTypeInfo = new PrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME); @@ -67,6 +66,12 @@ private TypeInfoFactory() { public static final DecimalTypeInfo decimalTypeInfo = new DecimalTypeInfo(HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); + /** + * A TimestampTZTypeInfo with system default time zone. 
+ */ + public static final TimestampTZTypeInfo timestampTZTypeInfo = new TimestampTZTypeInfo( + ZoneId.systemDefault().getId()); + public static final PrimitiveTypeInfo unknownTypeInfo = new PrimitiveTypeInfo("unknown"); // Map from type name (such as int or varchar(40) to the corresponding PrimitiveTypeInfo @@ -158,6 +163,11 @@ private static PrimitiveTypeInfo createPrimitiveTypeInfo(String fullName) { } return new DecimalTypeInfo(Integer.valueOf(parts.typeParams[0]), Integer.valueOf(parts.typeParams[1])); + case TIMESTAMPTZ: + if (parts.typeParams.length != 1) { + return null; + } + return new TimestampTZTypeInfo(parts.typeParams[0]); default: return null; } @@ -178,6 +188,11 @@ public static DecimalTypeInfo getDecimalTypeInfo(int precision, int scale) { return (DecimalTypeInfo) getPrimitiveTypeInfo(fullName); }; + public static TimestampTZTypeInfo getTimestampTZTypeInfo(ZoneId defaultTimeZone) { + String fullName = TimestampTZTypeInfo.getQualifiedName(defaultTimeZone); + return (TimestampTZTypeInfo) getPrimitiveTypeInfo(fullName); + }; + public static TypeInfo getPrimitiveTypeInfoFromPrimitiveWritable( Class clazz) { String typeName = PrimitiveObjectInspectorUtils diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java index 8be6896..47ca33e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java @@ -48,6 +48,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * TypeInfoUtils. @@ -55,6 +57,8 @@ */ public final class TypeInfoUtils { + protected static final Logger LOG = LoggerFactory.getLogger(TypeInfoUtils.class); + public static List numericTypeList = new ArrayList(); // The ordering of types here is used to determine which numeric types // are common/convertible to one another. Probably better to rely on the @@ -297,6 +301,20 @@ private static boolean isTypeChar(char c) { int end = 1; while (end <= typeInfoString.length()) { // last character ends a token? 
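+          // Quote handling (added below): a quoted type parameter, such as the zone id in
+          // timestamptz('Asia/Shanghai'), must survive tokenization. The first branch skips
+          // the quote that follows '(' and advances 'end' up to the matching closing quote;
+          // the second consumes the quote sitting directly before ')'. As a result, zone
+          // ids containing characters like '/', '+' or '-' are kept as a single token.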
+ if (begin > 0 && + typeInfoString.charAt(begin - 1) == '(' && + typeInfoString.charAt(begin) == '\'') { + // Ignore starting quote + begin++; + do { + end++; + } while (typeInfoString.charAt(end) != '\''); + } else if (typeInfoString.charAt(begin) == '\'' && + typeInfoString.charAt(begin + 1) == ')') { + // Ignore closing quote + begin++; + end++; + } if (end == typeInfoString.length() || !isTypeChar(typeInfoString.charAt(end - 1)) || !isTypeChar(typeInfoString.charAt(end))) { @@ -443,6 +461,7 @@ private TypeInfo parseType() { "Type " + typeEntry.typeName+ " only takes one parameter, but " + params.length + " is seen"); } + case DECIMAL: int precision = HiveDecimal.USER_DEFAULT_PRECISION; int scale = HiveDecimal.USER_DEFAULT_SCALE; @@ -462,8 +481,8 @@ private TypeInfo parseType() { throw new IllegalArgumentException("Type decimal only takes two parameter, but " + params.length + " is seen"); } - return TypeInfoFactory.getDecimalTypeInfo(precision, scale); + default: return TypeInfoFactory.getPrimitiveTypeInfo(typeEntry.typeName); } diff --git serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampTZWritable.java serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampTZWritable.java index 73b81b9..0c32306 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampTZWritable.java +++ serde/src/test/org/apache/hadoop/hive/serde2/io/TestTimestampTZWritable.java @@ -25,6 +25,7 @@ import org.junit.Rule; import org.junit.Test; +import java.time.ZoneId; import java.util.concurrent.ThreadLocalRandom; public class TestTimestampTZWritable { @@ -37,7 +38,7 @@ public void testSeconds() { // just 1 VInt long seconds = ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE); - TimestampTZ tstz = new TimestampTZ(seconds, 0); + TimestampTZ tstz = new TimestampTZ(seconds, 0, ZoneId.of("UTC")); verifyConversion(tstz); // 2 VInt @@ -45,7 +46,7 @@ public void testSeconds() { if (ThreadLocalRandom.current().nextBoolean()) { seconds = -seconds; } - tstz.set(seconds, 0); + tstz.set(seconds, 0, ZoneId.of("UTC")); verifyConversion(tstz); } @@ -59,7 +60,7 @@ public void testSecondsWithNanos() { int nanos = ThreadLocalRandom.current().nextInt(999999999) + 1; - TimestampTZ tstz = new TimestampTZ(seconds, nanos); + TimestampTZ tstz = new TimestampTZ(seconds, nanos, ZoneId.of("UTC")); verifyConversion(tstz); } @@ -92,7 +93,7 @@ public void testComparison() { private static void verifyConversion(TimestampTZ srcTstz) { TimestampTZWritable src = new TimestampTZWritable(srcTstz); byte[] bytes = src.getBytes(); - TimestampTZWritable dest = new TimestampTZWritable(bytes, 0); + TimestampTZWritable dest = new TimestampTZWritable(bytes, 0, ZoneId.of("UTC")); TimestampTZ destTstz = dest.getTimestampTZ(); String errMsg = "Src tstz with seconds " + srcTstz.getEpochSecond() + ", nanos " + srcTstz.getNanos() + ". Dest tstz with seconds " + destTstz.getEpochSecond() +