diff --git common/pom.xml common/pom.xml
index ede8aea..16e8f48 100644
--- common/pom.xml
+++ common/pom.xml
@@ -56,6 +56,11 @@
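       <version>${commons-logging.version}</version>
     </dependency>
     <dependency>
+      <groupId>joda-time</groupId>
+      <artifactId>joda-time</artifactId>
+      <version>${joda.version}</version>
+    </dependency>
+    <dependency>
       <groupId>log4j</groupId>
       <artifactId>log4j</artifactId>
       <version>${log4j.version}</version>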
diff --git common/src/java/org/apache/hive/common/util/TimestampParser.java common/src/java/org/apache/hive/common/util/TimestampParser.java
new file mode 100644
index 0000000..02cfa0d
--- /dev/null
+++ common/src/java/org/apache/hive/common/util/TimestampParser.java
@@ -0,0 +1,176 @@
+package org.apache.hive.common.util;
+
+import java.math.BigDecimal;
+import java.sql.Timestamp;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.joda.time.DateTime;
+import org.joda.time.MutableDateTime;
+import org.joda.time.DateTimeFieldType;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+import org.joda.time.format.DateTimeFormatterBuilder;
+import org.joda.time.format.DateTimeParser;
+import org.joda.time.format.DateTimeParserBucket;
+
+/**
+ * Timestamp parser using Joda DateTimeFormatter. Parser accepts 0 or more date time format
+ * patterns. If no format patterns are provided it will default to the normal Timestamp parsing.
+ * Datetime formats are compatible with Java SimpleDateFormat. Also added special case pattern
+ * "millis" to parse the string as milliseconds since Unix epoch.
+ * Values matched by a custom pattern are returned with millisecond precision at most.
+ * Since this uses Joda DateTimeFormatter, this parser should be thread safe.
+ */
+public class TimestampParser {
+
+  protected final static String[] stringArray = new String[] {};
+  protected final static String millisFormatString = "millis";
+  protected final static DateTime startingDateValue = new DateTime(1970, 1, 1, 0, 0, 0, 0);
+
+  protected String[] formatStrings = null;
+  protected DateTimeFormatter fmt = null;
+
+  /**
+   * Creates a parser without custom patterns; only the default Timestamp format is accepted.
+   */
+  public TimestampParser() {
+  }
+
+  /**
+   * Creates a parser accepting the same patterns as another parser.
+   * @param tsParser parser whose pattern array is copied
+   */
+  public TimestampParser(TimestampParser tsParser) {
+    this(tsParser.formatStrings == null ?
+        null : Arrays.copyOf(tsParser.formatStrings, tsParser.formatStrings.length));
+  }
+
+  /**
+   * Creates a parser from a list of patterns.
+   * @param formatStrings patterns to accept; null or empty means default parsing only
+   */
+  public TimestampParser(List<String> formatStrings) {
+    this(formatStrings == null ? null : formatStrings.toArray(stringArray));
+  }
+
+  /**
+   * Creates a parser from an array of patterns.
+   * @param formatStrings patterns to accept; null or empty means default parsing only.
+   *        The pattern "millis" (case insensitive) selects the epoch milliseconds parser.
+   */
+  public TimestampParser(String[] formatStrings) {
+    this.formatStrings = formatStrings;
+
+    // create formatter that includes all of the input patterns
+    if (formatStrings != null && formatStrings.length > 0) {
+      DateTimeParser[] parsers = new DateTimeParser[formatStrings.length];
+      for (int idx = 0; idx < formatStrings.length; ++idx) {
+        String formatString = formatStrings[idx];
+        if (formatString.equalsIgnoreCase(millisFormatString)) {
+          // Use milliseconds parser if pattern matches our special-case millis pattern string
+          parsers[idx] = new MillisDateFormatParser();
+        } else {
+          parsers[idx] = DateTimeFormat.forPattern(formatString).getParser();
+        }
+      }
+      // No printer is supplied; this class only uses the formatter for parsing
+      fmt = new DateTimeFormatterBuilder().append(null, parsers).toFormatter();
+    }
+  }
+
+  /**
+   * Parse the input string and return a timestamp value. Custom patterns are tried first;
+   * if none of them consumes the whole string the default Timestamp format is tried.
+   * @param strValue string to parse
+   * @return the parsed timestamp
+   * @throws IllegalArgumentException if input string is null or cannot be parsed into timestamp
+   */
+  public Timestamp parseTimestamp(String strValue) throws IllegalArgumentException {
+    if (strValue == null) {
+      // Without this guard the custom-pattern path would fail with a NullPointerException
+      // instead of the documented IllegalArgumentException.
+      throw new IllegalArgumentException("null string");
+    }
+
+    if (fmt != null) {
+      // reset value in case any date fields are missing from the date pattern
+      MutableDateTime mdt = new MutableDateTime(startingDateValue);
+
+      // Using parseInto() avoids throwing exception when parsing,
+      // allowing fallback to default timestamp parsing if custom patterns fail.
+      int ret = fmt.parseInto(mdt, strValue, 0);
+      // Only accept parse results if we parsed the entire string
+      if (ret == strValue.length()) {
+        return new Timestamp(mdt.getMillis());
+      }
+    }
+
+    // Otherwise try default timestamp parsing
+    return Timestamp.valueOf(strValue);
+  }
+
+  /**
+   * DateTimeParser to parse the date string as the millis since Unix epoch
+   */
+  public static class MillisDateFormatParser implements DateTimeParser {
+    // Matcher instances are not thread safe, so each thread reuses its own
+    private static final ThreadLocal<Matcher> numericMatcher = new ThreadLocal<Matcher>() {
+      @Override
+      protected Matcher initialValue() {
+        return Pattern.compile("-?\\d+(\\.\\d+)?$").matcher("");
+      }
+    };
+
+    private final static DateTimeFieldType[] dateTimeFields = {
+      DateTimeFieldType.year(),
+      DateTimeFieldType.monthOfYear(),
+      DateTimeFieldType.dayOfMonth(),
+      DateTimeFieldType.hourOfDay(),
+      DateTimeFieldType.minuteOfHour(),
+      DateTimeFieldType.secondOfMinute(),
+      DateTimeFieldType.millisOfSecond()
+    };
+
+    public int estimateParsedLength() {
+      return 13; // Shouldn't hit 14 digits until year 2286
+    }
+
+    /**
+     * @param val string to test
+     * @return true if val is an optionally signed decimal number
+     */
+    protected static boolean isNumericString(String val) {
+      Matcher matcher = numericMatcher.get();
+      matcher.reset(val);
+      return matcher.matches();
+    }
+
+    /**
+     * Interprets the rest of the text, starting at position, as milliseconds since epoch.
+     * @return the position after the parsed text, or the bitwise complement of the
+     *         failure position if the text is not a number that fits into a long
+     */
+    public int parseInto(DateTimeParserBucket bucket, String text, int position) {
+      String substr = text.substring(position);
+      if (!isNumericString(substr)) {
+        return ~position;
+      }
+      BigDecimal bd = new BigDecimal(substr);
+      // longValue() silently wraps around on overflow, so reject values outside the long range
+      if (bd.toBigInteger().bitLength() > 63) {
+        return ~position;
+      }
+      // Joda DateTime only has precision to millis, so any fractional portion must be cut off
+      long millis = bd.longValue();
+      DateTime dt = new DateTime(millis);
+      for (DateTimeFieldType field : dateTimeFields) {
+        bucket.saveField(field, dt.get(field));
+      }
+      // The whole remainder was consumed, so the new position is the end of the text
+      return text.length();
+    }
+  }
+}
diff --git common/src/test/org/apache/hive/common/util/TestTimestampParser.java common/src/test/org/apache/hive/common/util/TestTimestampParser.java
new file mode 100644
index 0000000..9eb1dc5
--- /dev/null
+++ common/src/test/org/apache/hive/common/util/TestTimestampParser.java
@@ -0,0 +1,144 @@
+package org.apache.hive.common.util;
+
+import java.sql.Timestamp;
+import static org.junit.Assert.*;
+import org.junit.Test;
+
+/**
+ * Unit tests for {@link TimestampParser}: default parsing, custom Joda patterns,
+ * the special "millis" pattern and patterns that leave date fields unspecified.
+ */
+public class TestTimestampParser {
+
+  /** Pairs an input string with the timestamp it is expected to parse to. */
+  public static class ValidTimestampCase {
+    String strValue;
+    Timestamp expectedValue;
+
+    public ValidTimestampCase(String strValue, Timestamp expectedValue) {
+      this.strValue = strValue;
+      this.expectedValue = expectedValue;
+    }
+  }
+
+  /** Asserts that every case parses to its expected timestamp. */
+  static void testValidCases(TimestampParser tp, ValidTimestampCase[] validCases) {
+    for (int i = 0; i < validCases.length; ++i) {
+      String input = validCases[i].strValue;
+      Timestamp parsed = tp.parseTimestamp(input);
+      assertEquals("Parsing " + input, validCases[i].expectedValue, parsed);
+    }
+  }
+
+  /** Asserts that every string is rejected with an IllegalArgumentException. */
+  static void testInvalidCases(TimestampParser tp, String[] invalidCases) {
+    for (int i = 0; i < invalidCases.length; ++i) {
+      String rejected = invalidCases[i];
+      Timestamp unexpected;
+      try {
+        unexpected = tp.parseTimestamp(rejected);
+      } catch (IllegalArgumentException expected) {
+        // This is the outcome the test wants; move on to the next string
+        continue;
+      }
+      fail("Expected exception parsing " + rejected + ", but parsed value to " + unexpected);
+    }
+  }
+
+  /** Builds a case whose expected value is given in the default timestamp format. */
+  private static ValidTimestampCase parsesTo(String input, String expectedTimestamp) {
+    return new ValidTimestampCase(input, Timestamp.valueOf(expectedTimestamp));
+  }
+
+  @Test
+  public void testDefault() {
+    // A parser without patterns only understands the normal timestamp format
+    TimestampParser parser = new TimestampParser();
+
+    testValidCases(parser, new ValidTimestampCase[] {
+      parsesTo("1945-12-31 23:59:59.0", "1945-12-31 23:59:59.0"),
+      parsesTo("1945-12-31 23:59:59.1234", "1945-12-31 23:59:59.1234"),
+      parsesTo("1970-01-01 00:00:00", "1970-01-01 00:00:00"),
+    });
+    testInvalidCases(parser, new String[] {
+      "1945-12-31T23:59:59",
+      "12345",
+    });
+  }
+
+  @Test
+  public void testPattern1() {
+    // Joda matches fractional seconds digit for digit against the 'S' count of the
+    // pattern, so accepting .1, .12 and .123 takes the three patterns .S, .SS and .SSS
+    TimestampParser parser = new TimestampParser(new String[] {
+      // ISO-8601 timestamps
+      "yyyy-MM-dd'T'HH:mm:ss",
+      "yyyy-MM-dd'T'HH:mm:ss.S",
+      "yyyy-MM-dd'T'HH:mm:ss.SS",
+      "yyyy-MM-dd'T'HH:mm:ss.SSS",
+      "yyyy-MM-dd'T'HH:mm:ss.SSSS",
+    });
+
+    testValidCases(parser, new ValidTimestampCase[] {
+      parsesTo("1945-12-31T23:59:59.0", "1945-12-31 23:59:59.0"),
+      parsesTo("2001-01-01 00:00:00.100", "2001-01-01 00:00:00.100"),
+      parsesTo("2001-01-01 00:00:00.001", "2001-01-01 00:00:00.001"),
+      // Joda parsing only supports up to millisecond precision
+      parsesTo("1945-12-31T23:59:59.1234", "1945-12-31 23:59:59.123"),
+      parsesTo("1970-01-01T00:00:00", "1970-01-01 00:00:00"),
+      parsesTo("1970-4-5T6:7:8", "1970-04-05 06:07:08"),
+
+      // Strings in the default timestamp format must still be accepted
+      parsesTo("2001-01-01 00:00:00", "2001-01-01 00:00:00"),
+      parsesTo("1945-12-31 23:59:59.1234", "1945-12-31 23:59:59.1234"),
+    });
+    testInvalidCases(parser, new String[] {
+      "1945-12-31-23:59:59",
+      "1945-12-31T23:59:59.12345", // no pattern with 5 fractional digits was configured
+      "12345",
+    });
+  }
+
+  @Test
+  public void testMillisParser() {
+    TimestampParser parser = new TimestampParser(new String[] {
+      "millis",
+      // A regular pattern is configured next to the special one
+      "yyyy-MM-dd'T'HH:mm:ss",
+    });
+
+    testValidCases(parser, new ValidTimestampCase[] {
+      new ValidTimestampCase("0", new Timestamp(0)),
+      new ValidTimestampCase("-1000000", new Timestamp(-1000000)),
+      new ValidTimestampCase("1420509274123", new Timestamp(1420509274123L)),
+      new ValidTimestampCase("1420509274123.456789", new Timestamp(1420509274123L)),
+
+      // The regular pattern keeps working alongside "millis"
+      parsesTo("1945-12-31T23:59:59", "1945-12-31 23:59:59"),
+    });
+    testInvalidCases(parser, new String[] {
+      "1945-12-31-23:59:59",
+      "1945-12-31T23:59:59.12345", // the ISO pattern here has no fractional seconds
+      "1420509274123-",
+    });
+  }
+
+  @Test
+  public void testPattern2() {
+    // Neither pattern covers every date field
+    TimestampParser parser = new TimestampParser(new String[] {
+      "HH:mm",
+      "MM:dd:ss",
+    });
+
+    testValidCases(parser, new ValidTimestampCase[] {
+      parsesTo("05:06", "1970-01-01 05:06:00"),
+      parsesTo("05:06:07", "1970-05-06 00:00:07"),
+    });
+    testInvalidCases(parser, new String[] {
+      "1945-12-31T23:59:59",
+      "1945:12:31-",
+      "12345",
+    });
+  }
+}
diff --git data/files/ts_formats.txt data/files/ts_formats.txt
new file mode 100644
index 0000000..e13718b
--- /dev/null
+++ data/files/ts_formats.txt
@@ -0,0 +1,21 @@
+2011-01-01 01:01:01.1111111112011-01-01 01:01:01.1111111112011-01-01T01:01:01.1111111112011-01-01T01:01:01.1111111112011-01-01T01:01:012011-01-01T01:01:01
+2012-02-02 02:02:02.2222222222012-02-02 02:02:02.2222222222012-02-02T02:02:02.2222222222012-02-02T02:02:02.2222222222012-02-02T02:02:022012-02-02T02:02:02
+2013-03-03 03:03:03.3333333332013-03-03 03:03:03.3333333332013-03-03T03:03:03.3333333332013-03-03T03:03:03.3333333332013-03-03T03:03:032013-03-03T03:03:03
+2014-04-04 04:04:04.4444444442014-04-04 04:04:04.4444444442014-04-04T04:04:04.4444444442014-04-04T04:04:04.4444444442014-04-04T04:04:042014-04-04T04:04:04
+2015-05-05 05:05:05.5555555552015-05-05 05:05:05.5555555552015-05-05T05:05:05.5555555552015-05-05T05:05:05.5555555552015-05-05T05:05:052015-05-05T05:05:05
+2016-06-06 06:06:06.6666666662016-06-06 06:06:06.6666666662016-06-06T06:06:06.6666666662016-06-06T06:06:06.6666666662016-06-06T06:06:062016-06-06T06:06:06
+2017-07-07 07:07:07.7777777772017-07-07 07:07:07.7777777772017-07-07T07:07:07.7777777772017-07-07T07:07:07.7777777772017-07-07T07:07:072017-07-07T07:07:07
+2018-08-08 08:08:08.8888888882018-08-08 08:08:08.8888888882018-08-08T08:08:08.8888888882018-08-08T08:08:08.8888888882018-08-08T08:08:082018-08-08T08:08:08
+2019-09-09 09:09:09.9999999992019-09-09 09:09:09.9999999992019-09-09T09:09:09.9999999992019-09-09T09:09:09.9999999992019-09-09T09:09:092019-09-09T09:09:09
+2020-10-10 10:10:10.1010101012020-10-10 10:10:10.1010101012020-10-10T10:10:10.1010101012020-10-10T10:10:10.1010101012020-10-10T10:10:102020-10-10T10:10:10
+2021-11-11 11:11:11.1111111112021-11-11 11:11:11.1111111112021-11-11T11:11:11.1111111112021-11-11T11:11:11.1111111112021-11-11T11:11:112021-11-11T11:11:11
+2022-12-12 12:12:12.1212121212022-12-12 12:12:12.1212121212022-12-12T12:12:12.1212121212022-12-12T12:12:12.1212121212022-12-12T12:12:122022-12-12T12:12:12
+2023-01-02 13:13:13.1313131312023-01-02 13:13:13.1313131312023-01-02T13:13:13.1313131312023-01-02T13:13:13.1313131312023-01-02T13:13:132023-01-02T13:13:13
+2024-02-02 14:14:14.1414141412024-02-02 14:14:14.1414141412024-02-02T14:14:14.1414141412024-02-02T14:14:14.1414141412024-02-02T14:14:142024-02-02T14:14:14
+2025-03-03 15:15:15.1515151512025-03-03 15:15:15.1515151512025-03-03T15:15:15.1515151512025-03-03T15:15:15.1515151512025-03-03T15:15:152025-03-03T15:15:15
+2026-04-04 16:16:16.1616161612026-04-04 16:16:16.1616161612026-04-04T16:16:16.1616161612026-04-04T16:16:16.1616161612026-04-04T16:16:162026-04-04T16:16:16
+2027-05-05 17:17:17.1717171712027-05-05 17:17:17.1717171712027-05-05T17:17:17.1717171712027-05-05T17:17:17.1717171712027-05-05T17:17:172027-05-05T17:17:17
+2028-06-06 18:18:18.1818181812028-06-06 18:18:18.1818181812028-06-06T18:18:18.1818181812028-06-06T18:18:18.1818181812028-06-06T18:18:182028-06-06T18:18:18
+2029-07-07 19:19:19.1919191912029-07-07 19:19:19.1919191912029-07-07T19:19:19.1919191912029-07-07T19:19:19.1919191912029-07-07T19:19:192029-07-07T19:19:19
+2030-08-08 20:20:20.2020202022030-08-08 20:20:20.2020202022030-08-08T20:20:20.2020202022030-08-08T20:20:20.2020202022030-08-08T20:20:202030-08-08T20:20:20
+2031-09-09 21:21:21.2121212122031-09-09 21:21:21.2121212122031-09-09T21:21:21.2121212122031-09-09T21:21:21.2121212122031-09-09T21:21:212031-09-09T21:21:21
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
index 78f23cb..fe4b31c 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
@@ -25,8 +25,12 @@
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.Text;
// Does same thing with LazyFactory#createLazyObjectInspector except that this replaces
// original keyOI with OI which is create by HBaseKeyFactory provided by serde property for hbase
@@ -46,8 +50,8 @@ public static ObjectInspector createLazyHBaseStructInspector(
}
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
- serdeParams.getColumnNames(), columnObjectInspectors, serdeParams.getSeparators()[0],
+ serdeParams.getColumnNames(), columnObjectInspectors, null, serdeParams.getSeparators()[0],
serdeParams.getNullSequence(), serdeParams.isLastColumnTakesRest(),
- serdeParams.isEscaped(), serdeParams.getEscapeChar());
+ new LazyObjectInspectorParametersImpl(serdeParams), ObjectInspectorOptions.JAVA);
}
}
\ No newline at end of file
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
index a2ba827..0f31d2f 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
@@ -75,8 +75,7 @@ public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Propertie
public ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException {
ObjectInspector oi =
LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), 1,
- serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(),
- ObjectInspectorOptions.AVRO);
+ serdeParams.getNullSequence(), serdeParams, ObjectInspectorOptions.AVRO);
// initialize the object inspectors
initInternalObjectInspectors(oi);
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
index e60b844..1e85aa2 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
@@ -29,6 +29,7 @@
import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -63,7 +64,7 @@ public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Propertie
public ObjectInspector createValueObjectInspector(TypeInfo type)
throws SerDeException {
return LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(),
- 1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar());
+ 1, serdeParams.getNullSequence(), serdeParams, ObjectInspectorOptions.JAVA);
}
@Override
diff --git hbase-handler/src/test/queries/positive/hbase_timestamp_format.q hbase-handler/src/test/queries/positive/hbase_timestamp_format.q
new file mode 100644
index 0000000..a8d5501
--- /dev/null
+++ hbase-handler/src/test/queries/positive/hbase_timestamp_format.q
@@ -0,0 +1,21 @@
+
+create table hbase_str(rowkey string,mytime string,mystr string)
+ STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+ WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr')
+ TBLPROPERTIES ('hbase.table.name' = 'hbase_ts');
+
+describe hbase_str;
+insert overwrite table hbase_str select key, '2001-02-03-04.05.06.123456', value from src limit 3;
+select * from hbase_str;
+
+-- Timestamp string does not match the default timestamp format, specify a custom timestamp format
+create external table hbase_ts(rowkey string,mytime timestamp,mystr string)
+ STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+ WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr', 'timestamp.formats' = 'yyyy-MM-dd-HH.mm.ss.SSSSSS')
+ TBLPROPERTIES ('hbase.table.name' = 'hbase_ts');
+
+describe hbase_ts;
+select * from hbase_ts;
+
+drop table hbase_str;
+drop table hbase_ts;
diff --git hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out
new file mode 100644
index 0000000..138cfe6
--- /dev/null
+++ hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out
@@ -0,0 +1,94 @@
+PREHOOK: query: create table hbase_str(rowkey string,mytime string,mystr string)
+ STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+ WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr')
+ TBLPROPERTIES ('hbase.table.name' = 'hbase_ts')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hbase_str
+POSTHOOK: query: create table hbase_str(rowkey string,mytime string,mystr string)
+ STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+ WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr')
+ TBLPROPERTIES ('hbase.table.name' = 'hbase_ts')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hbase_str
+PREHOOK: query: describe hbase_str
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@hbase_str
+POSTHOOK: query: describe hbase_str
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@hbase_str
+rowkey string from deserializer
+mytime string from deserializer
+mystr string from deserializer
+PREHOOK: query: insert overwrite table hbase_str select key, '2001-02-03-04.05.06.123456', value from src limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@hbase_str
+POSTHOOK: query: insert overwrite table hbase_str select key, '2001-02-03-04.05.06.123456', value from src limit 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@hbase_str
+PREHOOK: query: select * from hbase_str
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_str
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_str
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_str
+#### A masked pattern was here ####
+238 2001-02-03-04.05.06.123456 val_238
+311 2001-02-03-04.05.06.123456 val_311
+86 2001-02-03-04.05.06.123456 val_86
+PREHOOK: query: -- Timestamp string does not match the default timestamp format, specify a custom timestamp format
+create external table hbase_ts(rowkey string,mytime timestamp,mystr string)
+ STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+ WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr', 'timestamp.formats' = 'yyyy-MM-dd-HH.mm.ss.SSSSSS')
+ TBLPROPERTIES ('hbase.table.name' = 'hbase_ts')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hbase_ts
+POSTHOOK: query: -- Timestamp string does not match the default timestamp format, specify a custom timestamp format
+create external table hbase_ts(rowkey string,mytime timestamp,mystr string)
+ STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
+ WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr', 'timestamp.formats' = 'yyyy-MM-dd-HH.mm.ss.SSSSSS')
+ TBLPROPERTIES ('hbase.table.name' = 'hbase_ts')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hbase_ts
+PREHOOK: query: describe hbase_ts
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@hbase_ts
+POSTHOOK: query: describe hbase_ts
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@hbase_ts
+rowkey string from deserializer
+mytime timestamp from deserializer
+mystr string from deserializer
+PREHOOK: query: select * from hbase_ts
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hbase_ts
+#### A masked pattern was here ####
+POSTHOOK: query: select * from hbase_ts
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hbase_ts
+#### A masked pattern was here ####
+238 2001-02-03 04:05:06.123 val_238
+311 2001-02-03 04:05:06.123 val_311
+86 2001-02-03 04:05:06.123 val_86
+PREHOOK: query: drop table hbase_str
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@hbase_str
+PREHOOK: Output: default@hbase_str
+POSTHOOK: query: drop table hbase_str
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@hbase_str
+POSTHOOK: Output: default@hbase_str
+PREHOOK: query: drop table hbase_ts
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@hbase_ts
+PREHOOK: Output: default@hbase_ts
+POSTHOOK: query: drop table hbase_ts
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@hbase_ts
+POSTHOOK: Output: default@hbase_ts
diff --git pom.xml pom.xml
index 416bc7a..56cb076 100644
--- pom.xml
+++ pom.xml
@@ -134,6 +134,7 @@
1.14
2.12
1.1
+    <joda.version>2.5</joda.version>
3.5.2
20090211
4.11
diff --git ql/src/test/queries/clientpositive/timestamp_formats.q ql/src/test/queries/clientpositive/timestamp_formats.q
new file mode 100644
index 0000000..b187b40
--- /dev/null
+++ ql/src/test/queries/clientpositive/timestamp_formats.q
@@ -0,0 +1,23 @@
+
+CREATE TABLE timestamp_formats (
+ c1 string,
+ c1_ts timestamp,
+ c2 string,
+ c2_ts timestamp,
+ c3 string,
+ c3_ts timestamp
+);
+
+LOAD DATA LOCAL INPATH '../../data/files/ts_formats.txt' overwrite into table timestamp_formats;
+
+SELECT * FROM timestamp_formats;
+
+-- Add single timestamp format. This should allow c3_ts to parse
+ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss");
+SELECT * FROM timestamp_formats;
+
+-- Add another format, to allow c2_ts to parse
+ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss,yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS");
+SELECT * FROM timestamp_formats;
+
+DROP TABLE timestamp_formats;
diff --git ql/src/test/results/clientpositive/timestamp_formats.q.out ql/src/test/results/clientpositive/timestamp_formats.q.out
new file mode 100644
index 0000000..9cc9b29
--- /dev/null
+++ ql/src/test/results/clientpositive/timestamp_formats.q.out
@@ -0,0 +1,145 @@
+PREHOOK: query: CREATE TABLE timestamp_formats (
+ c1 string,
+ c1_ts timestamp,
+ c2 string,
+ c2_ts timestamp,
+ c3 string,
+ c3_ts timestamp
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@timestamp_formats
+POSTHOOK: query: CREATE TABLE timestamp_formats (
+ c1 string,
+ c1_ts timestamp,
+ c2 string,
+ c2_ts timestamp,
+ c3 string,
+ c3_ts timestamp
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@timestamp_formats
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ts_formats.txt' overwrite into table timestamp_formats
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@timestamp_formats
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ts_formats.txt' overwrite into table timestamp_formats
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@timestamp_formats
+PREHOOK: query: SELECT * FROM timestamp_formats
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp_formats
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM timestamp_formats
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp_formats
+#### A masked pattern was here ####
+2011-01-01 01:01:01.111111111 2011-01-01 01:01:01.111111111 2011-01-01T01:01:01.111111111 NULL 2011-01-01T01:01:01 NULL
+2012-02-02 02:02:02.222222222 2012-02-02 02:02:02.222222222 2012-02-02T02:02:02.222222222 NULL 2012-02-02T02:02:02 NULL
+2013-03-03 03:03:03.333333333 2013-03-03 03:03:03.333333333 2013-03-03T03:03:03.333333333 NULL 2013-03-03T03:03:03 NULL
+2014-04-04 04:04:04.444444444 2014-04-04 04:04:04.444444444 2014-04-04T04:04:04.444444444 NULL 2014-04-04T04:04:04 NULL
+2015-05-05 05:05:05.555555555 2015-05-05 05:05:05.555555555 2015-05-05T05:05:05.555555555 NULL 2015-05-05T05:05:05 NULL
+2016-06-06 06:06:06.666666666 2016-06-06 06:06:06.666666666 2016-06-06T06:06:06.666666666 NULL 2016-06-06T06:06:06 NULL
+2017-07-07 07:07:07.777777777 2017-07-07 07:07:07.777777777 2017-07-07T07:07:07.777777777 NULL 2017-07-07T07:07:07 NULL
+2018-08-08 08:08:08.888888888 2018-08-08 08:08:08.888888888 2018-08-08T08:08:08.888888888 NULL 2018-08-08T08:08:08 NULL
+2019-09-09 09:09:09.999999999 2019-09-09 09:09:09.999999999 2019-09-09T09:09:09.999999999 NULL 2019-09-09T09:09:09 NULL
+2020-10-10 10:10:10.101010101 2020-10-10 10:10:10.101010101 2020-10-10T10:10:10.101010101 NULL 2020-10-10T10:10:10 NULL
+2021-11-11 11:11:11.111111111 2021-11-11 11:11:11.111111111 2021-11-11T11:11:11.111111111 NULL 2021-11-11T11:11:11 NULL
+2022-12-12 12:12:12.121212121 2022-12-12 12:12:12.121212121 2022-12-12T12:12:12.121212121 NULL 2022-12-12T12:12:12 NULL
+2023-01-02 13:13:13.131313131 2023-01-02 13:13:13.131313131 2023-01-02T13:13:13.131313131 NULL 2023-01-02T13:13:13 NULL
+2024-02-02 14:14:14.141414141 2024-02-02 14:14:14.141414141 2024-02-02T14:14:14.141414141 NULL 2024-02-02T14:14:14 NULL
+2025-03-03 15:15:15.151515151 2025-03-03 15:15:15.151515151 2025-03-03T15:15:15.151515151 NULL 2025-03-03T15:15:15 NULL
+2026-04-04 16:16:16.161616161 2026-04-04 16:16:16.161616161 2026-04-04T16:16:16.161616161 NULL 2026-04-04T16:16:16 NULL
+2027-05-05 17:17:17.171717171 2027-05-05 17:17:17.171717171 2027-05-05T17:17:17.171717171 NULL 2027-05-05T17:17:17 NULL
+2028-06-06 18:18:18.181818181 2028-06-06 18:18:18.181818181 2028-06-06T18:18:18.181818181 NULL 2028-06-06T18:18:18 NULL
+2029-07-07 19:19:19.191919191 2029-07-07 19:19:19.191919191 2029-07-07T19:19:19.191919191 NULL 2029-07-07T19:19:19 NULL
+2030-08-08 20:20:20.202020202 2030-08-08 20:20:20.202020202 2030-08-08T20:20:20.202020202 NULL 2030-08-08T20:20:20 NULL
+2031-09-09 21:21:21.212121212 2031-09-09 21:21:21.212121212 2031-09-09T21:21:21.212121212 NULL 2031-09-09T21:21:21 NULL
+PREHOOK: query: -- Add single timestamp format. This should allow c3_ts to parse
+ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss")
+PREHOOK: type: ALTERTABLE_SERDEPROPERTIES
+PREHOOK: Input: default@timestamp_formats
+PREHOOK: Output: default@timestamp_formats
+POSTHOOK: query: -- Add single timestamp format. This should allow c3_ts to parse
+ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss")
+POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES
+POSTHOOK: Input: default@timestamp_formats
+POSTHOOK: Output: default@timestamp_formats
+PREHOOK: query: SELECT * FROM timestamp_formats
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp_formats
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM timestamp_formats
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp_formats
+#### A masked pattern was here ####
+2011-01-01 01:01:01.111111111 2011-01-01 01:01:01.111111111 2011-01-01T01:01:01.111111111 NULL 2011-01-01T01:01:01 2011-01-01 01:01:01
+2012-02-02 02:02:02.222222222 2012-02-02 02:02:02.222222222 2012-02-02T02:02:02.222222222 NULL 2012-02-02T02:02:02 2012-02-02 02:02:02
+2013-03-03 03:03:03.333333333 2013-03-03 03:03:03.333333333 2013-03-03T03:03:03.333333333 NULL 2013-03-03T03:03:03 2013-03-03 03:03:03
+2014-04-04 04:04:04.444444444 2014-04-04 04:04:04.444444444 2014-04-04T04:04:04.444444444 NULL 2014-04-04T04:04:04 2014-04-04 04:04:04
+2015-05-05 05:05:05.555555555 2015-05-05 05:05:05.555555555 2015-05-05T05:05:05.555555555 NULL 2015-05-05T05:05:05 2015-05-05 05:05:05
+2016-06-06 06:06:06.666666666 2016-06-06 06:06:06.666666666 2016-06-06T06:06:06.666666666 NULL 2016-06-06T06:06:06 2016-06-06 06:06:06
+2017-07-07 07:07:07.777777777 2017-07-07 07:07:07.777777777 2017-07-07T07:07:07.777777777 NULL 2017-07-07T07:07:07 2017-07-07 07:07:07
+2018-08-08 08:08:08.888888888 2018-08-08 08:08:08.888888888 2018-08-08T08:08:08.888888888 NULL 2018-08-08T08:08:08 2018-08-08 08:08:08
+2019-09-09 09:09:09.999999999 2019-09-09 09:09:09.999999999 2019-09-09T09:09:09.999999999 NULL 2019-09-09T09:09:09 2019-09-09 09:09:09
+2020-10-10 10:10:10.101010101 2020-10-10 10:10:10.101010101 2020-10-10T10:10:10.101010101 NULL 2020-10-10T10:10:10 2020-10-10 10:10:10
+2021-11-11 11:11:11.111111111 2021-11-11 11:11:11.111111111 2021-11-11T11:11:11.111111111 NULL 2021-11-11T11:11:11 2021-11-11 11:11:11
+2022-12-12 12:12:12.121212121 2022-12-12 12:12:12.121212121 2022-12-12T12:12:12.121212121 NULL 2022-12-12T12:12:12 2022-12-12 12:12:12
+2023-01-02 13:13:13.131313131 2023-01-02 13:13:13.131313131 2023-01-02T13:13:13.131313131 NULL 2023-01-02T13:13:13 2023-01-02 13:13:13
+2024-02-02 14:14:14.141414141 2024-02-02 14:14:14.141414141 2024-02-02T14:14:14.141414141 NULL 2024-02-02T14:14:14 2024-02-02 14:14:14
+2025-03-03 15:15:15.151515151 2025-03-03 15:15:15.151515151 2025-03-03T15:15:15.151515151 NULL 2025-03-03T15:15:15 2025-03-03 15:15:15
+2026-04-04 16:16:16.161616161 2026-04-04 16:16:16.161616161 2026-04-04T16:16:16.161616161 NULL 2026-04-04T16:16:16 2026-04-04 16:16:16
+2027-05-05 17:17:17.171717171 2027-05-05 17:17:17.171717171 2027-05-05T17:17:17.171717171 NULL 2027-05-05T17:17:17 2027-05-05 17:17:17
+2028-06-06 18:18:18.181818181 2028-06-06 18:18:18.181818181 2028-06-06T18:18:18.181818181 NULL 2028-06-06T18:18:18 2028-06-06 18:18:18
+2029-07-07 19:19:19.191919191 2029-07-07 19:19:19.191919191 2029-07-07T19:19:19.191919191 NULL 2029-07-07T19:19:19 2029-07-07 19:19:19
+2030-08-08 20:20:20.202020202 2030-08-08 20:20:20.202020202 2030-08-08T20:20:20.202020202 NULL 2030-08-08T20:20:20 2030-08-08 20:20:20
+2031-09-09 21:21:21.212121212 2031-09-09 21:21:21.212121212 2031-09-09T21:21:21.212121212 NULL 2031-09-09T21:21:21 2031-09-09 21:21:21
+PREHOOK: query: -- Add another format, to allow c2_ts to parse
+ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss,yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS")
+PREHOOK: type: ALTERTABLE_SERDEPROPERTIES
+PREHOOK: Input: default@timestamp_formats
+PREHOOK: Output: default@timestamp_formats
+POSTHOOK: query: -- Add another format, to allow c2_ts to parse
+ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss,yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS")
+POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES
+POSTHOOK: Input: default@timestamp_formats
+POSTHOOK: Output: default@timestamp_formats
+PREHOOK: query: SELECT * FROM timestamp_formats
+PREHOOK: type: QUERY
+PREHOOK: Input: default@timestamp_formats
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM timestamp_formats
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@timestamp_formats
+#### A masked pattern was here ####
+2011-01-01 01:01:01.111111111 2011-01-01 01:01:01.111111111 2011-01-01T01:01:01.111111111 2011-01-01 01:01:01.111 2011-01-01T01:01:01 2011-01-01 01:01:01
+2012-02-02 02:02:02.222222222 2012-02-02 02:02:02.222222222 2012-02-02T02:02:02.222222222 2012-02-02 02:02:02.222 2012-02-02T02:02:02 2012-02-02 02:02:02
+2013-03-03 03:03:03.333333333 2013-03-03 03:03:03.333333333 2013-03-03T03:03:03.333333333 2013-03-03 03:03:03.333 2013-03-03T03:03:03 2013-03-03 03:03:03
+2014-04-04 04:04:04.444444444 2014-04-04 04:04:04.444444444 2014-04-04T04:04:04.444444444 2014-04-04 04:04:04.444 2014-04-04T04:04:04 2014-04-04 04:04:04
+2015-05-05 05:05:05.555555555 2015-05-05 05:05:05.555555555 2015-05-05T05:05:05.555555555 2015-05-05 05:05:05.555 2015-05-05T05:05:05 2015-05-05 05:05:05
+2016-06-06 06:06:06.666666666 2016-06-06 06:06:06.666666666 2016-06-06T06:06:06.666666666 2016-06-06 06:06:06.666 2016-06-06T06:06:06 2016-06-06 06:06:06
+2017-07-07 07:07:07.777777777 2017-07-07 07:07:07.777777777 2017-07-07T07:07:07.777777777 2017-07-07 07:07:07.777 2017-07-07T07:07:07 2017-07-07 07:07:07
+2018-08-08 08:08:08.888888888 2018-08-08 08:08:08.888888888 2018-08-08T08:08:08.888888888 2018-08-08 08:08:08.888 2018-08-08T08:08:08 2018-08-08 08:08:08
+2019-09-09 09:09:09.999999999 2019-09-09 09:09:09.999999999 2019-09-09T09:09:09.999999999 2019-09-09 09:09:09.999 2019-09-09T09:09:09 2019-09-09 09:09:09
+2020-10-10 10:10:10.101010101 2020-10-10 10:10:10.101010101 2020-10-10T10:10:10.101010101 2020-10-10 10:10:10.101 2020-10-10T10:10:10 2020-10-10 10:10:10
+2021-11-11 11:11:11.111111111 2021-11-11 11:11:11.111111111 2021-11-11T11:11:11.111111111 2021-11-11 11:11:11.111 2021-11-11T11:11:11 2021-11-11 11:11:11
+2022-12-12 12:12:12.121212121 2022-12-12 12:12:12.121212121 2022-12-12T12:12:12.121212121 2022-12-12 12:12:12.121 2022-12-12T12:12:12 2022-12-12 12:12:12
+2023-01-02 13:13:13.131313131 2023-01-02 13:13:13.131313131 2023-01-02T13:13:13.131313131 2023-01-02 13:13:13.131 2023-01-02T13:13:13 2023-01-02 13:13:13
+2024-02-02 14:14:14.141414141 2024-02-02 14:14:14.141414141 2024-02-02T14:14:14.141414141 2024-02-02 14:14:14.141 2024-02-02T14:14:14 2024-02-02 14:14:14
+2025-03-03 15:15:15.151515151 2025-03-03 15:15:15.151515151 2025-03-03T15:15:15.151515151 2025-03-03 15:15:15.151 2025-03-03T15:15:15 2025-03-03 15:15:15
+2026-04-04 16:16:16.161616161 2026-04-04 16:16:16.161616161 2026-04-04T16:16:16.161616161 2026-04-04 16:16:16.161 2026-04-04T16:16:16 2026-04-04 16:16:16
+2027-05-05 17:17:17.171717171 2027-05-05 17:17:17.171717171 2027-05-05T17:17:17.171717171 2027-05-05 17:17:17.171 2027-05-05T17:17:17 2027-05-05 17:17:17
+2028-06-06 18:18:18.181818181 2028-06-06 18:18:18.181818181 2028-06-06T18:18:18.181818181 2028-06-06 18:18:18.181 2028-06-06T18:18:18 2028-06-06 18:18:18
+2029-07-07 19:19:19.191919191 2029-07-07 19:19:19.191919191 2029-07-07T19:19:19.191919191 2029-07-07 19:19:19.191 2029-07-07T19:19:19 2029-07-07 19:19:19
+2030-08-08 20:20:20.202020202 2030-08-08 20:20:20.202020202 2030-08-08T20:20:20.202020202 2030-08-08 20:20:20.202 2030-08-08T20:20:20 2030-08-08 20:20:20
+2031-09-09 21:21:21.212121212 2031-09-09 21:21:21.212121212 2031-09-09T21:21:21.212121212 2031-09-09 21:21:21.212 2031-09-09T21:21:21 2031-09-09 21:21:21
+PREHOOK: query: DROP TABLE timestamp_formats
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@timestamp_formats
+PREHOOK: Output: default@timestamp_formats
+POSTHOOK: query: DROP TABLE timestamp_formats
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@timestamp_formats
+POSTHOOK: Output: default@timestamp_formats
diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
index 8d3595b..e70d0c4 100644
--- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
+++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
@@ -111,6 +111,8 @@
public static final String LIST_COLUMN_TYPES = "columns.types";
+ public static final String TIMESTAMP_FORMATS = "timestamp.formats";
+
public static final Set PrimitiveTypes = new HashSet();
static {
PrimitiveTypes.add("void");
diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
index 2fb1c28..428894b 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
@@ -44,6 +44,7 @@
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -84,6 +85,7 @@
* @param escaped whether the data is escaped or not
* @param escapeChar if escaped is true, the escape character
* */
+ @Deprecated
public AvroLazyObjectInspector(List structFieldNames,
List structFieldObjectInspectors, List structFieldComments,
byte separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
@@ -92,6 +94,13 @@ public AvroLazyObjectInspector(List structFieldNames,
nullSequence, lastColumnTakesRest, escaped, escapeChar);
}
+ public AvroLazyObjectInspector(List structFieldNames,
+ List structFieldObjectInspectors, List structFieldComments,
+ byte separator, Text nullSequence, boolean lastColumnTakesRest, LazyObjectInspectorParameters lazyParams) {
+ super(structFieldNames, structFieldObjectInspectors, structFieldComments, separator,
+ nullSequence, lastColumnTakesRest, lazyParams);
+ }
+
/**
* Set the reader schema for the {@link AvroLazyObjectInspector} to the given schema
* */
diff --git serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
index 1587be8..d6a64f0 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
@@ -36,6 +36,7 @@
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -97,8 +98,8 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException
// ColumnarObject uses same ObjectInpector as LazyStruct
cachedObjectInspector = LazyFactory.createColumnarStructInspector(
serdeParams.getColumnNames(), serdeParams.getColumnTypes(), serdeParams
- .getSeparators(), serdeParams.getNullSequence(), serdeParams
- .isEscaped(), serdeParams.getEscapeChar());
+ .getSeparators(), serdeParams.getNullSequence(),
+ new LazyObjectInspectorParametersImpl(serdeParams));
int size = serdeParams.getColumnTypes().size();
List notSkipIDs = new ArrayList();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
index e3968a9..20018ca 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
@@ -38,6 +38,8 @@
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector;
@@ -216,6 +218,7 @@
* @return The ObjectInspector
* @throws SerDeException
*/
+ @Deprecated
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
byte escapeChar, ObjectInspectorOptions option) throws SerDeException {
@@ -240,6 +243,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
* @return The ObjectInspector
* @throws SerDeException
*/
+ @Deprecated
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
byte escapeChar) throws SerDeException {
@@ -261,6 +265,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
* @return The ObjectInspector
* @throws SerDeException
*/
+ @Deprecated
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException {
@@ -282,30 +287,52 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
* @return The ObjectInspector
* @throws SerDeException
*/
+ @Deprecated
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
byte escapeChar, boolean extendedBooleanLiteral, ObjectInspectorOptions option) throws SerDeException {
+ LazyObjectInspectorParametersImpl lazyParams =
+ new LazyObjectInspectorParametersImpl(escaped, escapeChar, extendedBooleanLiteral, null);
+ return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence,
+ lazyParams, option);
+ }
+
+ /**
+ * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo.
+ *
+ * @param typeInfo The type information for the LazyObject
+ * @param separator The array of separators for delimiting each level
+ * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of
+ * separator, and map uses 2 levels: the first one for delimiting entries, the second one
+ * for delimiting key and values.
+ * @param lazyParams Params for lazy types
+ * @param option the {@link ObjectInspectorOptions}
+ * @return The ObjectInspector
+ * @throws SerDeException
+ */
+ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
+ byte[] separator, int separatorIndex, Text nullSequence,
+ LazyObjectInspectorParameters lazyParams, ObjectInspectorOptions option) throws SerDeException {
ObjectInspector.Category c = typeInfo.getCategory();
switch (c) {
case PRIMITIVE:
return LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(
- (PrimitiveTypeInfo) typeInfo, escaped, escapeChar, extendedBooleanLiteral);
+ (PrimitiveTypeInfo) typeInfo, lazyParams);
case MAP:
return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
- createLazyObjectInspector(((MapTypeInfo) typeInfo)
- .getMapKeyTypeInfo(), separator, separatorIndex + 2,
- nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), createLazyObjectInspector(
- ((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator,
- separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral, option),
+ createLazyObjectInspector(((MapTypeInfo) typeInfo).getMapKeyTypeInfo(),
+ separator, separatorIndex + 2, nullSequence, lazyParams, option),
+ createLazyObjectInspector(((MapTypeInfo) typeInfo).getMapValueTypeInfo(),
+ separator, separatorIndex + 2, nullSequence, lazyParams, option),
LazyUtils.getSeparator(separator, separatorIndex),
LazyUtils.getSeparator(separator, separatorIndex+1),
- nullSequence, escaped, escapeChar);
+ nullSequence, lazyParams);
case LIST:
return LazyObjectInspectorFactory.getLazySimpleListObjectInspector(
createLazyObjectInspector(((ListTypeInfo) typeInfo)
.getListElementTypeInfo(), separator, separatorIndex + 1,
- nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), LazyUtils.getSeparator(separator, separatorIndex),
- nullSequence, escaped, escapeChar);
+ nullSequence, lazyParams, option), LazyUtils.getSeparator(separator, separatorIndex),
+ nullSequence, lazyParams);
case STRUCT:
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
List fieldNames = structTypeInfo.getAllStructFieldNames();
@@ -315,25 +342,23 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
fieldTypeInfos.size());
for (int i = 0; i < fieldTypeInfos.size(); i++) {
fieldObjectInspectors.add(createLazyObjectInspector(fieldTypeInfos
- .get(i), separator, separatorIndex + 1, nullSequence, escaped,
- escapeChar, extendedBooleanLiteral, option));
+ .get(i), separator, separatorIndex + 1, nullSequence, lazyParams, option));
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
- fieldNames, fieldObjectInspectors,
+ fieldNames, fieldObjectInspectors, null,
LazyUtils.getSeparator(separator, separatorIndex),
- nullSequence,
- false, escaped, escapeChar, option);
+ nullSequence,
+ false, lazyParams, option);
case UNION:
UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
List lazyOIs = new ArrayList();
for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) {
lazyOIs.add(createLazyObjectInspector(uti, separator,
- separatorIndex + 1, nullSequence, escaped,
- escapeChar, extendedBooleanLiteral, option));
+ separatorIndex + 1, nullSequence, lazyParams, option));
}
return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs,
LazyUtils.getSeparator(separator, separatorIndex),
- nullSequence, escaped, escapeChar);
+ nullSequence, lazyParams);
}
throw new RuntimeException("Hive LazySerDe Internal error.");
@@ -350,6 +375,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
* @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text,
* boolean, byte)
*/
+ @Deprecated
public static ObjectInspector createLazyStructInspector(
List columnNames, List typeInfos, byte[] separators,
Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
@@ -370,20 +396,43 @@ public static ObjectInspector createLazyStructInspector(
* @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text,
* boolean, byte)
*/
+ @Deprecated
public static ObjectInspector createLazyStructInspector(
List columnNames, List typeInfos, byte[] separators,
Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException {
+ LazyObjectInspectorParametersImpl lazyParams =
+ new LazyObjectInspectorParametersImpl(escaped, escapeChar, extendedBooleanLiteral, null);
+ return createLazyStructInspector(columnNames, typeInfos, separators,
+ nullSequence, lastColumnTakesRest, lazyParams);
+ }
+
+ /**
+ * Create a hierarchical ObjectInspector for LazyStruct with the given
+ * columnNames and columnTypeInfos.
+ *
+ * @param lastColumnTakesRest
+ * whether the last column of the struct should take the rest of the
+ * row if there are extra fields.
+ * @param lazyParams parameters for the lazy types
+ * @throws SerDeException
+ * @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text,
+ * LazyObjectInspectorParameters, ObjectInspectorOptions)
+ */
+ public static ObjectInspector createLazyStructInspector(
+ List columnNames, List typeInfos, byte[] separators,
+ Text nullSequence, boolean lastColumnTakesRest,
+ LazyObjectInspectorParameters lazyParams) throws SerDeException {
ArrayList columnObjectInspectors = new ArrayList(
typeInfos.size());
for (int i = 0; i < typeInfos.size(); i++) {
columnObjectInspectors.add(LazyFactory.createLazyObjectInspector(
- typeInfos.get(i), separators, 1, nullSequence, escaped, escapeChar,
- extendedBooleanLiteral));
+ typeInfos.get(i), separators, 1, nullSequence,
+ lazyParams, ObjectInspectorOptions.JAVA));
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
- columnNames, columnObjectInspectors, separators[0], nullSequence,
- lastColumnTakesRest, escaped, escapeChar);
+ columnNames, columnObjectInspectors, null, separators[0], nullSequence,
+ lastColumnTakesRest, lazyParams, ObjectInspectorOptions.JAVA);
}
/**
@@ -394,15 +443,33 @@ public static ObjectInspector createLazyStructInspector(
* @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text,
* boolean, byte)
*/
+ @Deprecated
public static ObjectInspector createColumnarStructInspector(
List columnNames, List columnTypes, byte[] separators,
Text nullSequence, boolean escaped, byte escapeChar) throws SerDeException {
+ LazyObjectInspectorParametersImpl lazyParams =
+ new LazyObjectInspectorParametersImpl(escaped, escapeChar, false, null);
+ return createColumnarStructInspector(columnNames, columnTypes,
+ separators, nullSequence, lazyParams);
+ }
+
+ /**
+ * Create a hierarchical ObjectInspector for ColumnarStruct with the given
+ * columnNames and columnTypeInfos.
+ * @throws SerDeException
+ *
+ * @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text,
+ * LazyObjectInspectorParameters, ObjectInspectorOptions)
+ */
+ public static ObjectInspector createColumnarStructInspector(
+ List columnNames, List columnTypes, byte[] separators,
+ Text nullSequence, LazyObjectInspectorParameters lazyParams) throws SerDeException {
ArrayList columnObjectInspectors = new ArrayList(
columnTypes.size());
for (int i = 0; i < columnTypes.size(); i++) {
columnObjectInspectors
.add(LazyFactory.createLazyObjectInspector(columnTypes.get(i),
- separators, 1, nullSequence, escaped, escapeChar, false));
+ separators, 1, nullSequence, lazyParams, ObjectInspectorOptions.JAVA));
}
return ObjectInspectorFactory.getColumnarStructObjectInspector(columnNames,
columnObjectInspectors);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
index 95e30db..3cc76c4 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
@@ -19,6 +19,8 @@
package org.apache.hadoop.hive.serde2.lazy;
import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
@@ -36,6 +38,8 @@
import org.apache.hadoop.hive.serde2.SerDeSpec;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -127,7 +131,7 @@ public static byte getByte(String altValue, byte defaultVal) {
* SerDeParameters.
*
*/
- public static class SerDeParameters {
+ public static class SerDeParameters implements LazyObjectInspectorParameters {
byte[] separators = DefaultSeparators;
String nullString;
Text nullSequence;
@@ -141,6 +145,20 @@ public static byte getByte(String altValue, byte defaultVal) {
boolean[] needsEscape;
boolean extendedBooleanLiteral;
+ List timestampFormats;
+
+ public SerDeParameters() {
+ }
+
+ public SerDeParameters(
+ boolean escaped, byte escapeChar,
+ boolean extendedBooleanLiteral, List timestampFormats) {
+ super();
+ this.escaped = escaped;
+ this.escapeChar = escapeChar;
+ this.extendedBooleanLiteral = extendedBooleanLiteral;
+ this.timestampFormats = timestampFormats;
+ }
public List getColumnTypes() {
return columnTypes;
@@ -181,6 +199,14 @@ public byte getEscapeChar() {
public boolean[] getNeedsEscape() {
return needsEscape;
}
+
+ public boolean isExtendedBooleanLiteral() {
+ return extendedBooleanLiteral;
+ }
+
+ public List getTimestampFormats() {
+ return timestampFormats;
+ }
}
SerDeParameters serdeParams = null;
@@ -206,8 +232,9 @@ public void initialize(Configuration job, Properties tbl)
cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams
.getColumnNames(), serdeParams.getColumnTypes(), serdeParams
.getSeparators(), serdeParams.getNullSequence(), serdeParams
- .isLastColumnTakesRest(), serdeParams.isEscaped(), serdeParams
- .getEscapeChar(), serdeParams.extendedBooleanLiteral);
+ .isLastColumnTakesRest(),
+ // Lazy OI params may get cached, use lighter weight copy rather than entire SerDe params
+ new LazyObjectInspectorParametersImpl(serdeParams));
cachedLazyStruct = (LazyStruct) LazyFactory
.createLazyObject(cachedObjectInspector);
@@ -216,7 +243,7 @@ public void initialize(Configuration job, Properties tbl)
+ serdeParams.columnNames + " columnTypes=" + serdeParams.columnTypes
+ " separator=" + Arrays.asList(serdeParams.separators)
+ " nullstring=" + serdeParams.nullString + " lastColumnTakesRest="
- + serdeParams.lastColumnTakesRest);
+ + serdeParams.lastColumnTakesRest + " timestampFormats=" + serdeParams.timestampFormats);
serializedSize = 0;
stats = new SerDeStats();
@@ -324,6 +351,10 @@ public static SerDeParameters initSerdeParams(Configuration job,
serdeParams.extendedBooleanLiteral = job == null ? false :
job.getBoolean(ConfVars.HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL.varname, false);
+
+ serdeParams.timestampFormats =
+ getTimestampFormatList(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS));
+
return serdeParams;
}
@@ -608,4 +639,61 @@ protected Writable transformToUTF8(Writable blob) {
Text text = (Text)blob;
return SerDeUtils.transformTextToUTF8(text, this.charset);
}
+
+ /**
+ * Parses the list of timestamp formats out of the conf string value.
+ * Formats are delimited by commas; empty formats are skipped. Escape sequences allowed are '\,' and '\\'.
+ * @param val comma-delimited list of timestamp format patterns, may be null
+ * @return List of strings representing each timestamp format, or null if val is null.
+ * @throws IllegalArgumentException if a backslash is followed by a character other than ',' or '\'
+ */
+ public static List getTimestampFormatList(String val) throws IllegalArgumentException {
+ if (val == null) {
+ return null;
+ }
+
+ StringBuilder sb = new StringBuilder();
+ List ret = new ArrayList();
+
+ boolean escaping = false;
+ for (int idx = 0; idx < val.length(); ++idx) {
+ char cur = val.charAt(idx);
+ if (escaping) {
+ switch (cur) {
+ case ',':
+ sb.append(cur);
+ break;
+ case '\\':
+ sb.append(cur);
+ break;
+ default:
+ throw new IllegalArgumentException("Invalid character after \\: '" + cur + "'");
+ }
+ escaping = false;
+ } else {
+ switch (cur) {
+ case ',':
+ // End of pattern, add to pattern list if nonempty
+ if (sb.length() > 0) {
+ ret.add(sb.toString());
+ }
+ sb.setLength(0);
+ break;
+ case '\\':
+ escaping = true;
+ break;
+ default:
+ sb.append(cur);
+ break;
+ }
+ }
+ }
+
+ // Add any remaining chars in string builder as final pattern
+ if (sb.length() > 0) {
+ ret.add(sb.toString());
+ }
+
+ return ret;
+ }
}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java
index 27895c5..66134e1 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java
@@ -72,7 +72,7 @@ public void init(ByteArrayRef bytes, int start, int length) {
logExceptionMessage(bytes, start, length, "TIMESTAMP");
} else {
try {
- t = Timestamp.valueOf(s);
+ t = oi.getTimestampParser().parseTimestamp(s);
isNull = false;
} catch (IllegalArgumentException e) {
isNull = true;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
index 3943508..33b1b8f 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
@@ -25,6 +25,7 @@
import java.nio.charset.CharacterCodingException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import java.util.Properties;
import org.apache.commons.codec.binary.Base64;
@@ -33,6 +34,7 @@
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
@@ -450,6 +452,14 @@ public static void copyAndEscapeStringDataToText(byte[] inputBytes, int start, i
}
}
+ public static void addLazyParamsToSignature(LazyObjectInspectorParameters lazyParams,
+ List