diff --git common/pom.xml common/pom.xml index ede8aea..16e8f48 100644 --- common/pom.xml +++ common/pom.xml @@ -56,6 +56,11 @@ ${commons-logging.version} + joda-time + joda-time + ${joda.version} + + log4j log4j ${log4j.version} diff --git common/src/java/org/apache/hive/common/util/TimestampParser.java common/src/java/org/apache/hive/common/util/TimestampParser.java new file mode 100644 index 0000000..02cfa0d --- /dev/null +++ common/src/java/org/apache/hive/common/util/TimestampParser.java @@ -0,0 +1,137 @@ +package org.apache.hive.common.util; + +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.joda.time.DateTime; +import org.joda.time.MutableDateTime; +import org.joda.time.DateTimeFieldType; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.DateTimeFormatterBuilder; +import org.joda.time.format.DateTimeParser; +import org.joda.time.format.DateTimeParserBucket; + +/** + * Timestamp parser using Joda DateTimeFormatter. Parser accepts 0 or more date time format + * patterns. If no format patterns are provided it will default to the normal Timestamp parsing. + * Datetime formats are compatible with Java SimpleDateFormat. Also added special case pattern + * "millis" to parse the string as milliseconds since Unix epoch. + * Since this uses Joda DateTimeFormatter, this parser should be thread safe. 
+ */ +public class TimestampParser { + + protected final static String[] stringArray = new String[] {}; + protected final static String millisFormatString = "millis"; + protected final static DateTime startingDateValue = new DateTime(1970, 1, 1, 0, 0, 0, 0); + + protected String[] formatStrings = null; + protected DateTimeFormatter fmt = null; + + public TimestampParser() { + } + + public TimestampParser(TimestampParser tsParser) { + this(tsParser.formatStrings == null ? + null : Arrays.copyOf(tsParser.formatStrings, tsParser.formatStrings.length)); + } + + public TimestampParser(List formatStrings) { + this(formatStrings == null ? null : formatStrings.toArray(stringArray)); + } + + public TimestampParser(String[] formatStrings) { + this.formatStrings = formatStrings; + + // create formatter that includes all of the input patterns + if (formatStrings != null && formatStrings.length > 0) { + DateTimeParser[] parsers = new DateTimeParser[formatStrings.length]; + for (int idx = 0; idx < formatStrings.length; ++idx) { + String formatString = formatStrings[idx]; + if (formatString.equalsIgnoreCase(millisFormatString)) { + // Use milliseconds parser if pattern matches our special-case millis pattern string + parsers[idx] = new MillisDateFormatParser(); + } else { + parsers[idx] = DateTimeFormat.forPattern(formatString).getParser(); + } + } + fmt = new DateTimeFormatterBuilder().append(null, parsers).toFormatter(); + } + } + + /** + * Parse the input string and return a timestamp value + * @param strValue + * @return + * @throws IllegalArgumentException if input string cannot be parsed into timestamp + */ + public Timestamp parseTimestamp(String strValue) throws IllegalArgumentException { + if (fmt != null) { + // reset value in case any date fields are missing from the date pattern + MutableDateTime mdt = new MutableDateTime(startingDateValue); + + // Using parseInto() avoids throwing exception when parsing, + // allowing fallback to default timestamp parsing if custom 
patterns fail. + int ret = fmt.parseInto(mdt, strValue, 0); + // Only accept parse results if we parsed the entire string + if (ret == strValue.length()) { + return new Timestamp(mdt.getMillis()); + } + } + + // Otherwise try default timestamp parsing + return Timestamp.valueOf(strValue); + } + + /** + * DateTimeParser to parse the date string as the millis since Unix epoch + */ + public static class MillisDateFormatParser implements DateTimeParser { + private static final ThreadLocal numericMatcher = new ThreadLocal() { + @Override + protected Matcher initialValue() { + return Pattern.compile("-?\\d+(\\.\\d+)?$").matcher(""); + } + }; + + private final static DateTimeFieldType[] dateTimeFields = { + DateTimeFieldType.year(), + DateTimeFieldType.monthOfYear(), + DateTimeFieldType.dayOfMonth(), + DateTimeFieldType.hourOfDay(), + DateTimeFieldType.minuteOfHour(), + DateTimeFieldType.secondOfMinute(), + DateTimeFieldType.millisOfSecond() + }; + + public int estimateParsedLength() { + return 13; // Shouldn't hit 14 digits until year 2286 + } + + protected static boolean isNumericString(String val) { + Matcher matcher = numericMatcher.get(); + matcher.reset(val); + return matcher.matches(); + } + + public int parseInto(DateTimeParserBucket bucket, String text, int position) { + String substr = text.substring(position); + if (!isNumericString(substr)) { + return -1; + } + BigDecimal bd = new BigDecimal(substr); + // Joda DateTime only has precision to millis, so any fractional portion must be cut off + long millis = bd.longValue(); + DateTime dt = new DateTime(millis); + for (DateTimeFieldType field : dateTimeFields) { + bucket.saveField(field, dt.get(field)); + } + return substr.length(); + } + } +} diff --git common/src/test/org/apache/hive/common/util/TestTimestampParser.java common/src/test/org/apache/hive/common/util/TestTimestampParser.java new file mode 100644 index 0000000..9eb1dc5 --- /dev/null +++ 
// common/src/test/org/apache/hive/common/util/TestTimestampParser.java @@ -0,0 +1,160 @@
package org.apache.hive.common.util;

import java.sql.Timestamp;
import static org.junit.Assert.*;
import org.junit.Test;

/**
 * Unit tests for {@link TimestampParser}: default parsing, custom patterns, the special
 * "millis" pattern, and patterns that leave some date fields unspecified.
 */
public class TestTimestampParser {

  /** An input string paired with the timestamp it is expected to parse to. */
  public static class ValidTimestampCase {
    String strValue;
    Timestamp expectedValue;

    public ValidTimestampCase(String strValue, Timestamp expectedValue) {
      this.strValue = strValue;
      this.expectedValue = expectedValue;
    }
  }

  /** Builds a case whose expected value is given in the default timestamp format. */
  private static ValidTimestampCase parsesTo(String input, String expected) {
    return new ValidTimestampCase(input, Timestamp.valueOf(expected));
  }

  /** Asserts that every case parses to exactly its expected timestamp. */
  static void testValidCases(TimestampParser tp, ValidTimestampCase[] validCases) {
    for (int i = 0; i < validCases.length; i++) {
      ValidTimestampCase current = validCases[i];
      Timestamp actual = tp.parseTimestamp(current.strValue);
      String message = "Parsing " + current.strValue;
      assertEquals(message, current.expectedValue, actual);
    }
  }

  /** Asserts that every string is rejected with an IllegalArgumentException. */
  static void testInvalidCases(TimestampParser tp, String[] invalidCases) {
    for (int i = 0; i < invalidCases.length; i++) {
      String candidate = invalidCases[i];
      Timestamp parsed;
      try {
        parsed = tp.parseTimestamp(candidate);
      } catch (IllegalArgumentException expected) {
        // Rejection is the expected outcome; move on to the next string.
        continue;
      }
      fail("Expected exception parsing " + candidate + ", but parsed value to " + parsed);
    }
  }

  @Test
  public void testDefault() {
    // Without any patterns the parser only understands the standard timestamp format.
    TimestampParser parser = new TimestampParser();

    ValidTimestampCase[] accepted = {
        parsesTo("1945-12-31 23:59:59.0", "1945-12-31 23:59:59.0"),
        parsesTo("1945-12-31 23:59:59.1234", "1945-12-31 23:59:59.1234"),
        parsesTo("1970-01-01 00:00:00", "1970-01-01 00:00:00"),
    };
    String[] rejected = {
        "1945-12-31T23:59:59",
        "12345",
    };

    testValidCases(parser, accepted);
    testInvalidCases(parser, rejected);
  }

  @Test
  public void testPattern1() {
    // Joda requires the number of fractional digits to equal the number of 'S' in the
    // pattern, so matching .1, .12 and .123 takes one pattern per length.
    String[] isoPatterns = {
        "yyyy-MM-dd'T'HH:mm:ss",
        "yyyy-MM-dd'T'HH:mm:ss.S",
        "yyyy-MM-dd'T'HH:mm:ss.SS",
        "yyyy-MM-dd'T'HH:mm:ss.SSS",
        "yyyy-MM-dd'T'HH:mm:ss.SSSS",
    };
    TimestampParser parser = new TimestampParser(isoPatterns);

    ValidTimestampCase[] accepted = {
        parsesTo("1945-12-31T23:59:59.0", "1945-12-31 23:59:59.0"),
        parsesTo("2001-01-01 00:00:00.100", "2001-01-01 00:00:00.100"),
        parsesTo("2001-01-01 00:00:00.001", "2001-01-01 00:00:00.001"),
        // Custom patterns go through Joda, which keeps millisecond precision only.
        parsesTo("1945-12-31T23:59:59.1234", "1945-12-31 23:59:59.123"),
        parsesTo("1970-01-01T00:00:00", "1970-01-01 00:00:00"),
        parsesTo("1970-4-5T6:7:8", "1970-04-05 06:07:08"),

        // Inputs in the default timestamp format must keep working.
        parsesTo("2001-01-01 00:00:00", "2001-01-01 00:00:00"),
        parsesTo("1945-12-31 23:59:59.1234", "1945-12-31 23:59:59.1234"),
    };
    String[] rejected = {
        "1945-12-31-23:59:59",
        "1945-12-31T23:59:59.12345", // no pattern with 5 fractional digits was supplied
        "12345",
    };

    testValidCases(parser, accepted);
    testInvalidCases(parser, rejected);
  }

  @Test
  public void testMillisParser() {
    String[] mixedPatterns = {
        "millis",
        // A regular pattern alongside the special one.
        "yyyy-MM-dd'T'HH:mm:ss",
    };
    TimestampParser parser = new TimestampParser(mixedPatterns);

    ValidTimestampCase[] accepted = {
        new ValidTimestampCase("0", new Timestamp(0)),
        new ValidTimestampCase("-1000000", new Timestamp(-1000000)),
        new ValidTimestampCase("1420509274123", new Timestamp(1420509274123L)),
        new ValidTimestampCase("1420509274123.456789", new Timestamp(1420509274123L)),

        // The regular pattern must still be honored.
        parsesTo("1945-12-31T23:59:59", "1945-12-31 23:59:59"),
    };
    String[] rejected = {
        "1945-12-31-23:59:59",
        "1945-12-31T23:59:59.12345", // no pattern with 5 fractional digits was supplied
        "1420509274123-",
    };

    testValidCases(parser, accepted);
    testInvalidCases(parser, rejected);
  }

  @Test
  public void testPattern2() {
    // Patterns that omit some date fields; the missing ones come from 1970-01-01 00:00:00.
    String[] partialPatterns = {
        "HH:mm",
        "MM:dd:ss",
    };
    TimestampParser parser = new TimestampParser(partialPatterns);

    ValidTimestampCase[] accepted = {
        parsesTo("05:06", "1970-01-01 05:06:00"),
        parsesTo("05:06:07", "1970-05-06 00:00:07"),
    };
    String[] rejected = {
        "1945-12-31T23:59:59",
        "1945:12:31-",
        "12345",
    };

    testValidCases(parser, accepted);
    testInvalidCases(parser, rejected);
  }
}
// diff --git data/files/ts_formats.txt data/files/ts_formats.txt new file mode 100644 index 0000000..e13718b ---
/dev/null +++ data/files/ts_formats.txt @@ -0,0 +1,21 @@ +2011-01-01 01:01:01.1111111112011-01-01 01:01:01.1111111112011-01-01T01:01:01.1111111112011-01-01T01:01:01.1111111112011-01-01T01:01:012011-01-01T01:01:01 +2012-02-02 02:02:02.2222222222012-02-02 02:02:02.2222222222012-02-02T02:02:02.2222222222012-02-02T02:02:02.2222222222012-02-02T02:02:022012-02-02T02:02:02 +2013-03-03 03:03:03.3333333332013-03-03 03:03:03.3333333332013-03-03T03:03:03.3333333332013-03-03T03:03:03.3333333332013-03-03T03:03:032013-03-03T03:03:03 +2014-04-04 04:04:04.4444444442014-04-04 04:04:04.4444444442014-04-04T04:04:04.4444444442014-04-04T04:04:04.4444444442014-04-04T04:04:042014-04-04T04:04:04 +2015-05-05 05:05:05.5555555552015-05-05 05:05:05.5555555552015-05-05T05:05:05.5555555552015-05-05T05:05:05.5555555552015-05-05T05:05:052015-05-05T05:05:05 +2016-06-06 06:06:06.6666666662016-06-06 06:06:06.6666666662016-06-06T06:06:06.6666666662016-06-06T06:06:06.6666666662016-06-06T06:06:062016-06-06T06:06:06 +2017-07-07 07:07:07.7777777772017-07-07 07:07:07.7777777772017-07-07T07:07:07.7777777772017-07-07T07:07:07.7777777772017-07-07T07:07:072017-07-07T07:07:07 +2018-08-08 08:08:08.8888888882018-08-08 08:08:08.8888888882018-08-08T08:08:08.8888888882018-08-08T08:08:08.8888888882018-08-08T08:08:082018-08-08T08:08:08 +2019-09-09 09:09:09.9999999992019-09-09 09:09:09.9999999992019-09-09T09:09:09.9999999992019-09-09T09:09:09.9999999992019-09-09T09:09:092019-09-09T09:09:09 +2020-10-10 10:10:10.1010101012020-10-10 10:10:10.1010101012020-10-10T10:10:10.1010101012020-10-10T10:10:10.1010101012020-10-10T10:10:102020-10-10T10:10:10 +2021-11-11 11:11:11.1111111112021-11-11 11:11:11.1111111112021-11-11T11:11:11.1111111112021-11-11T11:11:11.1111111112021-11-11T11:11:112021-11-11T11:11:11 +2022-12-12 12:12:12.1212121212022-12-12 12:12:12.1212121212022-12-12T12:12:12.1212121212022-12-12T12:12:12.1212121212022-12-12T12:12:122022-12-12T12:12:12 +2023-01-02 13:13:13.1313131312023-01-02 
13:13:13.1313131312023-01-02T13:13:13.1313131312023-01-02T13:13:13.1313131312023-01-02T13:13:132023-01-02T13:13:13 +2024-02-02 14:14:14.1414141412024-02-02 14:14:14.1414141412024-02-02T14:14:14.1414141412024-02-02T14:14:14.1414141412024-02-02T14:14:142024-02-02T14:14:14 +2025-03-03 15:15:15.1515151512025-03-03 15:15:15.1515151512025-03-03T15:15:15.1515151512025-03-03T15:15:15.1515151512025-03-03T15:15:152025-03-03T15:15:15 +2026-04-04 16:16:16.1616161612026-04-04 16:16:16.1616161612026-04-04T16:16:16.1616161612026-04-04T16:16:16.1616161612026-04-04T16:16:162026-04-04T16:16:16 +2027-05-05 17:17:17.1717171712027-05-05 17:17:17.1717171712027-05-05T17:17:17.1717171712027-05-05T17:17:17.1717171712027-05-05T17:17:172027-05-05T17:17:17 +2028-06-06 18:18:18.1818181812028-06-06 18:18:18.1818181812028-06-06T18:18:18.1818181812028-06-06T18:18:18.1818181812028-06-06T18:18:182028-06-06T18:18:18 +2029-07-07 19:19:19.1919191912029-07-07 19:19:19.1919191912029-07-07T19:19:19.1919191912029-07-07T19:19:19.1919191912029-07-07T19:19:192029-07-07T19:19:19 +2030-08-08 20:20:20.2020202022030-08-08 20:20:20.2020202022030-08-08T20:20:20.2020202022030-08-08T20:20:20.2020202022030-08-08T20:20:202030-08-08T20:20:20 +2031-09-09 21:21:21.2121212122031-09-09 21:21:21.2121212122031-09-09T21:21:21.2121212122031-09-09T21:21:21.2121212122031-09-09T21:21:212031-09-09T21:21:21 diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java index 98bc73f..ab48d46 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class DefaultHBaseKeyFactory extends AbstractHBaseKeyFactory implements HBaseKeyFactory { @@ -45,7 +46,7 @@ public void init(HBaseSerDeParameters hbaseParam, Properties properties) throws @Override public ObjectInspector createKeyObjectInspector(TypeInfo type) throws SerDeException { return LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), 1, - serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar()); + serdeParams.getNullSequence(), serdeParams, ObjectInspectorOptions.JAVA); } @Override diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java index 78f23cb..2326d00 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java @@ -25,8 +25,12 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.Text; // Does same thing with LazyFactory#createLazyObjectInspector except that this replaces // original keyOI with OI which is create by HBaseKeyFactory provided by serde property for hbase @@ -46,8 +50,8 @@ public static 
ObjectInspector createLazyHBaseStructInspector( } } return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector( - serdeParams.getColumnNames(), columnObjectInspectors, serdeParams.getSeparators()[0], + serdeParams.getColumnNames(), columnObjectInspectors, null, serdeParams.getSeparators()[0], serdeParams.getNullSequence(), serdeParams.isLastColumnTakesRest(), - serdeParams.isEscaped(), serdeParams.getEscapeChar()); + serdeParams, ObjectInspectorOptions.JAVA); } } \ No newline at end of file diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java index a2ba827..0f31d2f 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java @@ -75,8 +75,7 @@ public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Propertie public ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException { ObjectInspector oi = LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), 1, - serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(), - ObjectInspectorOptions.AVRO); + serdeParams.getNullSequence(), serdeParams, ObjectInspectorOptions.AVRO); // initialize the object inspectors initInternalObjectInspectors(oi); diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java index e60b844..1e85aa2 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -63,7 +64,7 @@ public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Propertie public ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException { return LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), - 1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar()); + 1, serdeParams.getNullSequence(), serdeParams, ObjectInspectorOptions.JAVA); } @Override diff --git hbase-handler/src/test/queries/positive/hbase_timestamp_format.q hbase-handler/src/test/queries/positive/hbase_timestamp_format.q new file mode 100644 index 0000000..a8d5501 --- /dev/null +++ hbase-handler/src/test/queries/positive/hbase_timestamp_format.q @@ -0,0 +1,21 @@ + +create table hbase_str(rowkey string,mytime string,mystr string) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr') + TBLPROPERTIES ('hbase.table.name' = 'hbase_ts'); + +describe hbase_str; +insert overwrite table hbase_str select key, '2001-02-03-04.05.06.123456', value from src limit 3; +select * from hbase_str; + +-- Timestamp string does not match the default timestamp format, specify a custom timestamp format +create external table hbase_ts(rowkey string,mytime timestamp,mystr string) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr', 'timestamp.formats' = 'yyyy-MM-dd-HH.mm.ss.SSSSSS') + TBLPROPERTIES ('hbase.table.name' = 'hbase_ts'); + +describe hbase_ts; +select * from hbase_ts; + +drop table hbase_str; +drop table hbase_ts; diff --git 
hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out new file mode 100644 index 0000000..138cfe6 --- /dev/null +++ hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out @@ -0,0 +1,94 @@ +PREHOOK: query: create table hbase_str(rowkey string,mytime string,mystr string) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr') + TBLPROPERTIES ('hbase.table.name' = 'hbase_ts') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hbase_str +POSTHOOK: query: create table hbase_str(rowkey string,mytime string,mystr string) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr') + TBLPROPERTIES ('hbase.table.name' = 'hbase_ts') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hbase_str +PREHOOK: query: describe hbase_str +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@hbase_str +POSTHOOK: query: describe hbase_str +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@hbase_str +rowkey string from deserializer +mytime string from deserializer +mystr string from deserializer +PREHOOK: query: insert overwrite table hbase_str select key, '2001-02-03-04.05.06.123456', value from src limit 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@hbase_str +POSTHOOK: query: insert overwrite table hbase_str select key, '2001-02-03-04.05.06.123456', value from src limit 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@hbase_str +PREHOOK: query: select * from hbase_str +PREHOOK: type: QUERY +PREHOOK: Input: default@hbase_str +#### A masked pattern was here #### +POSTHOOK: query: select * from hbase_str +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hbase_str +#### A masked pattern was here #### +238 
2001-02-03-04.05.06.123456 val_238 +311 2001-02-03-04.05.06.123456 val_311 +86 2001-02-03-04.05.06.123456 val_86 +PREHOOK: query: -- Timestamp string does not match the default timestamp format, specify a custom timestamp format +create external table hbase_ts(rowkey string,mytime timestamp,mystr string) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr', 'timestamp.formats' = 'yyyy-MM-dd-HH.mm.ss.SSSSSS') + TBLPROPERTIES ('hbase.table.name' = 'hbase_ts') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@hbase_ts +POSTHOOK: query: -- Timestamp string does not match the default timestamp format, specify a custom timestamp format +create external table hbase_ts(rowkey string,mytime timestamp,mystr string) + STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' + WITH SERDEPROPERTIES ('hbase.columns.mapping' = 'm:mytime,m:mystr', 'timestamp.formats' = 'yyyy-MM-dd-HH.mm.ss.SSSSSS') + TBLPROPERTIES ('hbase.table.name' = 'hbase_ts') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@hbase_ts +PREHOOK: query: describe hbase_ts +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@hbase_ts +POSTHOOK: query: describe hbase_ts +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@hbase_ts +rowkey string from deserializer +mytime timestamp from deserializer +mystr string from deserializer +PREHOOK: query: select * from hbase_ts +PREHOOK: type: QUERY +PREHOOK: Input: default@hbase_ts +#### A masked pattern was here #### +POSTHOOK: query: select * from hbase_ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hbase_ts +#### A masked pattern was here #### +238 2001-02-03 04:05:06.123 val_238 +311 2001-02-03 04:05:06.123 val_311 +86 2001-02-03 04:05:06.123 val_86 +PREHOOK: query: drop table hbase_str +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hbase_str +PREHOOK: Output: default@hbase_str +POSTHOOK: query: drop 
table hbase_str +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hbase_str +POSTHOOK: Output: default@hbase_str +PREHOOK: query: drop table hbase_ts +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hbase_ts +PREHOOK: Output: default@hbase_ts +POSTHOOK: query: drop table hbase_ts +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hbase_ts +POSTHOOK: Output: default@hbase_ts diff --git pom.xml pom.xml index c147d45..bf60007 100644 --- pom.xml +++ pom.xml @@ -134,6 +134,7 @@ 1.14 2.12 1.1 + 2.5 3.5.2 20090211 4.11 diff --git ql/src/test/queries/clientpositive/timestamp_formats.q ql/src/test/queries/clientpositive/timestamp_formats.q new file mode 100644 index 0000000..b187b40 --- /dev/null +++ ql/src/test/queries/clientpositive/timestamp_formats.q @@ -0,0 +1,23 @@ + +CREATE TABLE timestamp_formats ( + c1 string, + c1_ts timestamp, + c2 string, + c2_ts timestamp, + c3 string, + c3_ts timestamp +); + +LOAD DATA LOCAL INPATH '../../data/files/ts_formats.txt' overwrite into table timestamp_formats; + +SELECT * FROM timestamp_formats; + +-- Add single timestamp format. 
This should allow c3_ts to parse +ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss"); +SELECT * FROM timestamp_formats; + +-- Add another format, to allow c2_ts to parse +ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss,yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS"); +SELECT * FROM timestamp_formats; + +DROP TABLE timestamp_formats; diff --git ql/src/test/results/clientpositive/timestamp_formats.q.out ql/src/test/results/clientpositive/timestamp_formats.q.out new file mode 100644 index 0000000..9cc9b29 --- /dev/null +++ ql/src/test/results/clientpositive/timestamp_formats.q.out @@ -0,0 +1,145 @@ +PREHOOK: query: CREATE TABLE timestamp_formats ( + c1 string, + c1_ts timestamp, + c2 string, + c2_ts timestamp, + c3 string, + c3_ts timestamp +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@timestamp_formats +POSTHOOK: query: CREATE TABLE timestamp_formats ( + c1 string, + c1_ts timestamp, + c2 string, + c2_ts timestamp, + c3 string, + c3_ts timestamp +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamp_formats +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ts_formats.txt' overwrite into table timestamp_formats +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@timestamp_formats +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/ts_formats.txt' overwrite into table timestamp_formats +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@timestamp_formats +PREHOOK: query: SELECT * FROM timestamp_formats +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_formats +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_formats +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_formats +#### A masked pattern was here #### +2011-01-01 01:01:01.111111111 2011-01-01 01:01:01.111111111 
2011-01-01T01:01:01.111111111 NULL 2011-01-01T01:01:01 NULL +2012-02-02 02:02:02.222222222 2012-02-02 02:02:02.222222222 2012-02-02T02:02:02.222222222 NULL 2012-02-02T02:02:02 NULL +2013-03-03 03:03:03.333333333 2013-03-03 03:03:03.333333333 2013-03-03T03:03:03.333333333 NULL 2013-03-03T03:03:03 NULL +2014-04-04 04:04:04.444444444 2014-04-04 04:04:04.444444444 2014-04-04T04:04:04.444444444 NULL 2014-04-04T04:04:04 NULL +2015-05-05 05:05:05.555555555 2015-05-05 05:05:05.555555555 2015-05-05T05:05:05.555555555 NULL 2015-05-05T05:05:05 NULL +2016-06-06 06:06:06.666666666 2016-06-06 06:06:06.666666666 2016-06-06T06:06:06.666666666 NULL 2016-06-06T06:06:06 NULL +2017-07-07 07:07:07.777777777 2017-07-07 07:07:07.777777777 2017-07-07T07:07:07.777777777 NULL 2017-07-07T07:07:07 NULL +2018-08-08 08:08:08.888888888 2018-08-08 08:08:08.888888888 2018-08-08T08:08:08.888888888 NULL 2018-08-08T08:08:08 NULL +2019-09-09 09:09:09.999999999 2019-09-09 09:09:09.999999999 2019-09-09T09:09:09.999999999 NULL 2019-09-09T09:09:09 NULL +2020-10-10 10:10:10.101010101 2020-10-10 10:10:10.101010101 2020-10-10T10:10:10.101010101 NULL 2020-10-10T10:10:10 NULL +2021-11-11 11:11:11.111111111 2021-11-11 11:11:11.111111111 2021-11-11T11:11:11.111111111 NULL 2021-11-11T11:11:11 NULL +2022-12-12 12:12:12.121212121 2022-12-12 12:12:12.121212121 2022-12-12T12:12:12.121212121 NULL 2022-12-12T12:12:12 NULL +2023-01-02 13:13:13.131313131 2023-01-02 13:13:13.131313131 2023-01-02T13:13:13.131313131 NULL 2023-01-02T13:13:13 NULL +2024-02-02 14:14:14.141414141 2024-02-02 14:14:14.141414141 2024-02-02T14:14:14.141414141 NULL 2024-02-02T14:14:14 NULL +2025-03-03 15:15:15.151515151 2025-03-03 15:15:15.151515151 2025-03-03T15:15:15.151515151 NULL 2025-03-03T15:15:15 NULL +2026-04-04 16:16:16.161616161 2026-04-04 16:16:16.161616161 2026-04-04T16:16:16.161616161 NULL 2026-04-04T16:16:16 NULL +2027-05-05 17:17:17.171717171 2027-05-05 17:17:17.171717171 2027-05-05T17:17:17.171717171 NULL 2027-05-05T17:17:17 NULL 
+2028-06-06 18:18:18.181818181 2028-06-06 18:18:18.181818181 2028-06-06T18:18:18.181818181 NULL 2028-06-06T18:18:18 NULL +2029-07-07 19:19:19.191919191 2029-07-07 19:19:19.191919191 2029-07-07T19:19:19.191919191 NULL 2029-07-07T19:19:19 NULL +2030-08-08 20:20:20.202020202 2030-08-08 20:20:20.202020202 2030-08-08T20:20:20.202020202 NULL 2030-08-08T20:20:20 NULL +2031-09-09 21:21:21.212121212 2031-09-09 21:21:21.212121212 2031-09-09T21:21:21.212121212 NULL 2031-09-09T21:21:21 NULL +PREHOOK: query: -- Add single timestamp format. This should allow c3_ts to parse +ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss") +PREHOOK: type: ALTERTABLE_SERDEPROPERTIES +PREHOOK: Input: default@timestamp_formats +PREHOOK: Output: default@timestamp_formats +POSTHOOK: query: -- Add single timestamp format. This should allow c3_ts to parse +ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss") +POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES +POSTHOOK: Input: default@timestamp_formats +POSTHOOK: Output: default@timestamp_formats +PREHOOK: query: SELECT * FROM timestamp_formats +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_formats +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_formats +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_formats +#### A masked pattern was here #### +2011-01-01 01:01:01.111111111 2011-01-01 01:01:01.111111111 2011-01-01T01:01:01.111111111 NULL 2011-01-01T01:01:01 2011-01-01 01:01:01 +2012-02-02 02:02:02.222222222 2012-02-02 02:02:02.222222222 2012-02-02T02:02:02.222222222 NULL 2012-02-02T02:02:02 2012-02-02 02:02:02 +2013-03-03 03:03:03.333333333 2013-03-03 03:03:03.333333333 2013-03-03T03:03:03.333333333 NULL 2013-03-03T03:03:03 2013-03-03 03:03:03 +2014-04-04 04:04:04.444444444 2014-04-04 04:04:04.444444444 2014-04-04T04:04:04.444444444 NULL 2014-04-04T04:04:04 2014-04-04 04:04:04 +2015-05-05 05:05:05.555555555 2015-05-05 
05:05:05.555555555 2015-05-05T05:05:05.555555555 NULL 2015-05-05T05:05:05 2015-05-05 05:05:05 +2016-06-06 06:06:06.666666666 2016-06-06 06:06:06.666666666 2016-06-06T06:06:06.666666666 NULL 2016-06-06T06:06:06 2016-06-06 06:06:06 +2017-07-07 07:07:07.777777777 2017-07-07 07:07:07.777777777 2017-07-07T07:07:07.777777777 NULL 2017-07-07T07:07:07 2017-07-07 07:07:07 +2018-08-08 08:08:08.888888888 2018-08-08 08:08:08.888888888 2018-08-08T08:08:08.888888888 NULL 2018-08-08T08:08:08 2018-08-08 08:08:08 +2019-09-09 09:09:09.999999999 2019-09-09 09:09:09.999999999 2019-09-09T09:09:09.999999999 NULL 2019-09-09T09:09:09 2019-09-09 09:09:09 +2020-10-10 10:10:10.101010101 2020-10-10 10:10:10.101010101 2020-10-10T10:10:10.101010101 NULL 2020-10-10T10:10:10 2020-10-10 10:10:10 +2021-11-11 11:11:11.111111111 2021-11-11 11:11:11.111111111 2021-11-11T11:11:11.111111111 NULL 2021-11-11T11:11:11 2021-11-11 11:11:11 +2022-12-12 12:12:12.121212121 2022-12-12 12:12:12.121212121 2022-12-12T12:12:12.121212121 NULL 2022-12-12T12:12:12 2022-12-12 12:12:12 +2023-01-02 13:13:13.131313131 2023-01-02 13:13:13.131313131 2023-01-02T13:13:13.131313131 NULL 2023-01-02T13:13:13 2023-01-02 13:13:13 +2024-02-02 14:14:14.141414141 2024-02-02 14:14:14.141414141 2024-02-02T14:14:14.141414141 NULL 2024-02-02T14:14:14 2024-02-02 14:14:14 +2025-03-03 15:15:15.151515151 2025-03-03 15:15:15.151515151 2025-03-03T15:15:15.151515151 NULL 2025-03-03T15:15:15 2025-03-03 15:15:15 +2026-04-04 16:16:16.161616161 2026-04-04 16:16:16.161616161 2026-04-04T16:16:16.161616161 NULL 2026-04-04T16:16:16 2026-04-04 16:16:16 +2027-05-05 17:17:17.171717171 2027-05-05 17:17:17.171717171 2027-05-05T17:17:17.171717171 NULL 2027-05-05T17:17:17 2027-05-05 17:17:17 +2028-06-06 18:18:18.181818181 2028-06-06 18:18:18.181818181 2028-06-06T18:18:18.181818181 NULL 2028-06-06T18:18:18 2028-06-06 18:18:18 +2029-07-07 19:19:19.191919191 2029-07-07 19:19:19.191919191 2029-07-07T19:19:19.191919191 NULL 2029-07-07T19:19:19 2029-07-07 19:19:19 
+2030-08-08 20:20:20.202020202 2030-08-08 20:20:20.202020202 2030-08-08T20:20:20.202020202 NULL 2030-08-08T20:20:20 2030-08-08 20:20:20 +2031-09-09 21:21:21.212121212 2031-09-09 21:21:21.212121212 2031-09-09T21:21:21.212121212 NULL 2031-09-09T21:21:21 2031-09-09 21:21:21 +PREHOOK: query: -- Add another format, to allow c2_ts to parse +ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss,yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS") +PREHOOK: type: ALTERTABLE_SERDEPROPERTIES +PREHOOK: Input: default@timestamp_formats +PREHOOK: Output: default@timestamp_formats +POSTHOOK: query: -- Add another format, to allow c2_ts to parse +ALTER TABLE timestamp_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ss,yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS") +POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES +POSTHOOK: Input: default@timestamp_formats +POSTHOOK: Output: default@timestamp_formats +PREHOOK: query: SELECT * FROM timestamp_formats +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamp_formats +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM timestamp_formats +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamp_formats +#### A masked pattern was here #### +2011-01-01 01:01:01.111111111 2011-01-01 01:01:01.111111111 2011-01-01T01:01:01.111111111 2011-01-01 01:01:01.111 2011-01-01T01:01:01 2011-01-01 01:01:01 +2012-02-02 02:02:02.222222222 2012-02-02 02:02:02.222222222 2012-02-02T02:02:02.222222222 2012-02-02 02:02:02.222 2012-02-02T02:02:02 2012-02-02 02:02:02 +2013-03-03 03:03:03.333333333 2013-03-03 03:03:03.333333333 2013-03-03T03:03:03.333333333 2013-03-03 03:03:03.333 2013-03-03T03:03:03 2013-03-03 03:03:03 +2014-04-04 04:04:04.444444444 2014-04-04 04:04:04.444444444 2014-04-04T04:04:04.444444444 2014-04-04 04:04:04.444 2014-04-04T04:04:04 2014-04-04 04:04:04 +2015-05-05 05:05:05.555555555 2015-05-05 05:05:05.555555555 2015-05-05T05:05:05.555555555 2015-05-05 05:05:05.555 2015-05-05T05:05:05 2015-05-05 
05:05:05 +2016-06-06 06:06:06.666666666 2016-06-06 06:06:06.666666666 2016-06-06T06:06:06.666666666 2016-06-06 06:06:06.666 2016-06-06T06:06:06 2016-06-06 06:06:06 +2017-07-07 07:07:07.777777777 2017-07-07 07:07:07.777777777 2017-07-07T07:07:07.777777777 2017-07-07 07:07:07.777 2017-07-07T07:07:07 2017-07-07 07:07:07 +2018-08-08 08:08:08.888888888 2018-08-08 08:08:08.888888888 2018-08-08T08:08:08.888888888 2018-08-08 08:08:08.888 2018-08-08T08:08:08 2018-08-08 08:08:08 +2019-09-09 09:09:09.999999999 2019-09-09 09:09:09.999999999 2019-09-09T09:09:09.999999999 2019-09-09 09:09:09.999 2019-09-09T09:09:09 2019-09-09 09:09:09 +2020-10-10 10:10:10.101010101 2020-10-10 10:10:10.101010101 2020-10-10T10:10:10.101010101 2020-10-10 10:10:10.101 2020-10-10T10:10:10 2020-10-10 10:10:10 +2021-11-11 11:11:11.111111111 2021-11-11 11:11:11.111111111 2021-11-11T11:11:11.111111111 2021-11-11 11:11:11.111 2021-11-11T11:11:11 2021-11-11 11:11:11 +2022-12-12 12:12:12.121212121 2022-12-12 12:12:12.121212121 2022-12-12T12:12:12.121212121 2022-12-12 12:12:12.121 2022-12-12T12:12:12 2022-12-12 12:12:12 +2023-01-02 13:13:13.131313131 2023-01-02 13:13:13.131313131 2023-01-02T13:13:13.131313131 2023-01-02 13:13:13.131 2023-01-02T13:13:13 2023-01-02 13:13:13 +2024-02-02 14:14:14.141414141 2024-02-02 14:14:14.141414141 2024-02-02T14:14:14.141414141 2024-02-02 14:14:14.141 2024-02-02T14:14:14 2024-02-02 14:14:14 +2025-03-03 15:15:15.151515151 2025-03-03 15:15:15.151515151 2025-03-03T15:15:15.151515151 2025-03-03 15:15:15.151 2025-03-03T15:15:15 2025-03-03 15:15:15 +2026-04-04 16:16:16.161616161 2026-04-04 16:16:16.161616161 2026-04-04T16:16:16.161616161 2026-04-04 16:16:16.161 2026-04-04T16:16:16 2026-04-04 16:16:16 +2027-05-05 17:17:17.171717171 2027-05-05 17:17:17.171717171 2027-05-05T17:17:17.171717171 2027-05-05 17:17:17.171 2027-05-05T17:17:17 2027-05-05 17:17:17 +2028-06-06 18:18:18.181818181 2028-06-06 18:18:18.181818181 2028-06-06T18:18:18.181818181 2028-06-06 18:18:18.181 
2028-06-06T18:18:18 2028-06-06 18:18:18 +2029-07-07 19:19:19.191919191 2029-07-07 19:19:19.191919191 2029-07-07T19:19:19.191919191 2029-07-07 19:19:19.191 2029-07-07T19:19:19 2029-07-07 19:19:19 +2030-08-08 20:20:20.202020202 2030-08-08 20:20:20.202020202 2030-08-08T20:20:20.202020202 2030-08-08 20:20:20.202 2030-08-08T20:20:20 2030-08-08 20:20:20 +2031-09-09 21:21:21.212121212 2031-09-09 21:21:21.212121212 2031-09-09T21:21:21.212121212 2031-09-09 21:21:21.212 2031-09-09T21:21:21 2031-09-09 21:21:21 +PREHOOK: query: DROP TABLE timestamp_formats +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@timestamp_formats +PREHOOK: Output: default@timestamp_formats +POSTHOOK: query: DROP TABLE timestamp_formats +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@timestamp_formats +POSTHOOK: Output: default@timestamp_formats diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java index 8d3595b..e70d0c4 100644 --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -111,6 +111,8 @@ public static final String LIST_COLUMN_TYPES = "columns.types"; + public static final String TIMESTAMP_FORMATS = "timestamp.formats"; + public static final Set PrimitiveTypes = new HashSet(); static { PrimitiveTypes.add("void"); diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java index 2fb1c28..428894b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; import 
org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -84,6 +85,7 @@ * @param escaped whether the data is escaped or not * @param escapeChar if escaped is true, the escape character * */ + @Deprecated public AvroLazyObjectInspector(List structFieldNames, List structFieldObjectInspectors, List structFieldComments, byte separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, @@ -92,6 +94,13 @@ public AvroLazyObjectInspector(List structFieldNames, nullSequence, lastColumnTakesRest, escaped, escapeChar); } + public AvroLazyObjectInspector(List structFieldNames, + List structFieldObjectInspectors, List structFieldComments, + byte separator, Text nullSequence, boolean lastColumnTakesRest, LazyObjectInspectorParameters lazyParams) { + super(structFieldNames, structFieldObjectInspectors, structFieldComments, separator, + nullSequence, lastColumnTakesRest, lazyParams); + } + /** * Set the reader schema for the {@link AvroLazyObjectInspector} to the given schema * */ diff --git serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java index 882c43e..48a5b2f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import 
org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -97,8 +98,7 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException // ColumnarObject uses same ObjectInpector as LazyStruct cachedObjectInspector = LazyFactory.createColumnarStructInspector( serdeParams.getColumnNames(), serdeParams.getColumnTypes(), serdeParams - .getSeparators(), serdeParams.getNullSequence(), serdeParams - .isEscaped(), serdeParams.getEscapeChar()); + .getSeparators(), serdeParams.getNullSequence(), serdeParams); int size = serdeParams.getColumnTypes().size(); List notSkipIDs = new ArrayList(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java index e3968a9..20018ca 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java @@ -38,6 +38,8 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector; @@ -216,6 +218,7 
@@ * @return The ObjectInspector * @throws SerDeException */ + @Deprecated public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, ObjectInspectorOptions option) throws SerDeException { @@ -240,6 +243,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, * @return The ObjectInspector * @throws SerDeException */ + @Deprecated public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar) throws SerDeException { @@ -261,6 +265,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, * @return The ObjectInspector * @throws SerDeException */ + @Deprecated public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException { @@ -282,30 +287,52 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, * @return The ObjectInspector * @throws SerDeException */ + @Deprecated public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral, ObjectInspectorOptions option) throws SerDeException { + LazyObjectInspectorParametersImpl lazyParams = + new LazyObjectInspectorParametersImpl(escaped, escapeChar, extendedBooleanLiteral, null); + return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, + lazyParams, option); + } + + /** + * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo. 
+ * + * @param typeInfo The type information for the LazyObject + * @param separator The array of separators for delimiting each level + * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of + * separator, and map uses 2 levels: the first one for delimiting entries, the second one + * for delimiting key and values. + * @param lazyParams Params for lazy types + * @param option the {@link ObjectInspectorOption} + * @return The ObjectInspector + * @throws SerDeException + */ + public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, + byte[] separator, int separatorIndex, Text nullSequence, + LazyObjectInspectorParameters lazyParams, ObjectInspectorOptions option) throws SerDeException { ObjectInspector.Category c = typeInfo.getCategory(); switch (c) { case PRIMITIVE: return LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector( - (PrimitiveTypeInfo) typeInfo, escaped, escapeChar, extendedBooleanLiteral); + (PrimitiveTypeInfo) typeInfo, lazyParams); case MAP: return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector( - createLazyObjectInspector(((MapTypeInfo) typeInfo) - .getMapKeyTypeInfo(), separator, separatorIndex + 2, - nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), createLazyObjectInspector( - ((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator, - separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), + createLazyObjectInspector(((MapTypeInfo) typeInfo).getMapKeyTypeInfo(), + separator, separatorIndex + 2, nullSequence, lazyParams, option), + createLazyObjectInspector(((MapTypeInfo) typeInfo).getMapValueTypeInfo(), + separator, separatorIndex + 2, nullSequence, lazyParams, option), LazyUtils.getSeparator(separator, separatorIndex), LazyUtils.getSeparator(separator, separatorIndex+1), - nullSequence, escaped, escapeChar); + nullSequence, lazyParams); case LIST: return LazyObjectInspectorFactory.getLazySimpleListObjectInspector( 
createLazyObjectInspector(((ListTypeInfo) typeInfo) .getListElementTypeInfo(), separator, separatorIndex + 1, - nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), LazyUtils.getSeparator(separator, separatorIndex), - nullSequence, escaped, escapeChar); + nullSequence, lazyParams, option), LazyUtils.getSeparator(separator, separatorIndex), + nullSequence, lazyParams); case STRUCT: StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; List fieldNames = structTypeInfo.getAllStructFieldNames(); @@ -315,25 +342,23 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, fieldTypeInfos.size()); for (int i = 0; i < fieldTypeInfos.size(); i++) { fieldObjectInspectors.add(createLazyObjectInspector(fieldTypeInfos - .get(i), separator, separatorIndex + 1, nullSequence, escaped, - escapeChar, extendedBooleanLiteral, option)); + .get(i), separator, separatorIndex + 1, nullSequence, lazyParams, option)); } return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector( - fieldNames, fieldObjectInspectors, + fieldNames, fieldObjectInspectors, null, LazyUtils.getSeparator(separator, separatorIndex), - nullSequence, - false, escaped, escapeChar, option); + nullSequence, + false, lazyParams, option); case UNION: UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; List lazyOIs = new ArrayList(); for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) { lazyOIs.add(createLazyObjectInspector(uti, separator, - separatorIndex + 1, nullSequence, escaped, - escapeChar, extendedBooleanLiteral, option)); + separatorIndex + 1, nullSequence, lazyParams, option)); } return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs, LazyUtils.getSeparator(separator, separatorIndex), - nullSequence, escaped, escapeChar); + nullSequence, lazyParams); } throw new RuntimeException("Hive LazySerDe Internal error."); @@ -350,6 +375,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, * @see 
LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text, * boolean, byte) */ + @Deprecated public static ObjectInspector createLazyStructInspector( List columnNames, List typeInfos, byte[] separators, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, @@ -370,20 +396,43 @@ public static ObjectInspector createLazyStructInspector( * @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text, * boolean, byte) */ + @Deprecated public static ObjectInspector createLazyStructInspector( List columnNames, List typeInfos, byte[] separators, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException { + LazyObjectInspectorParametersImpl lazyParams = + new LazyObjectInspectorParametersImpl(escaped, escapeChar, extendedBooleanLiteral, null); + return createLazyStructInspector(columnNames, typeInfos, separators, + nullSequence, lastColumnTakesRest, lazyParams); + } + + /** + * Create a hierarchical ObjectInspector for LazyStruct with the given + * columnNames and columnTypeInfos. + * + * @param lastColumnTakesRest + * whether the last column of the struct should take the rest of the + * row if there are extra fields. 
+ * @param lazyParams parameters for the lazy types + * @throws SerDeException + * @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text, + * boolean, byte) + */ + public static ObjectInspector createLazyStructInspector( + List columnNames, List typeInfos, byte[] separators, + Text nullSequence, boolean lastColumnTakesRest, + LazyObjectInspectorParameters lazyParams) throws SerDeException { ArrayList columnObjectInspectors = new ArrayList( typeInfos.size()); for (int i = 0; i < typeInfos.size(); i++) { columnObjectInspectors.add(LazyFactory.createLazyObjectInspector( - typeInfos.get(i), separators, 1, nullSequence, escaped, escapeChar, - extendedBooleanLiteral)); + typeInfos.get(i), separators, 1, nullSequence, + lazyParams, ObjectInspectorOptions.JAVA)); } return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector( - columnNames, columnObjectInspectors, separators[0], nullSequence, - lastColumnTakesRest, escaped, escapeChar); + columnNames, columnObjectInspectors, null, separators[0], nullSequence, + lastColumnTakesRest, lazyParams, ObjectInspectorOptions.JAVA); } /** @@ -394,15 +443,33 @@ public static ObjectInspector createLazyStructInspector( * @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text, * boolean, byte) */ + @Deprecated public static ObjectInspector createColumnarStructInspector( List columnNames, List columnTypes, byte[] separators, Text nullSequence, boolean escaped, byte escapeChar) throws SerDeException { + LazyObjectInspectorParametersImpl lazyParams = + new LazyObjectInspectorParametersImpl(escaped, escapeChar, false, null); + return createColumnarStructInspector(columnNames, columnTypes, + separators, nullSequence, lazyParams); + } + + /** + * Create a hierarchical ObjectInspector for ColumnarStruct with the given + * columnNames and columnTypeInfos. 
+ * @throws SerDeException + * + * @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text, + * boolean, byte) + */ + public static ObjectInspector createColumnarStructInspector( + List columnNames, List columnTypes, byte[] separators, + Text nullSequence, LazyObjectInspectorParameters lazyParams) throws SerDeException { ArrayList columnObjectInspectors = new ArrayList( columnTypes.size()); for (int i = 0; i < columnTypes.size(); i++) { columnObjectInspectors .add(LazyFactory.createLazyObjectInspector(columnTypes.get(i), - separators, 1, nullSequence, escaped, escapeChar, false)); + separators, 1, nullSequence, lazyParams, ObjectInspectorOptions.JAVA)); } return ObjectInspectorFactory.getColumnarStructObjectInspector(columnNames, columnObjectInspectors); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java index 95e30db..ecc4999 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.serde2.lazy; import java.io.IOException; +import java.nio.charset.Charset; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; @@ -36,6 +38,8 @@ import org.apache.hadoop.hive.serde2.SerDeSpec; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -127,7 +131,7 @@ public static byte getByte(String altValue, byte 
defaultVal) { * SerDeParameters. * */ - public static class SerDeParameters { + public static class SerDeParameters implements LazyObjectInspectorParameters { byte[] separators = DefaultSeparators; String nullString; Text nullSequence; @@ -141,6 +145,20 @@ public static byte getByte(String altValue, byte defaultVal) { boolean[] needsEscape; boolean extendedBooleanLiteral; + List timestampFormats; + + public SerDeParameters() { + } + + public SerDeParameters( + boolean escaped, byte escapeChar, + boolean extendedBooleanLiteral, List timestampFormats) { + super(); + this.escaped = escaped; + this.escapeChar = escapeChar; + this.extendedBooleanLiteral = extendedBooleanLiteral; + this.timestampFormats = timestampFormats; + } public List getColumnTypes() { return columnTypes; @@ -181,6 +199,14 @@ public byte getEscapeChar() { public boolean[] getNeedsEscape() { return needsEscape; } + + public boolean isExtendedBooleanLiteral() { + return extendedBooleanLiteral; + } + + public List getTimestampFormats() { + return timestampFormats; + } } SerDeParameters serdeParams = null; @@ -206,8 +232,7 @@ public void initialize(Configuration job, Properties tbl) cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams .getColumnNames(), serdeParams.getColumnTypes(), serdeParams .getSeparators(), serdeParams.getNullSequence(), serdeParams - .isLastColumnTakesRest(), serdeParams.isEscaped(), serdeParams - .getEscapeChar(), serdeParams.extendedBooleanLiteral); + .isLastColumnTakesRest(), serdeParams); cachedLazyStruct = (LazyStruct) LazyFactory .createLazyObject(cachedObjectInspector); @@ -216,7 +241,7 @@ public void initialize(Configuration job, Properties tbl) + serdeParams.columnNames + " columnTypes=" + serdeParams.columnTypes + " separator=" + Arrays.asList(serdeParams.separators) + " nullstring=" + serdeParams.nullString + " lastColumnTakesRest=" - + serdeParams.lastColumnTakesRest); + + serdeParams.lastColumnTakesRest + " timestampFormats=" + 
serdeParams.timestampFormats); serializedSize = 0; stats = new SerDeStats(); @@ -324,6 +349,10 @@ public static SerDeParameters initSerdeParams(Configuration job, serdeParams.extendedBooleanLiteral = job == null ? false : job.getBoolean(ConfVars.HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL.varname, false); + + serdeParams.timestampFormats = + getTimestampFormatList(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS)); + return serdeParams; } @@ -608,4 +637,61 @@ protected Writable transformToUTF8(Writable blob) { Text text = (Text)blob; return SerDeUtils.transformTextToUTF8(text, this.charset); } + + /** + * From the conf string value, parse out the list of timestamp formats. + * Format strings are delimited by commas. Escape characters allowed are '\,' and '\\'. + * @param val + * @return List of strings representing each timestamp format, or null if val is null. + * @throws IllegalArgumentException + */ + public static List getTimestampFormatList(String val) throws IllegalArgumentException { + if (val == null) { + return null; + } + + StringBuilder sb = new StringBuilder(); + List ret = new ArrayList(); + + boolean escaping = false; + for (int idx = 0; idx < val.length(); ++idx) { + char cur = val.charAt(idx); + if (escaping) { + switch (cur) { + case ',': + sb.append(cur); + break; + case '\\': + sb.append(cur); + break; + default: + throw new IllegalArgumentException("Invalid character after \\: '" + cur + "'"); + } + escaping = false; + } else { + switch (cur) { + case ',': + // End of pattern, add to pattern list if nonempty + if (sb.length() > 0) { + ret.add(sb.toString()); + } + sb.setLength(0); + break; + case '\\': + escaping = true; + break; + default: + sb.append(cur); + break; + } + } + } + + // Add any remaining chars in string builder as final pattern + if (sb.length() > 0) { + ret.add(sb.toString()); + } + + return ret; + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java 
serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java index 27895c5..66134e1 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java @@ -72,7 +72,7 @@ public void init(ByteArrayRef bytes, int start, int length) { logExceptionMessage(bytes, start, length, "TIMESTAMP"); } else { try { - t = Timestamp.valueOf(s); + t = oi.getTimestampParser().parseTimestamp(s); isNull = false; } catch (IllegalArgumentException e) { isNull = true; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index 3943508..33b1b8f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -25,6 +25,7 @@ import java.nio.charset.CharacterCodingException; import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Properties; import org.apache.commons.codec.binary.Base64; @@ -33,6 +34,7 @@ import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; @@ -450,6 +452,14 @@ public static void copyAndEscapeStringDataToText(byte[] inputBytes, int start, i } } + public static void addLazyParamsToSignature(LazyObjectInspectorParameters lazyParams, + List signature) { + signature.add(lazyParams.isEscaped()); + signature.add(lazyParams.getEscapeChar()); + signature.add(lazyParams.isExtendedBooleanLiteral()); + 
signature.add(lazyParams.getTimestampFormats()); + } + private LazyUtils() { // prevent instantiation } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyListObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyListObjectInspector.java index 9d66a78..2a0eafe 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyListObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyListObjectInspector.java @@ -23,6 +23,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.lazy.LazyArray; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Text; @@ -41,8 +43,7 @@ private ObjectInspector listElementObjectInspector; private byte separator; private Text nullSequence; - private boolean escaped; - private byte escapeChar; + private LazyObjectInspectorParameters lazyParams; protected LazyListObjectInspector() { super(); @@ -51,12 +52,11 @@ protected LazyListObjectInspector() { * Call ObjectInspectorFactory.getLazySimpleListObjectInspector instead. 
*/ protected LazyListObjectInspector(ObjectInspector listElementObjectInspector, - byte separator, Text nullSequence, boolean escaped, byte escapeChar) { + byte separator, Text nullSequence, LazyObjectInspectorParameters lazyParams) { this.listElementObjectInspector = listElementObjectInspector; this.separator = separator; this.nullSequence = nullSequence; - this.escaped = escaped; - this.escapeChar = escapeChar; + this.lazyParams = lazyParams; } @Override @@ -120,11 +120,14 @@ public Text getNullSequence() { } public boolean isEscaped() { - return escaped; + return lazyParams.isEscaped(); } public byte getEscapeChar() { - return escapeChar; + return lazyParams.getEscapeChar(); } + public LazyObjectInspectorParameters getLazyParams() { + return lazyParams; + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyMapObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyMapObjectInspector.java index ee870f5..98e985c 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyMapObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyMapObjectInspector.java @@ -23,6 +23,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.lazy.LazyMap; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Text; @@ -43,11 +45,11 @@ private byte itemSeparator; private byte keyValueSeparator; private Text nullSequence; - private boolean escaped; - private byte escapeChar; + private LazyObjectInspectorParameters lazyParams; protected LazyMapObjectInspector() { super(); + 
lazyParams = new LazyObjectInspectorParametersImpl(); } /** * Call ObjectInspectorFactory.getStandardListObjectInspector instead. @@ -62,8 +64,19 @@ protected LazyMapObjectInspector(ObjectInspector mapKeyObjectInspector, this.itemSeparator = itemSeparator; this.keyValueSeparator = keyValueSeparator; this.nullSequence = nullSequence; - this.escaped = escaped; - this.escapeChar = escapeChar; + this.lazyParams = new LazyObjectInspectorParametersImpl(escaped, escapeChar, false, null); + } + + protected LazyMapObjectInspector(ObjectInspector mapKeyObjectInspector, + ObjectInspector mapValueObjectInspector, byte itemSeparator, + byte keyValueSeparator, Text nullSequence, LazyObjectInspectorParameters lazyParams) { + this.mapKeyObjectInspector = mapKeyObjectInspector; + this.mapValueObjectInspector = mapValueObjectInspector; + + this.itemSeparator = itemSeparator; + this.keyValueSeparator = keyValueSeparator; + this.nullSequence = nullSequence; + this.lazyParams = lazyParams; } @Override @@ -126,10 +139,14 @@ public Text getNullSequence() { } public boolean isEscaped() { - return escaped; + return lazyParams.isEscaped(); } public byte getEscapeChar() { - return escapeChar; + return lazyParams.getEscapeChar(); + } + + public LazyObjectInspectorParameters getLazyParams() { + return lazyParams; } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java index 1abd8a5..607b073 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java @@ -23,6 +23,9 @@ import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import 
org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.io.Text; @@ -43,6 +46,7 @@ static ConcurrentHashMap, LazySimpleStructObjectInspector> cachedLazySimpleStructObjectInspector = new ConcurrentHashMap, LazySimpleStructObjectInspector>(); + @Deprecated public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List structFieldNames, List structFieldObjectInspectors, byte separator, @@ -52,7 +56,8 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector structFieldObjectInspectors, null, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar, ObjectInspectorOptions.JAVA); } - + + @Deprecated public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List structFieldNames, List structFieldObjectInspectors, byte separator, @@ -63,6 +68,7 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector lastColumnTakesRest, escaped, escapeChar, option); } + @Deprecated public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List structFieldNames, List structFieldObjectInspectors, List structFieldComments, @@ -72,39 +78,47 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar, ObjectInspectorOptions.JAVA); } - + + @Deprecated public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List structFieldNames, List structFieldObjectInspectors, List structFieldComments, byte separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped,byte escapeChar, 
ObjectInspectorOptions option) { + + return getLazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, + structFieldComments, separator, nullSequence, lastColumnTakesRest, + new LazyObjectInspectorParametersImpl(escaped, escapeChar, false, null), option); + } + + public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( + List structFieldNames, + List structFieldObjectInspectors, List structFieldComments, + byte separator, Text nullSequence, boolean lastColumnTakesRest, + LazyObjectInspectorParameters lazyParams, ObjectInspectorOptions option) { ArrayList signature = new ArrayList(); signature.add(structFieldNames); signature.add(structFieldObjectInspectors); signature.add(Byte.valueOf(separator)); signature.add(nullSequence.toString()); signature.add(Boolean.valueOf(lastColumnTakesRest)); - signature.add(Boolean.valueOf(escaped)); - signature.add(Byte.valueOf(escapeChar)); + LazyUtils.addLazyParamsToSignature(lazyParams, signature); signature.add(option); if(structFieldComments != null) { signature.add(structFieldComments); } - LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector - .get(signature); + LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector.get(signature); if (result == null) { switch (option) { case JAVA: result = new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, - structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, - escapeChar); + structFieldComments, separator, nullSequence, lastColumnTakesRest, lazyParams); break; case AVRO: result = new AvroLazyObjectInspector(structFieldNames, structFieldObjectInspectors, - structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, - escapeChar); + structFieldComments, separator, nullSequence, lastColumnTakesRest, lazyParams); break; default: throw new IllegalArgumentException("Illegal ObjectInspector type [" + option + "]"); @@ -118,20 
+132,27 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector static ConcurrentHashMap, LazyListObjectInspector> cachedLazySimpleListObjectInspector = new ConcurrentHashMap, LazyListObjectInspector>(); + @Deprecated public static LazyListObjectInspector getLazySimpleListObjectInspector( ObjectInspector listElementObjectInspector, byte separator, Text nullSequence, boolean escaped, byte escapeChar) { + return getLazySimpleListObjectInspector(listElementObjectInspector, separator, nullSequence, + new LazyObjectInspectorParametersImpl(escaped, escapeChar, false, null)); + } + + public static LazyListObjectInspector getLazySimpleListObjectInspector( + ObjectInspector listElementObjectInspector, byte separator, + Text nullSequence, LazyObjectInspectorParameters lazyParams) { ArrayList signature = new ArrayList(); signature.add(listElementObjectInspector); signature.add(Byte.valueOf(separator)); signature.add(nullSequence.toString()); - signature.add(Boolean.valueOf(escaped)); - signature.add(Byte.valueOf(escapeChar)); + LazyUtils.addLazyParamsToSignature(lazyParams, signature); LazyListObjectInspector result = cachedLazySimpleListObjectInspector .get(signature); if (result == null) { result = new LazyListObjectInspector(listElementObjectInspector, - separator, nullSequence, escaped, escapeChar); + separator, nullSequence, lazyParams); cachedLazySimpleListObjectInspector.put(signature, result); } return result; @@ -140,25 +161,34 @@ public static LazyListObjectInspector getLazySimpleListObjectInspector( static ConcurrentHashMap, LazyMapObjectInspector> cachedLazySimpleMapObjectInspector = new ConcurrentHashMap, LazyMapObjectInspector>(); + @Deprecated public static LazyMapObjectInspector getLazySimpleMapObjectInspector( ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector, byte itemSeparator, byte keyValueSeparator, Text nullSequence, boolean escaped, byte escapeChar) { + return 
getLazySimpleMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector, + itemSeparator, keyValueSeparator, nullSequence, + new LazyObjectInspectorParametersImpl(escaped, escapeChar, false, null)); + } + + public static LazyMapObjectInspector getLazySimpleMapObjectInspector( + ObjectInspector mapKeyObjectInspector, + ObjectInspector mapValueObjectInspector, byte itemSeparator, + byte keyValueSeparator, Text nullSequence, LazyObjectInspectorParameters lazyParams) { ArrayList signature = new ArrayList(); signature.add(mapKeyObjectInspector); signature.add(mapValueObjectInspector); signature.add(Byte.valueOf(itemSeparator)); signature.add(Byte.valueOf(keyValueSeparator)); signature.add(nullSequence.toString()); - signature.add(Boolean.valueOf(escaped)); - signature.add(Byte.valueOf(escapeChar)); + LazyUtils.addLazyParamsToSignature(lazyParams, signature); LazyMapObjectInspector result = cachedLazySimpleMapObjectInspector .get(signature); if (result == null) { result = new LazyMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector, itemSeparator, keyValueSeparator, - nullSequence, escaped, escapeChar); + nullSequence, lazyParams); cachedLazySimpleMapObjectInspector.put(signature, result); } return result; @@ -168,20 +198,27 @@ public static LazyMapObjectInspector getLazySimpleMapObjectInspector( cachedLazyUnionObjectInspector = new ConcurrentHashMap, LazyUnionObjectInspector>(); + @Deprecated public static LazyUnionObjectInspector getLazyUnionObjectInspector( List ois, byte separator, Text nullSequence, boolean escaped, byte escapeChar) { + return getLazyUnionObjectInspector(ois, separator, nullSequence, + new LazyObjectInspectorParametersImpl(escaped, escapeChar, false, null)); + } + + public static LazyUnionObjectInspector getLazyUnionObjectInspector( + List ois, byte separator, Text nullSequence, + LazyObjectInspectorParameters lazyParams) { List signature = new ArrayList(); signature.add(ois); signature.add(Byte.valueOf(separator)); 
signature.add(nullSequence.toString()); - signature.add(Boolean.valueOf(escaped)); - signature.add(Byte.valueOf(escapeChar)); + LazyUtils.addLazyParamsToSignature(lazyParams, signature); LazyUnionObjectInspector result = cachedLazyUnionObjectInspector .get(signature); if (result == null) { result = new LazyUnionObjectInspector(ois, separator, - nullSequence, escaped, escapeChar); + nullSequence, lazyParams); cachedLazyUnionObjectInspector.put(signature, result); } return result; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java index 9611e9f..7be9b20 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java @@ -24,6 +24,8 @@ import org.apache.hadoop.hive.serde2.BaseStructObjectInspector; import org.apache.hadoop.hive.serde2.StructObject; import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -44,8 +46,7 @@ private byte separator; private Text nullSequence; private boolean lastColumnTakesRest; - private boolean escaped; - private byte escapeChar; + private LazyObjectInspectorParameters lazyParams; protected LazySimpleStructObjectInspector() { super(); @@ -70,6 +71,7 @@ protected LazySimpleStructObjectInspector(List structFieldNames, nullSequence, lastColumnTakesRest, escaped, escapeChar); } + @Deprecated public 
LazySimpleStructObjectInspector(List structFieldNames, List structFieldObjectInspectors, List structFieldComments, byte separator, Text nullSequence, @@ -78,17 +80,36 @@ public LazySimpleStructObjectInspector(List structFieldNames, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar); } + public LazySimpleStructObjectInspector(List structFieldNames, + List structFieldObjectInspectors, + List structFieldComments, byte separator, Text nullSequence, + boolean lastColumnTakesRest, LazyObjectInspectorParameters lazyParams) { + init(structFieldNames, structFieldObjectInspectors, structFieldComments, + separator, nullSequence, lastColumnTakesRest, lazyParams); + } + protected void init(List structFieldNames, List structFieldObjectInspectors, List structFieldComments, byte separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) { + LazyObjectInspectorParameters lazyParams = + new LazyObjectInspectorParametersImpl(escaped, escapeChar, + false, null); + init(structFieldNames, structFieldObjectInspectors, structFieldComments, + separator, nullSequence, lastColumnTakesRest, lazyParams); + } + + protected void init(List structFieldNames, + List structFieldObjectInspectors, + List structFieldComments, byte separator, + Text nullSequence, boolean lastColumnTakesRest, + LazyObjectInspectorParameters lazyParams) { init(structFieldNames, structFieldObjectInspectors, structFieldComments); this.separator = separator; this.nullSequence = nullSequence; this.lastColumnTakesRest = lastColumnTakesRest; - this.escaped = escaped; - this.escapeChar = escapeChar; + this.lazyParams = lazyParams; } // With Data @@ -150,11 +171,14 @@ public boolean getLastColumnTakesRest() { } public boolean isEscaped() { - return escaped; + return lazyParams.isEscaped(); } public byte getEscapeChar() { - return escapeChar; + return lazyParams.getEscapeChar(); } + public LazyObjectInspectorParameters getLazyParams() { + return lazyParams; + } } diff --git 
serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyUnionObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyUnionObjectInspector.java index 792a9a2..7d32397 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyUnionObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyUnionObjectInspector.java @@ -24,6 +24,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.lazy.LazyUnion; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; @@ -45,18 +47,16 @@ private List ois; private byte separator; private Text nullSequence; - private boolean escaped; - private byte escapeChar; + private LazyObjectInspectorParameters lazyParams; protected LazyUnionObjectInspector() { super(); } + protected LazyUnionObjectInspector( List ois, byte separator, - Text nullSequence, boolean escaped, - byte escapeChar) { - init(ois, separator, - nullSequence, escaped, escapeChar); + Text nullSequence, LazyObjectInspectorParameters lazyParams) { + init(ois, separator, nullSequence, lazyParams); } @Override @@ -66,12 +66,10 @@ public String getTypeName() { protected void init( List ois, byte separator, - Text nullSequence, boolean escaped, - byte escapeChar) { + Text nullSequence, LazyObjectInspectorParameters lazyParams) { this.separator = separator; this.nullSequence = nullSequence; - this.escaped = escaped; - this.escapeChar = escapeChar; + this.lazyParams = lazyParams; this.ois = new ArrayList(); this.ois.addAll(ois); } @@ -87,6 +85,7 
@@ protected void init(List ois, byte separator, this.nullSequence = nullSequence; this.ois = new ArrayList(); this.ois.addAll(ois); + this.lazyParams = new LazyObjectInspectorParametersImpl(); } @Override @@ -103,13 +102,16 @@ public Text getNullSequence() { } public boolean isEscaped() { - return escaped; + return lazyParams.isEscaped(); } public byte getEscapeChar() { - return escapeChar; + return lazyParams.getEscapeChar(); } + public LazyObjectInspectorParameters getLazyParams() { + return lazyParams; + } @Override public Object getField(Object data) { if (data == null) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyObjectInspectorParameters.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyObjectInspectorParameters.java new file mode 100644 index 0000000..ac089ac --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyObjectInspectorParameters.java @@ -0,0 +1,10 @@ +package org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive; + +import java.util.List; + +public interface LazyObjectInspectorParameters { + public boolean isEscaped(); + public byte getEscapeChar(); + public boolean isExtendedBooleanLiteral(); + public List getTimestampFormats(); +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyObjectInspectorParametersImpl.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyObjectInspectorParametersImpl.java new file mode 100644 index 0000000..972787a --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyObjectInspectorParametersImpl.java @@ -0,0 +1,86 @@ +package org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive; + +import java.util.List; + +import org.apache.commons.lang.builder.HashCodeBuilder; + +public class LazyObjectInspectorParametersImpl implements + LazyObjectInspectorParameters { + protected 
boolean escaped; + protected byte escapeChar; + protected boolean extendedBooleanLiteral; + protected List timestampFormats; + + public LazyObjectInspectorParametersImpl() { + this.escaped = false; + this.extendedBooleanLiteral = false; + this.timestampFormats = null; + } + + public LazyObjectInspectorParametersImpl(boolean escaped, byte escapeChar, + boolean extendedBooleanLiteral, List timestampFormats) { + super(); + this.escaped = escaped; + this.escapeChar = escapeChar; + this.extendedBooleanLiteral = extendedBooleanLiteral; + this.timestampFormats = timestampFormats; + } + + public LazyObjectInspectorParametersImpl(LazyObjectInspectorParameters lazyParams) { + this.escaped = lazyParams.isEscaped(); + this.escapeChar = lazyParams.getEscapeChar(); + this.extendedBooleanLiteral = lazyParams.isExtendedBooleanLiteral(); + this.timestampFormats = lazyParams.getTimestampFormats(); + } + + public boolean isEscaped() { + return escaped; + } + + public byte getEscapeChar() { + return escapeChar; + } + + public boolean isExtendedBooleanLiteral() { + return extendedBooleanLiteral; + } + + public List getTimestampFormats() { + return timestampFormats; + } + + + protected boolean equals(LazyObjectInspectorParametersImpl other) { + boolean isEqual = this.escaped == other.escaped + && this.escapeChar == other.escapeChar + && this.extendedBooleanLiteral == other.extendedBooleanLiteral; + if (isEqual) { + if (this.timestampFormats != other.timestampFormats) { + if (this.timestampFormats != null && other.timestampFormats != null) { + isEqual = this.timestampFormats.equals(other.timestampFormats); + } else { + // One is null and one is not - they are not equal + isEqual = false; + } + } + } + return isEqual; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof LazyObjectInspectorParametersImpl)) { + return false; + } + return equals((LazyObjectInspectorParametersImpl) obj); + } + + @Override + public int 
hashCode() { + return new HashCodeBuilder().append(escaped).append(escapeChar) + .append(extendedBooleanLiteral).append(timestampFormats).toHashCode(); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java index 08fec77..18512f4 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.hadoop.hive.serde.serdeConstants; @@ -118,17 +119,28 @@ private LazyPrimitiveObjectInspectorFactory() { public static AbstractPrimitiveLazyObjectInspector getLazyObjectInspector( PrimitiveTypeInfo typeInfo, boolean escaped, byte escapeChar, boolean extBoolean) { + LazyObjectInspectorParameters lazyParams = new LazyObjectInspectorParametersImpl( + escaped, escapeChar, extBoolean, null); + return getLazyObjectInspector(typeInfo, lazyParams); + } + + public static AbstractPrimitiveLazyObjectInspector getLazyObjectInspector( + PrimitiveTypeInfo typeInfo, LazyObjectInspectorParameters lazyParams) { PrimitiveCategory primitiveCategory = typeInfo.getPrimitiveCategory(); switch(primitiveCategory) { case STRING: - return getLazyStringObjectInspector(escaped, escapeChar); + return getLazyStringObjectInspector(lazyParams.isEscaped(), lazyParams.getEscapeChar()); case CHAR: - return getLazyHiveCharObjectInspector((CharTypeInfo)typeInfo, escaped, escapeChar); + return getLazyHiveCharObjectInspector((CharTypeInfo)typeInfo, + lazyParams.isEscaped(), lazyParams.getEscapeChar()); case VARCHAR: - return getLazyHiveVarcharObjectInspector((VarcharTypeInfo)typeInfo, escaped, escapeChar); + 
return getLazyHiveVarcharObjectInspector((VarcharTypeInfo)typeInfo, + lazyParams.isEscaped(), lazyParams.getEscapeChar()); case BOOLEAN: - return getLazyBooleanObjectInspector(extBoolean); + return getLazyBooleanObjectInspector(lazyParams.isExtendedBooleanLiteral()); + case TIMESTAMP: + return getLazyTimestampObjectInspector(lazyParams.getTimestampFormats()); default: return getLazyObjectInspector(typeInfo); } @@ -205,6 +217,25 @@ public static LazyHiveVarcharObjectInspector getLazyHiveVarcharObjectInspector( return result; } + public static LazyTimestampObjectInspector getLazyTimestampObjectInspector( + List tsFormats) { + if (tsFormats == null) { + // No timestamp format specified, just use default lazy inspector + return (LazyTimestampObjectInspector) getLazyObjectInspector(TypeInfoFactory.timestampTypeInfo); + } + + ArrayList signature = new ArrayList(); + signature.add(TypeInfoFactory.timestampTypeInfo); + signature.add(tsFormats); + LazyTimestampObjectInspector result = (LazyTimestampObjectInspector) cachedLazyStringTypeOIs + .get(signature); + if (result == null) { + result = new LazyTimestampObjectInspector(tsFormats); + cachedLazyStringTypeOIs.put(signature, result); + } + return result; + } + private static LazyBooleanObjectInspector getLazyBooleanObjectInspector(boolean extLiteral) { return extLiteral ? 
LAZY_EXT_BOOLEAN_OBJECT_INSPECTOR : LAZY_BOOLEAN_OBJECT_INSPECTOR; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyTimestampObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyTimestampObjectInspector.java index 0d15054..3829b08 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyTimestampObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyTimestampObjectInspector.java @@ -18,18 +18,30 @@ package org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive; import java.sql.Timestamp; +import java.util.List; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hive.common.util.TimestampParser; public class LazyTimestampObjectInspector extends AbstractPrimitiveLazyObjectInspector implements TimestampObjectInspector { - protected LazyTimestampObjectInspector() { + protected List timestampFormats = null; + protected TimestampParser timestampParser = null; + + LazyTimestampObjectInspector() { super(TypeInfoFactory.timestampTypeInfo); + timestampParser = new TimestampParser(); + } + + LazyTimestampObjectInspector(List tsFormats) { + super(TypeInfoFactory.timestampTypeInfo); + this.timestampFormats = tsFormats; + timestampParser = new TimestampParser(tsFormats); } public Object copyObject(Object o) { @@ -39,4 +51,13 @@ public Object copyObject(Object o) { public Timestamp getPrimitiveJavaObject(Object o) { return o == null ? null : ((LazyTimestamp) o).getWritableObject().getTimestamp(); } + + public List getTimestampFormats() { + return timestampFormats; + } + + public TimestampParser getTimestampParser() { + return timestampParser; + } + }