diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index ec24c10845..3fb135ff14 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -137,9 +137,10 @@ public static ColumnVector createColumnVector(TypeInfo typeInfo, case SHORT: case INT: case LONG: - case DATE: case INTERVAL_YEAR_MONTH: return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case DATE: + return new DateColumnVector(VectorizedRowBatch.DEFAULT_SIZE); case TIMESTAMP: return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE); case INTERVAL_DAY_TIME: @@ -581,6 +582,8 @@ public static ColumnVector makeLikeColumnVector(ColumnVector source return new DecimalColumnVector(dec64ColVector.vector.length, dec64ColVector.precision, dec64ColVector.scale); + } else if (source instanceof DateColumnVector) { + return new DateColumnVector(((DateColumnVector) source).vector.length); } else if (source instanceof LongColumnVector) { return new LongColumnVector(((LongColumnVector) source).vector.length); } else if (source instanceof DoubleColumnVector) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java index 67bbc3e77c..9cd957ceea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java @@ -21,7 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -46,8 +46,8 @@ private transient final Date date = new Date(0); // Transient members initialized by transientInit method. 
- private transient LongColumnVector dateVector1; - private transient LongColumnVector dateVector2; + private transient DateColumnVector dateVector1; + private transient DateColumnVector dateVector2; public VectorUDFDateDiffColCol(int colNum1, int colNum2, int outputColumnNum) { super(outputColumnNum); @@ -67,8 +67,8 @@ public VectorUDFDateDiffColCol() { public void transientInit(Configuration conf) throws HiveException { super.transientInit(conf); - dateVector1 = new LongColumnVector(); - dateVector2 = new LongColumnVector(); + dateVector1 = new DateColumnVector(); + dateVector2 = new DateColumnVector(); } @Override @@ -83,7 +83,7 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + DateColumnVector outV = (DateColumnVector) batch.cols[outputColumnNum]; long[] outputVector = outV.vector; if (n <= 0) { // Nothing to do @@ -95,8 +95,8 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { */ NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outV, batch.selected, batch.size, batch.selectedInUse); - LongColumnVector convertedVector1 = toDateArray(batch, inputTypeInfos[0], inputColVector1, dateVector1); - LongColumnVector convertedVector2 = toDateArray(batch, inputTypeInfos[1], inputColVector2, dateVector2); + DateColumnVector convertedVector1 = toDateArray(batch, inputTypeInfos[0], inputColVector1, dateVector1); + DateColumnVector convertedVector2 = toDateArray(batch, inputTypeInfos[1], inputColVector2, dateVector2); // Now disregard null in second pass. if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -168,21 +168,21 @@ public void evaluate(VectorizedRowBatch batch) throws HiveException { } } - private LongColumnVector toDateArray(VectorizedRowBatch batch, TypeInfo typeInfo, - ColumnVector inputColVector, LongColumnVector dateVector) { + private DateColumnVector toDateArray(VectorizedRowBatch batch, TypeInfo typeInfo, + ColumnVector inputColVector, DateColumnVector dateVector) { PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); int size = batch.size; if (primitiveCategory == PrimitiveCategory.DATE) { - return (LongColumnVector) inputColVector; + return (DateColumnVector) inputColVector; } if (size > dateVector.vector.length) { if (dateVector1 == dateVector) { - dateVector1 = new LongColumnVector(size * 2); + dateVector1 = new DateColumnVector(size * 2); dateVector = dateVector1; } else { - dateVector2 = new LongColumnVector(size * 2); + dateVector2 = new DateColumnVector(size * 2); dateVector = dateVector2; } } @@ -207,7 +207,7 @@ private LongColumnVector toDateArray(VectorizedRowBatch batch, TypeInfo typeInfo // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. 
public void copySelected( - BytesColumnVector input, boolean selectedInUse, int[] sel, int size, LongColumnVector output) { + BytesColumnVector input, boolean selectedInUse, int[] sel, int size, DateColumnVector output) { output.isRepeating = false; @@ -292,7 +292,7 @@ public void copySelected( } } - private void setDays(BytesColumnVector input, LongColumnVector output, int i) { + private void setDays(BytesColumnVector input, DateColumnVector output, int i) { String string = new String(input.vector[i], input.start[i], input.length[i]); try { date.setTime(formatter.parse(string).getTime()); @@ -306,7 +306,7 @@ private void setDays(BytesColumnVector input, LongColumnVector output, int i) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. public void copySelected( - TimestampColumnVector input, boolean selectedInUse, int[] sel, int size, LongColumnVector output) { + TimestampColumnVector input, boolean selectedInUse, int[] sel, int size, DateColumnVector output) { output.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java index 5c1ce70075..15853161c0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; @@ -283,7 +284,6 @@ private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory categor case INT: case BYTE: case SHORT: - case DATE: case INTERVAL_YEAR_MONTH: case LONG: lcv.child = new LongColumnVector(total); @@ -291,6 +291,12 @@ private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory categor ((LongColumnVector) lcv.child).vector[i] = ((List) valueList).get(i); } break; + case DATE: + lcv.child = new DateColumnVector(total); + for (int i = 0; i < valueList.size(); i++) { + ((DateColumnVector) lcv.child).vector[i] = ((List) valueList).get(i); + } + break; case DOUBLE: lcv.child = new DoubleColumnVector(total); for (int i = 0; i < valueList.size(); i++) { @@ -367,7 +373,18 @@ private ColumnVector getChildData(ListColumnVector lcv, int index) { int length = (int)lcv.lengths[index]; ColumnVector child = lcv.child; ColumnVector resultCV = null; - if (child instanceof LongColumnVector) { + if (child instanceof DateColumnVector) { + resultCV = new DateColumnVector(length); + try { + System.arraycopy(((DateColumnVector) lcv.child).vector, start, + ((DateColumnVector) resultCV).vector, 0, length); + } catch (Exception e) { + throw new RuntimeException( + "Fail to copy at index:" + index + ", start:" + start + ",length:" + length + ",vec " + + "len:" + ((LongColumnVector) lcv.child).vector.length + ", offset len:" + lcv + .offsets.length + ", len len:" + lcv.lengths.length, e); + } + } else if (child instanceof LongColumnVector) { resultCV = new LongColumnVector(length); try { System.arraycopy(((LongColumnVector) lcv.child).vector, 
start, diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java index 258b47bbec..6ebac6c97b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java @@ -32,15 +32,19 @@ static final String[] COLORS = {"red", "yellow", "green", "blue", "violet", "orange"}; private static byte[][] colorsBytes; + private static final int TEST_NUM_COLS = 4; + private VectorizedRowBatch makeBatch() { - VectorizedRowBatch batch = new VectorizedRowBatch(3); + VectorizedRowBatch batch = new VectorizedRowBatch(TEST_NUM_COLS); LongColumnVector lv = new LongColumnVector(); + DateColumnVector dav = new DateColumnVector(); DoubleColumnVector dv = new DoubleColumnVector(); BytesColumnVector bv = new BytesColumnVector(); setSampleStringCol(bv); batch.cols[0] = lv; - batch.cols[1] = dv; - batch.cols[2] = bv; + batch.cols[1] = dav; + batch.cols[2] = dv; + batch.cols[3] = bv; addRandomNulls(batch); return batch; } @@ -52,13 +56,15 @@ private VectorizedRowBatch makeBatch() { */ public void testVectorizedRowBatchCreate() { VectorizedRowBatch batch = makeBatch(); - Assert.assertEquals(3, batch.numCols); + Assert.assertEquals(TEST_NUM_COLS, batch.numCols); Assert.assertEquals(VectorizedRowBatch.DEFAULT_SIZE, batch.size); Assert.assertEquals(((LongColumnVector) batch.cols[0]).vector.length, VectorizedRowBatch.DEFAULT_SIZE); - Assert.assertEquals(((DoubleColumnVector) batch.cols[1]).vector.length, + Assert.assertEquals(((DateColumnVector) batch.cols[1]).vector.length, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertEquals(((DoubleColumnVector) batch.cols[2]).vector.length, VectorizedRowBatch.DEFAULT_SIZE); - Assert.assertEquals(((BytesColumnVector) batch.cols[2]).vector.length, + Assert.assertEquals(((BytesColumnVector) batch.cols[3]).vector.length, VectorizedRowBatch.DEFAULT_SIZE); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java index 078b2e9d34..cf340a2c43 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java @@ -32,7 +32,7 @@ import org.junit.Assert; import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -97,13 +97,13 @@ private TimestampLocalTZWritable toTimestampLocalTZWritable(long daysSinceEpoch) private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size) { VectorizedRowBatch batch = new VectorizedRowBatch(2, size); - LongColumnVector lcv = new LongColumnVector(size); + DateColumnVector lcv = new DateColumnVector(size); Random rand = new Random(seed); for (int i = 0; i < size; i++) { lcv.vector[i] = (rand.nextInt()); } batch.cols[0] = lcv; - batch.cols[1] = new LongColumnVector(size); + batch.cols[1] = new DateColumnVector(size); batch.size = size; return batch; } @@ -113,12 +113,12 @@ private VectorizedRowBatch
getVectorizedRandomRowBatch(int seed, int size) { */ private VectorizedRowBatch getVectorizedRowBatch(int[] inputs, int size) { VectorizedRowBatch batch = new VectorizedRowBatch(2, size); - LongColumnVector lcv = new LongColumnVector(size); + DateColumnVector lcv = new DateColumnVector(size); for (int i = 0; i < size; i++) { lcv.vector[i] = inputs[i % inputs.length]; } batch.cols[0] = lcv; - batch.cols[1] = new LongColumnVector(size); + batch.cols[1] = new DateColumnVector(size); batch.size = size; return batch; } @@ -145,8 +145,8 @@ private void verifyUDFYear(VectorizedRowBatch batch) throws HiveException { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = ((LongColumnVector) batch.cols[in]).vector[i]; - long y = ((LongColumnVector) batch.cols[out]).vector[i]; + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((DateColumnVector) batch.cols[out]).vector[i]; compareToUDFYearDate(t, (int) y); } else { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); @@ -158,8 +158,8 @@ private void verifyUDFYear(VectorizedRowBatch batch) throws HiveException { public void testVectorUDFYear() throws HiveException { VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, VectorizedRowBatch.DEFAULT_SIZE); - Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); - Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + Assert.assertTrue(((DateColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((DateColumnVector) batch.cols[1]).isRepeating); verifyUDFYear(batch); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFYear(batch); @@ -209,8 +209,8 @@ private void verifyUDFDayOfMonth(VectorizedRowBatch batch) throws HiveException if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = ((LongColumnVector) batch.cols[in]).vector[i]; - long y = ((LongColumnVector) batch.cols[out]).vector[i]; + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((DateColumnVector) batch.cols[out]).vector[i]; compareToUDFDayOfMonthDate(t, (int) y); } else { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); @@ -222,8 +222,8 @@ private void verifyUDFDayOfMonth(VectorizedRowBatch batch) throws HiveException public void testVectorUDFDayOfMonth() throws HiveException { VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, VectorizedRowBatch.DEFAULT_SIZE); - Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); - Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + Assert.assertTrue(((DateColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((DateColumnVector) batch.cols[1]).isRepeating); verifyUDFDayOfMonth(batch); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFDayOfMonth(batch); @@ -273,8 +273,8 @@ private void verifyUDFMonth(VectorizedRowBatch batch) throws HiveException { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = ((LongColumnVector) batch.cols[in]).vector[i]; - long y = ((LongColumnVector) batch.cols[out]).vector[i]; + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((DateColumnVector) batch.cols[out]).vector[i]; compareToUDFMonthDate(t, (int) y); } else { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); @@ -286,8 +286,8 @@ private void verifyUDFMonth(VectorizedRowBatch batch) throws HiveException { public 
void testVectorUDFMonth() throws HiveException { VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, VectorizedRowBatch.DEFAULT_SIZE); - Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); - Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + Assert.assertTrue(((DateColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((DateColumnVector) batch.cols[1]).isRepeating); verifyUDFMonth(batch); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFMonth(batch); @@ -345,8 +345,8 @@ private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch) throws HiveExcepti if (!batch.cols[out].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = ((LongColumnVector) batch.cols[in]).vector[i]; - long y = ((LongColumnVector) batch.cols[out]).vector[i]; + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((DateColumnVector) batch.cols[out]).vector[i]; compareToUDFUnixTimeStampDate(t, y); } else { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); @@ -358,8 +358,8 @@ private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch) throws HiveExcepti public void testVectorUDFUnixTimeStamp() throws HiveException { VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, VectorizedRowBatch.DEFAULT_SIZE); - Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); - Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + Assert.assertTrue(((DateColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((DateColumnVector) batch.cols[1]).isRepeating); verifyUDFUnixTimeStamp(batch); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFUnixTimeStamp(batch); @@ -405,8 +405,8 @@ private void verifyUDFWeekOfYear(VectorizedRowBatch batch) throws HiveException for (int i = 0; i < batch.size; i++) { if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { - long t = ((LongColumnVector) batch.cols[in]).vector[i]; - long y = ((LongColumnVector) batch.cols[out]).vector[i]; + long t = ((DateColumnVector) batch.cols[in]).vector[i]; + long y = ((DateColumnVector) batch.cols[out]).vector[i]; compareToUDFWeekOfYearDate(t, (int) y); } else { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); @@ -418,8 +418,8 @@ private void verifyUDFWeekOfYear(VectorizedRowBatch batch) throws HiveException public void testVectorUDFWeekOfYear() throws HiveException { VectorizedRowBatch batch = getVectorizedRowBatch(new int[] {0}, VectorizedRowBatch.DEFAULT_SIZE); - Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); - Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); + Assert.assertTrue(((DateColumnVector) batch.cols[1]).noNulls); + Assert.assertFalse(((DateColumnVector) batch.cols[1]).isRepeating); verifyUDFWeekOfYear(batch); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFWeekOfYear(batch); @@ -461,7 +461,7 @@ public Void call() throws Exception { VectorUDFDateString udf = new VectorUDFDateString(0, 1); VectorizedRowBatch batch = new VectorizedRowBatch(2, batchSize); BytesColumnVector in = new BytesColumnVector(batchSize); - LongColumnVector out = new LongColumnVector(batchSize); + DateColumnVector out = new DateColumnVector(batchSize); batch.cols[0] = in; batch.cols[1] = out; for (int i = 0; i < batchSize; i++) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java index f42282c40f..2ceb28ed5d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java @@ -21,7 +21,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -63,8 +63,8 @@ private long newRandom(int i) { return random.nextInt(i); } - private LongColumnVector newRandomLongColumnVector(int range, int size) { - LongColumnVector vector = new LongColumnVector(size); + private DateColumnVector newRandomDateColumnVector(int range, int size) { + DateColumnVector vector = new DateColumnVector(size); for (int i = 0; i < size; i++) { vector.vector[i] = random.nextInt(range); } @@ -83,7 +83,7 @@ private TypeInfo primitiveCategoryToTypeInfo(PrimitiveCategory primitiveCategory throw new RuntimeException("Unexpected primitive category " + primitiveCategory); } } - private TimestampColumnVector toTimestamp(LongColumnVector date) { + private TimestampColumnVector toTimestamp(DateColumnVector date) { TimestampColumnVector vector = new TimestampColumnVector(size); for (int i = 0; i < size; i++) { if (date.isNull[i]) { @@ -100,7 +100,7 @@ private Timestamp toTimestamp(long date) { return new Timestamp(DateWritableV2.daysToMillis((int) date)); } - private BytesColumnVector toString(LongColumnVector date) { + private BytesColumnVector toString(DateColumnVector date) { BytesColumnVector bcv = new BytesColumnVector(size); for (int i = 0; i < size; i++) { if (date.isNull[i]) { @@ -121,7 +121,7 @@ private BytesColumnVector toString(LongColumnVector date) { } private void validateDateAdd(VectorizedRowBatch batch, PrimitiveCategory colType1, long scalar2, - boolean isPositive, LongColumnVector date1) + boolean isPositive, DateColumnVector date1) throws HiveException { VectorUDFDateAddColScalar udf; if (isPositive) { @@ -132,7 +132,7 @@ private void validateDateAdd(VectorizedRowBatch batch, PrimitiveCategory colType udf.setInputTypeInfos(new TypeInfo[] {primitiveCategoryToTypeInfo(colType1), TypeInfoFactory.voidTypeInfo}); udf.transientInit(hiveConf); udf.evaluate(batch); - LongColumnVector output = (LongColumnVector) batch.cols[1]; + DateColumnVector output = (DateColumnVector) batch.cols[1]; try { for (int i = 0; i < size; i++) { @@ -154,7 +154,7 @@ private void validateDateAdd(VectorizedRowBatch batch, PrimitiveCategory colType } } - private ColumnVector castTo(LongColumnVector date, PrimitiveCategory type) { + private ColumnVector castTo(DateColumnVector date, PrimitiveCategory type) { switch (type) { case DATE: return date; @@ -173,10 +173,10 @@ private ColumnVector castTo(LongColumnVector date, PrimitiveCategory type) { private void testDateAddColScalar(PrimitiveCategory colType1, boolean isPositive) throws HiveException { - LongColumnVector date1 = newRandomLongColumnVector(10000, size); + DateColumnVector date1 = newRandomDateColumnVector(10000, size); ColumnVector col1 = castTo(date1, colType1); long scalar2 = 
newRandom(1000); - LongColumnVector output = new LongColumnVector(size); + DateColumnVector output = new DateColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(2, size); batch.cols[0] = col1; @@ -197,7 +197,7 @@ public void testDateAddColScalar() throws HiveException { udf.transientInit(hiveConf); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[1] = new DateColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; byte[] bytes = "error".getBytes(utf8); bcv.vector[0] = bytes; @@ -217,7 +217,7 @@ public void testDateSubColScalar() throws HiveException { udf.transientInit(hiveConf); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[1] = new DateColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; byte[] bytes = "error".getBytes(utf8); bcv.vector[0] = bytes; @@ -227,7 +227,7 @@ public void testDateSubColScalar() throws HiveException { Assert.assertEquals(batch.cols[1].isNull[0], true); } - private void validateDateAdd(VectorizedRowBatch batch, long scalar1, LongColumnVector date2, + private void validateDateAdd(VectorizedRowBatch batch, long scalar1, DateColumnVector date2, PrimitiveCategory colType1, boolean isPositive) throws HiveException { VectorExpression udf = null; @@ -268,7 +268,7 @@ private void validateDateAdd(VectorizedRowBatch batch, long scalar1, LongColumnV udf.transientInit(hiveConf); udf.evaluate(batch); - LongColumnVector output = (LongColumnVector) batch.cols[1]; + DateColumnVector output = (DateColumnVector) batch.cols[1]; try { for (int i = 0; i < date2.vector.length; i++) { String expected; @@ -290,10 +290,10 @@ private void validateDateAdd(VectorizedRowBatch batch, long scalar1, LongColumnV private void testDateAddScalarCol(PrimitiveCategory colType1, boolean isPositive) throws HiveException { - LongColumnVector date2 = newRandomLongColumnVector(10000, size); + DateColumnVector date2 = newRandomDateColumnVector(10000, size); long scalar1 = newRandom(1000); - LongColumnVector output = new LongColumnVector(size); + DateColumnVector output = new DateColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(2, size); batch.cols[0] = date2; @@ -313,8 +313,8 @@ public void testDateAddScalarCol() throws HiveException { udf.setInputTypeInfos(new TypeInfo[] {TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo}); udf.transientInit(hiveConf); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); - batch.cols[0] = new LongColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[0] = new DateColumnVector(1); + batch.cols[1] = new DateColumnVector(1); udf.evaluate(batch); Assert.assertEquals(batch.cols[1].isNull[0], true); } @@ -328,14 +328,14 @@ public void testDateSubScalarCol() throws HiveException { udf.setInputTypeInfos(new TypeInfo[] {TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo}); udf.transientInit(hiveConf); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); - batch.cols[0] = new LongColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[0] = new DateColumnVector(1); + batch.cols[1] = new DateColumnVector(1); udf.evaluate(batch); Assert.assertEquals(batch.cols[1].isNull[0], true); } private void validateDateAdd(VectorizedRowBatch batch, - LongColumnVector date1, LongColumnVector date2, + DateColumnVector date1, 
DateColumnVector date2, PrimitiveCategory colType1, boolean isPositive) throws HiveException { VectorExpression udf; @@ -347,7 +347,7 @@ private void validateDateAdd(VectorizedRowBatch batch, udf.setInputTypeInfos(new TypeInfo[] {primitiveCategoryToTypeInfo(colType1), TypeInfoFactory.voidTypeInfo}); udf.transientInit(hiveConf); udf.evaluate(batch); - LongColumnVector output = (LongColumnVector) batch.cols[2]; + DateColumnVector output = (DateColumnVector) batch.cols[2]; try { for (int i = 0; i < date2.vector.length; i++) { String expected; @@ -369,11 +369,11 @@ private void validateDateAdd(VectorizedRowBatch batch, private void testDateAddColCol(PrimitiveCategory colType1, boolean isPositive) throws HiveException { - LongColumnVector date1 = newRandomLongColumnVector(10000, size); - LongColumnVector days2 = newRandomLongColumnVector(1000, size); + DateColumnVector date1 = newRandomDateColumnVector(10000, size); + DateColumnVector days2 = newRandomDateColumnVector(1000, size); ColumnVector col1 = castTo(date1, colType1); - LongColumnVector output = new LongColumnVector(size); + DateColumnVector output = new DateColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(3, size); batch.cols[0] = col1; @@ -402,8 +402,8 @@ public void testDateAddColCol() throws HiveException { udf.setInputTypeInfos(new TypeInfo[] {TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo}); udf.transientInit(hiveConf); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new LongColumnVector(1); - batch.cols[2] = new LongColumnVector(1); + batch.cols[1] = new DateColumnVector(1); + batch.cols[2] = new DateColumnVector(1); bcv = (BytesColumnVector) batch.cols[0]; bcv.vector[0] = bytes; bcv.start[0] = 0; @@ -425,8 +425,8 @@ public void testDateSubColCol() throws HiveException { udf.setInputTypeInfos(new TypeInfo[] {TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo}); udf.transientInit(hiveConf); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new LongColumnVector(1); - batch.cols[2] = new LongColumnVector(1); + batch.cols[1] = new DateColumnVector(1); + batch.cols[2] = new DateColumnVector(1); bcv = (BytesColumnVector) batch.cols[0]; bcv.vector[0] = bytes; bcv.start[0] = 0; @@ -437,7 +437,7 @@ public void testDateSubColCol() throws HiveException { private void validateDateDiff(VectorizedRowBatch batch, long scalar1, PrimitiveCategory scalarType1, PrimitiveCategory colType2, - LongColumnVector date2) + DateColumnVector date2) throws HiveException { VectorExpression udf = null; switch (scalarType1) { @@ -459,7 +459,7 @@ private void validateDateDiff(VectorizedRowBatch batch, long scalar1, udf.transientInit(hiveConf); udf.evaluate(batch); - LongColumnVector output = (LongColumnVector) batch.cols[1]; + DateColumnVector output = (DateColumnVector) batch.cols[1]; for (int i = 0; i < date2.vector.length; i++) { Assert.assertEquals(scalar1 - date2.vector[i], output.vector[i]); } @@ -469,8 +469,8 @@ private void validateDateDiff(VectorizedRowBatch batch, long scalar1, public void testDateDiffScalarCol() throws HiveException { for (PrimitiveCategory scalarType1 : dateTimestampStringTypes) { for (PrimitiveCategory colType2 : dateTimestampStringTypes) { - LongColumnVector date2 = newRandomLongColumnVector(10000, size); - LongColumnVector output = new LongColumnVector(size); + DateColumnVector date2 = newRandomDateColumnVector(10000, size); + DateColumnVector output = new DateColumnVector(size); ColumnVector col2 = castTo(date2, colType2); VectorizedRowBatch batch = new 
VectorizedRowBatch(2, size); batch.cols[0] = col2; @@ -492,7 +492,7 @@ public void testDateDiffScalarCol() throws HiveException { udf.setInputTypeInfos(new TypeInfo[] {TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.stringTypeInfo}); udf.transientInit(hiveConf); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[1] = new DateColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; bcv.vector[0] = bytes; @@ -504,13 +504,13 @@ public void testDateDiffScalarCol() throws HiveException { udf = new VectorUDFDateDiffScalarCol(bytes, 0, 1); udf.setInputTypeInfos(new TypeInfo[] {TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo}); udf.transientInit(hiveConf); - batch.cols[0] = new LongColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[0] = new DateColumnVector(1); + batch.cols[1] = new DateColumnVector(1); udf.evaluate(batch); Assert.assertEquals(batch.cols[1].isNull[0], true); } - private void validateDateDiff(VectorizedRowBatch batch, LongColumnVector date1, long scalar2, + private void validateDateDiff(VectorizedRowBatch batch, DateColumnVector date1, long scalar2, PrimitiveCategory colType1, PrimitiveCategory scalarType2) throws HiveException { VectorExpression udf = null; @@ -532,7 +532,7 @@ private void validateDateDiff(VectorizedRowBatch batch, LongColumnVector date1, udf.transientInit(hiveConf); udf.evaluate(batch); - LongColumnVector output = (LongColumnVector) batch.cols[1]; + DateColumnVector output = (DateColumnVector) batch.cols[1]; for (int i = 0; i < date1.vector.length; i++) { Assert.assertEquals(date1.vector[i] - scalar2, output.vector[i]); } @@ -542,8 +542,8 @@ private void validateDateDiff(VectorizedRowBatch batch, LongColumnVector date1, public void testDateDiffColScalar() throws HiveException { for (PrimitiveCategory colType1 : dateTimestampStringTypes) { for (PrimitiveCategory scalarType2 : dateTimestampStringTypes) { - LongColumnVector date1 = newRandomLongColumnVector(10000, size); - LongColumnVector output = new LongColumnVector(size); + DateColumnVector date1 = newRandomDateColumnVector(10000, size); + DateColumnVector output = new DateColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(2, size); batch.cols[0] = castTo(date1, colType1); batch.cols[1] = output; @@ -562,7 +562,7 @@ public void testDateDiffColScalar() throws HiveException { udf = new VectorUDFDateDiffColScalar(0, 0L, 1); udf.setInputTypeInfos(new TypeInfo[] {TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.stringTypeInfo}); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[1] = new DateColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; bcv.vector[0] = bytes; @@ -574,21 +574,21 @@ public void testDateDiffColScalar() throws HiveException { udf = new VectorUDFDateDiffColScalar(0, bytes, 1); udf.setInputTypeInfos(new TypeInfo[] {TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.stringTypeInfo}); udf.transientInit(hiveConf); - batch.cols[0] = new LongColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[0] = new DateColumnVector(1); + batch.cols[1] = new DateColumnVector(1); udf.evaluate(batch); Assert.assertEquals(batch.cols[1].isNull[0], true); } private void validateDateDiff(VectorizedRowBatch batch, - LongColumnVector date1, LongColumnVector date2, + DateColumnVector date1, DateColumnVector date2, PrimitiveCategory colType1, PrimitiveCategory colType2) throws HiveException { VectorExpression 
udf = new VectorUDFDateDiffColCol(0, 1, 2); udf.setInputTypeInfos(new TypeInfo[] {primitiveCategoryToTypeInfo(colType1), primitiveCategoryToTypeInfo(colType2)}); udf.transientInit(hiveConf); udf.evaluate(batch); - LongColumnVector output = (LongColumnVector) batch.cols[2]; + DateColumnVector output = (DateColumnVector) batch.cols[2]; for (int i = 0; i < date1.vector.length; i++) { if (date1.isNull[i] || date2.isNull[i]) { Assert.assertTrue(output.isNull[i]); @@ -602,9 +602,9 @@ private void validateDateDiff(VectorizedRowBatch batch, public void testDateDiffColCol() throws HiveException { for (PrimitiveCategory colType1 : dateTimestampStringTypes) { for (PrimitiveCategory colType2 : dateTimestampStringTypes) { - LongColumnVector date1 = newRandomLongColumnVector(10000, size); - LongColumnVector date2 = newRandomLongColumnVector(10000, size); - LongColumnVector output = new LongColumnVector(size); + DateColumnVector date1 = newRandomDateColumnVector(10000, size); + DateColumnVector date2 = newRandomDateColumnVector(10000, size); + DateColumnVector output = new DateColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(3, size); batch.cols[0] = castTo(date1, colType1); @@ -630,7 +630,7 @@ public void testDateDiffColCol() throws HiveException { udf.transientInit(hiveConf); batch.cols[0] = new BytesColumnVector(1); batch.cols[1] = new TimestampColumnVector(1); - batch.cols[2] = new LongColumnVector(1); + batch.cols[2] = new DateColumnVector(1); bcv = (BytesColumnVector) batch.cols[0]; bcv.vector[0] = bytes; bcv.start[0] = 0; @@ -642,7 +642,7 @@ public void testDateDiffColCol() throws HiveException { udf.transientInit(hiveConf); batch.cols[0] = new TimestampColumnVector(1); batch.cols[1] = new BytesColumnVector(1); - batch.cols[2] = new LongColumnVector(1); + batch.cols[2] = new DateColumnVector(1); bcv = (BytesColumnVector) batch.cols[1]; bcv.vector[0] = bytes; bcv.start[0] = 0; @@ -652,7 +652,7 @@ public void testDateDiffColCol() throws HiveException { } private void validateDate(VectorizedRowBatch batch, PrimitiveCategory colType, - LongColumnVector date) throws HiveException { + DateColumnVector date) throws HiveException { VectorExpression udf; if (colType == PrimitiveCategory.STRING) { udf = new VectorUDFDateString(0, 1); @@ -665,7 +665,7 @@ private void validateDate(VectorizedRowBatch batch, PrimitiveCategory colType, udf.setInputTypeInfos(new TypeInfo[] {primitiveCategoryToTypeInfo(colType)}); udf.transientInit(hiveConf); udf.evaluate(batch); - LongColumnVector output = (LongColumnVector) batch.cols[1]; + DateColumnVector output = (DateColumnVector) batch.cols[1]; for (int i = 0; i < size; i++) { String actual; @@ -689,8 +689,8 @@ public void testDate() throws HiveException { if (colType == PrimitiveCategory.DATE) { continue; } - LongColumnVector date = newRandomLongColumnVector(10000, size); - LongColumnVector output = new LongColumnVector(size); + DateColumnVector date = newRandomDateColumnVector(10000, size); + DateColumnVector output = new DateColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(2, size); batch.cols[0] = castTo(date, colType); @@ -707,7 +707,7 @@ public void testDate() throws HiveException { udf.transientInit(hiveConf); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[1] = new DateColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; byte[] bytes = "error".getBytes(utf8); bcv.vector[0] = bytes; @@ -718,7 
+718,7 @@ public void testDate() throws HiveException { } private void validateToDate(VectorizedRowBatch batch, PrimitiveCategory colType, - LongColumnVector date) throws HiveException { + DateColumnVector date) throws HiveException { VectorExpression udf; if (colType == PrimitiveCategory.STRING || colType == PrimitiveCategory.CHAR || @@ -732,7 +732,7 @@ private void validateToDate(VectorizedRowBatch batch, PrimitiveCategory colType, udf.setInputTypeInfos(new TypeInfo[] {primitiveCategoryToTypeInfo(colType)}); udf.transientInit(hiveConf); udf.evaluate(batch); - LongColumnVector output = (LongColumnVector) batch.cols[1]; + DateColumnVector output = (DateColumnVector) batch.cols[1]; for (int i = 0; i < size; i++) { long actual = output.vector[i]; @@ -749,8 +749,8 @@ private void validateToDate(VectorizedRowBatch batch, PrimitiveCategory colType, public void testToDate() throws HiveException { for (PrimitiveCategory type : Arrays.asList(PrimitiveCategory.TIMESTAMP, PrimitiveCategory.STRING)) { - LongColumnVector date = newRandomLongColumnVector(10000, size); - LongColumnVector output = new LongColumnVector(size); + DateColumnVector date = newRandomDateColumnVector(10000, size); + DateColumnVector output = new DateColumnVector(size); VectorizedRowBatch batch = new VectorizedRowBatch(2, size); batch.cols[0] = castTo(date, type); @@ -767,7 +767,7 @@ public void testToDate() throws HiveException { udf.transientInit(hiveConf); VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[1] = new DateColumnVector(1); BytesColumnVector bcv = (BytesColumnVector) batch.cols[0]; byte[] bytes = "error".getBytes(utf8); bcv.vector[0] = bytes; diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java new file mode 100644 index 0000000000..3b79200be3 --- /dev/null +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector; + +import java.sql.Date; +import java.text.SimpleDateFormat; +import java.util.GregorianCalendar; + +/** + * This class extends LongColumnVector in order to introduce some date-specific semantics. 
+ */ +public class DateColumnVector extends LongColumnVector { + public static final GregorianCalendar PROLEPTIC_GREGORIAN_CALENDAR = new GregorianCalendar(); + public static final GregorianCalendar GREGORIAN_CALENDAR = new GregorianCalendar(); + + private static final SimpleDateFormat PROLEPTIC_GREGORIAN_DATE_FORMATTER = + new SimpleDateFormat("yyyy-MM-dd"); + private static final SimpleDateFormat GREGORIAN_DATE_FORMATTER = + new SimpleDateFormat("yyyy-MM-dd"); + + static { + PROLEPTIC_GREGORIAN_CALENDAR.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); + + PROLEPTIC_GREGORIAN_CALENDAR.setLenient(false); + GREGORIAN_CALENDAR.setLenient(false); + + PROLEPTIC_GREGORIAN_DATE_FORMATTER.setCalendar(PROLEPTIC_GREGORIAN_CALENDAR); + GREGORIAN_DATE_FORMATTER.setCalendar(GREGORIAN_CALENDAR); + } + + private boolean usingProlepticCalendar = false; + + public DateColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Change the calendar to or from proleptic. If the new and old values of the flag are the same, + * nothing is done. useProleptic - set the flag for the proleptic calendar updateData - change the + * data to match the new value of the flag. + */ + public void changeCalendar(boolean useProleptic, boolean updateData) { + if (useProleptic == usingProlepticCalendar) { + return; + } + usingProlepticCalendar = useProleptic; + if (updateData) { + updateDataAccordingProlepticSetting(); + } + } + + private void updateDataAccordingProlepticSetting() { + for (int i = 0; i < vector.length; i++) { + Date oldDate = new java.sql.Date(vector[i]); + vector[i] = java.sql.Date + .valueOf(usingProlepticCalendar ? PROLEPTIC_GREGORIAN_DATE_FORMATTER.format(oldDate) + : GREGORIAN_DATE_FORMATTER.format(oldDate)) + .getTime(); + } + } + + /** + * Detect whether this data is using the proleptic calendar. + */ + public boolean usingProlepticCalendar() { + return usingProlepticCalendar; + } + + /** + * Don't use this except for testing purposes. + * + * @param len the number of rows + */ + public DateColumnVector(int len) { + super(len); + } + + @Override + public void shallowCopyTo(ColumnVector otherCv) { + DateColumnVector other = (DateColumnVector) otherCv; + super.shallowCopyTo(other); + other.vector = vector; + } +} diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java index 3b5f3bae90..00cb8405fb 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java @@ -18,10 +18,13 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.sql.Timestamp; +import java.text.SimpleDateFormat; import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneOffset; import java.util.Arrays; +import java.util.GregorianCalendar; +import java.util.TimeZone; import org.apache.hadoop.io.Writable; @@ -38,7 +41,22 @@ * using the scratch timestamp, and then perhaps update the column vector row with a result. 
*/ public class TimestampColumnVector extends ColumnVector { + public static final GregorianCalendar PROLEPTIC_GREGORIAN_CALENDAR_UTC = + new GregorianCalendar(TimeZone.getTimeZone("UTC")); + public static final GregorianCalendar GREGORIAN_CALENDAR_UTC = + new GregorianCalendar(TimeZone.getTimeZone("UTC")); + private static final SimpleDateFormat PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + private static final SimpleDateFormat GREGORIAN_TIMESTAMP_FORMATTER_UTC = + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + static { + PROLEPTIC_GREGORIAN_CALENDAR_UTC.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); + + PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC.setCalendar(PROLEPTIC_GREGORIAN_CALENDAR_UTC); + GREGORIAN_TIMESTAMP_FORMATTER_UTC.setCalendar(GREGORIAN_CALENDAR_UTC); + } /* * The storage arrays for this column vector corresponds to the storage of a Timestamp: */ @@ -58,6 +76,8 @@ private boolean isUTC; + private boolean usingProlepticCalendar = false; + /** * Use this constructor by default. All column vectors * should normally be the default size. @@ -546,4 +566,44 @@ public void shallowCopyTo(ColumnVector otherCv) { other.time = time; other.nanos = nanos; } -} + + /** + * Change the calendar to or from proleptic. If the new and old values of the flag are the + * same, nothing is done. + * useProleptic - set the flag for the proleptic calendar + * updateData - change the data to match the new value of the flag. + */ + public void changeCalendar(boolean useProleptic, boolean updateData) { + if (useProleptic == usingProlepticCalendar) { + return; + } + usingProlepticCalendar = useProleptic; + if (updateData) { + updateDataAccordingProlepticSetting(); + } + } + + private void updateDataAccordingProlepticSetting() { + for (int i = 0; i < nanos.length; i++) { + asScratchTimestamp(i); + // Timestamp.valueOf assumes a local datetime, so after the conversion an offset correction is needed + long offset = isUTC ? TimeZone.getDefault().getOffset(scratchTimestamp.getTime()) : 0; + + Timestamp updated = Timestamp.valueOf( + usingProlepticCalendar ? PROLEPTIC_GREGORIAN_TIMESTAMP_FORMATTER_UTC.format(scratchTimestamp) + : GREGORIAN_TIMESTAMP_FORMATTER_UTC.format(scratchTimestamp)); + + scratchTimestamp.setTime(updated.getTime() + offset); + scratchTimestamp.setNanos(nanos[i]); + + setFromScratchTimestamp(i); + } + } + + /** + * Detect whether this data is using the proleptic calendar. + */ + public boolean usingProlepticCalendar() { + return usingProlepticCalendar; + } +} diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java new file mode 100644 index 0000000000..b44eb3c1fd --- /dev/null +++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDateColumnVector.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.time.Instant; + +import org.junit.Assert; +import org.junit.Test; + +public class TestDateColumnVector { + + /** + * Test case for DateColumnVector's changeCalendar + * @throws Exception + */ + @Test + public void testProlepticCalendar() throws Exception { + // proleptic + // gregorian day as proleptic gregorian date + setDateAndVerifyProlepticUpdate("2015-11-29", "2015-11-29", true); + + // first gregorian day as proleptic gregorian date + setDateAndVerifyProlepticUpdate("1582-10-15", "1582-10-15", true); + + // a day before first gregorian day as proleptic gregorian date + setDateAndVerifyProlepticUpdate("1582-10-14", "1582-10-24", true); + + // a day after last julian day as proleptic gregorian date + setDateAndVerifyProlepticUpdate("1582-10-05", "1582-10-15", true); + + // last julian day as proleptic gregorian date + setDateAndVerifyProlepticUpdate("1582-10-04", "1582-10-14", true); + + // older julian day as proleptic gregorian date + setDateAndVerifyProlepticUpdate("0601-03-04", "0601-03-07", true); + + // non-proleptic + // gregorian day as non-proleptic gregorian date + setDateAndVerifyProlepticUpdate("2015-11-29", "2015-11-29", false); + + // first gregorian day as non-proleptic gregorian date + setDateAndVerifyProlepticUpdate("1582-10-15", "1582-10-15", false); + + // a day before first gregorian day as non-proleptic gregorian date + setDateAndVerifyProlepticUpdate("1582-10-14", "1582-10-04", false); + + // a day after last julian day as non-proleptic gregorian date + setDateAndVerifyProlepticUpdate("1582-10-05", "1582-09-25", false); + + // last julian day as non-proleptic gregorian date + setDateAndVerifyProlepticUpdate("1582-10-04", "1582-09-24", false); + + // older julian day as non-proleptic gregorian date + setDateAndVerifyProlepticUpdate("0601-03-04", "0601-03-01", false); + } + + private void setDateAndVerifyProlepticUpdate(String dateString, + String expectedGregorianDateString, boolean useProleptic) throws Exception { + Instant instant = Instant.parse(dateString + "T00:00:00Z"); + long timestamp = instant.toEpochMilli(); + + DateColumnVector dateColumnVector = new DateColumnVector(); + dateColumnVector.vector[0] = timestamp; + + dateColumnVector.changeCalendar(useProleptic, true); + + Assert.assertEquals(expectedGregorianDateString, + getTestFormatter(useProleptic).format(dateColumnVector.vector[0])); + } + + private DateFormat getTestFormatter(boolean useProleptic) { + DateFormat testFormatter = new SimpleDateFormat("yyyy-MM-dd"); + if (useProleptic) { + testFormatter.setCalendar(DateColumnVector.PROLEPTIC_GREGORIAN_CALENDAR); + } else { + testFormatter.setCalendar(DateColumnVector.GREGORIAN_CALENDAR); + } + testFormatter.setLenient(false); + + return testFormatter; + } +} diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java index 3095114ae3..a2557c94f2 100644 ---
storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java +++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java @@ -18,26 +18,24 @@ package org.apache.hadoop.hive.ql.exec.vector; -import org.junit.Test; +import static org.junit.Assert.assertTrue; -import java.io.PrintWriter; -import java.math.BigDecimal; -import java.math.RoundingMode; import java.sql.Timestamp; -import java.util.Date; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.time.Instant; import java.util.Random; +import java.util.TimeZone; import org.apache.hadoop.hive.common.type.RandomTypeUtil; - -import static org.junit.Assert.*; +import org.junit.Assert; +import org.junit.Test; /** * Test for ListColumnVector */ public class TestTimestampColumnVector { - private static int TEST_COUNT = 5000; - @Test public void testSaveAndRetrieve() throws Exception { @@ -96,20 +94,94 @@ public void testTimestampCompare() throws Exception { } } - /* + /** + * Test case for TimestampColumnVector's changeCalendar + */ @Test - public void testGenerate() throws Exception { - PrintWriter writer = new PrintWriter("/Users/you/timestamps.txt"); - Random r = new Random(18485); - for (int i = 0; i < 25; i++) { - Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r); - writer.println(randTimestamp.toString()); - } - for (int i = 0; i < 25; i++) { - Timestamp randTimestamp = RandomTypeUtil.getRandTimestamp(r, 1965, 2025); - writer.println(randTimestamp.toString()); + public void testProlepticCalendar() { + // proleptic + // a random gregorian day as proleptic gregorian date + setInstantAndVerifyProlepticUpdate("2015-11-29T12:00:00.123Z", "2015-11-29 12:00:00.123", true, true); + setInstantAndVerifyProlepticUpdate("2015-11-29T12:00:00.123Z", "2015-11-29 12:00:00.123", true, false); + + // first gregorian day as proleptic gregorian date + setInstantAndVerifyProlepticUpdate("1582-10-15T11:17:22.123Z", "1582-10-15 11:17:22.123", true, true); + setInstantAndVerifyProlepticUpdate("1582-10-15T11:17:22.123Z", "1582-10-15 11:17:22.123", true, false); + + // a day before first gregorian day as proleptic gregorian date + setInstantAndVerifyProlepticUpdate("1582-10-14T11:17:22.123Z", "1582-10-24 11:17:22.123", true, true); + setInstantAndVerifyProlepticUpdate("1582-10-14T11:17:22.123Z", "1582-10-24 11:17:22.123", true, false); + + // a day after last julian day as proleptic gregorian date + setInstantAndVerifyProlepticUpdate("1582-10-05T11:17:22.123Z", "1582-10-15 11:17:22.123", true, true); + setInstantAndVerifyProlepticUpdate("1582-10-05T11:17:22.123Z", "1582-10-15 11:17:22.123", true, false); + + // last julian day as proleptic gregorian date + setInstantAndVerifyProlepticUpdate("1582-10-04T11:17:22.123Z", "1582-10-14 11:17:22.123", true, true); + setInstantAndVerifyProlepticUpdate("1582-10-04T11:17:22.123Z", "1582-10-14 11:17:22.123", true, false); + + // older julian day as proleptic gregorian date + setInstantAndVerifyProlepticUpdate("0601-03-04T11:17:22.123Z", "0601-03-07 11:17:22.123", true, true); + setInstantAndVerifyProlepticUpdate("0601-03-04T11:17:22.123Z", "0601-03-07 11:17:22.123", true, false); + + // non-proleptic + // a random gregorian day as non-proleptic gregorian date + setInstantAndVerifyProlepticUpdate("2015-11-29T12:00:00.123Z", "2015-11-29 12:00:00.123", false, true); + setInstantAndVerifyProlepticUpdate("2015-11-29T12:00:00.123Z", "2015-11-29 12:00:00.123", false, false); + + // first gregorian day as non-proleptic gregorian
date + setInstantAndVerifyProlepticUpdate("1582-10-15T11:17:22.123Z", "1582-10-15 11:17:22.123", false, true); + setInstantAndVerifyProlepticUpdate("1582-10-15T11:17:22.123Z", "1582-10-15 11:17:22.123", false, false); + + // a day before first gregorian day as non-proleptic gregorian date + setInstantAndVerifyProlepticUpdate("1582-10-14T11:17:22.123Z", "1582-10-04 11:17:22.123", false, true); + setInstantAndVerifyProlepticUpdate("1582-10-14T11:17:22.123Z", "1582-10-04 11:17:22.123", false, false); + + // a day after last julian day as non-proleptic gregorian date + setInstantAndVerifyProlepticUpdate("1582-10-05T11:17:22.123Z", "1582-09-25 11:17:22.123", false, true); + setInstantAndVerifyProlepticUpdate("1582-10-05T11:17:22.123Z", "1582-09-25 11:17:22.123", false, false); + + // last julian day as non-proleptic gregorian date + setInstantAndVerifyProlepticUpdate("1582-10-04T11:17:22.123Z", "1582-09-24 11:17:22.123", false, true); + setInstantAndVerifyProlepticUpdate("1582-10-04T11:17:22.123Z", "1582-09-24 11:17:22.123", false, false); + + // older julian day as non-proleptic gregorian date + setInstantAndVerifyProlepticUpdate("0601-03-04T11:17:22.123Z", "0601-03-01 11:17:22.123", false, true); + setInstantAndVerifyProlepticUpdate("0601-03-04T11:17:22.123Z", "0601-03-01 11:17:22.123", false, false); + } + + private void setInstantAndVerifyProlepticUpdate(String momentInUtc, String expected, + boolean useProleptic, boolean isUTC) { + TimeZone timeZone = isUTC ? TimeZone.getTimeZone("UTC") : TimeZone.getDefault(); + DateFormat testFormatter = getTestFormatter(useProleptic, timeZone); + + Instant instant = Instant.parse(momentInUtc); // instant is always a moment in UTC + long offsetFromUTC = timeZone.getOffset(instant.toEpochMilli()); + + int nanos = instant.getNano() + new Random().nextInt(999999); + TimestampColumnVector timestampColVector = new TimestampColumnVector(); + timestampColVector.setIsUTC(isUTC); + timestampColVector.time[0] = instant.toEpochMilli(); + timestampColVector.nanos[0] = nanos; + + timestampColVector.changeCalendar(useProleptic, true); + + Assert.assertEquals(expected, testFormatter + .format(Timestamp.from(Instant.ofEpochMilli(timestampColVector.time[0] - offsetFromUTC)))); + Assert.assertEquals(nanos, timestampColVector.nanos[0]); // preserving nanos + } + + private DateFormat getTestFormatter(boolean useProleptic, TimeZone timeZone) { + DateFormat testFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); + + if (useProleptic) { + testFormatter.setCalendar(TimestampColumnVector.PROLEPTIC_GREGORIAN_CALENDAR_UTC); + } else { + testFormatter.setCalendar(TimestampColumnVector.GREGORIAN_CALENDAR_UTC); } - writer.close(); + testFormatter.setTimeZone(timeZone); + testFormatter.setLenient(false); + + return testFormatter; } - */ }
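
Usage sketch (reviewer note, not part of the patch): the snippet below drives the new DateColumnVector.changeCalendar(useProleptic, updateData) API end to end, assuming the same epoch-millisecond convention that TestDateColumnVector uses above. The demo class name is hypothetical; the expected output is the value asserted by that test.

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.Instant;

import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;

public class DateCalendarDemo {
  public static void main(String[] args) {
    // 1582-10-14 does not exist in the hybrid Julian/Gregorian calendar: the day
    // after 1582-10-04 (Julian) is 1582-10-15 (Gregorian). Rewriting the data to
    // the proleptic calendar therefore shifts this value across the cutover gap.
    DateColumnVector dcv = new DateColumnVector();
    dcv.vector[0] = Instant.parse("1582-10-14T00:00:00Z").toEpochMilli();
    dcv.changeCalendar(true, true); // set the proleptic flag and rewrite the data

    DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
    formatter.setCalendar(DateColumnVector.PROLEPTIC_GREGORIAN_CALENDAR);
    formatter.setLenient(false);
    System.out.println(formatter.format(dcv.vector[0])); // prints 1582-10-24
  }
}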
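
The timestamp path works the same way, but it preserves the nanosecond component and compensates for Timestamp.valueOf's local-time assumption when isUTC is set. A second sketch under the same assumptions (hypothetical class name, expected output taken from the UTC case of TestTimestampColumnVector):

import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.util.TimeZone;

import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class TimestampCalendarDemo {
  public static void main(String[] args) {
    Instant instant = Instant.parse("1582-10-04T11:17:22.123Z"); // last Julian day

    TimestampColumnVector tcv = new TimestampColumnVector();
    tcv.setIsUTC(true);                   // stored times are to be read as UTC
    tcv.time[0] = instant.toEpochMilli();
    tcv.nanos[0] = instant.getNano();

    tcv.changeCalendar(true, true);       // Julian 1582-10-04 becomes proleptic 1582-10-14

    DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    formatter.setCalendar(TimestampColumnVector.PROLEPTIC_GREGORIAN_CALENDAR_UTC);
    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
    formatter.setLenient(false);
    // prints 1582-10-14 11:17:22.123; tcv.nanos[0] still holds the original nanos
    System.out.println(formatter.format(Timestamp.from(Instant.ofEpochMilli(tcv.time[0]))));
  }
}

One caveat worth flagging in review: both classes route the rewrite through shared static SimpleDateFormat/GregorianCalendar instances, and SimpleDateFormat is not thread-safe, so concurrent changeCalendar calls on different vectors could race.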