diff --git ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
index 6c57da2..2e369ec 100644
--- ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
+++ ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
@@ -35,7 +35,40 @@ private static String [][] templateExpansions = {
- // The following datetime/interval arithmetic operations can be done using the vectorized values
+
+ /**
+ * date is stored in a LongColumnVector as epochDays
+ * interval_year_month is stored in a LongColumnVector as months
+ *
+ * interval_day_time and timestamp are stored in a TimestampColumnVector (2 longs to hold
+ * a very large number of nanoseconds)
+ *
+ * date - date --> type: interval_day_time
+ * timestamp - date --> type: interval_day_time
+ * date - timestamp --> type: interval_day_time
+ * timestamp - timestamp --> type: interval_day_time
+ *
+ * date +|- interval_day_time --> type: timestamp
+ * interval_day_time + date --> type: timestamp
+ *
+ * timestamp +|- interval_day_time --> type: timestamp
+ * interval_day_time +|- timestamp --> type: timestamp
+ *
+ * date +|- interval_year_month --> type: date
+ * interval_year_month + date --> type: date
+ *
+ * timestamp +|- interval_year_month --> type: timestamp
+ * interval_year_month + timestamp --> type: timestamp
+ *
+ * Adding/subtracting months is done with a Calendar object
+ *
+ * Timestamp compared with long: the long is interpreted as seconds
+ * Timestamp compared with double: the double is interpreted as seconds with fractional nanoseconds
+ *
+ */
+
+ // The following datetime/interval arithmetic operations can be done using the vectorized values.
+ // Type interval_year_month (LongColumnVector storing months).
{"DTIColumnArithmeticDTIScalarNoConvert", "Add", "interval_year_month", "interval_year_month", "+"},
{"DTIScalarArithmeticDTIColumnNoConvert", "Add", "interval_year_month", "interval_year_month", "+"},
{"DTIColumnArithmeticDTIColumnNoConvert", "Add", "interval_year_month", "interval_year_month", "+"},
@@ -44,80 +77,114 @@
{"DTIScalarArithmeticDTIColumnNoConvert", "Subtract", "interval_year_month", "interval_year_month", "-"},
{"DTIColumnArithmeticDTIColumnNoConvert", "Subtract", "interval_year_month", "interval_year_month", "-"},
- {"DTIColumnArithmeticDTIScalarNoConvert", "Add", "interval_day_time", "interval_day_time", "+"},
- {"DTIScalarArithmeticDTIColumnNoConvert", "Add", "interval_day_time", "interval_day_time", "+"},
- {"DTIColumnArithmeticDTIColumnNoConvert", "Add", "interval_day_time", "interval_day_time", "+"},
-
- {"DTIColumnArithmeticDTIScalarNoConvert", "Subtract", "interval_day_time", "interval_day_time", "-"},
- {"DTIScalarArithmeticDTIColumnNoConvert", "Subtract", "interval_day_time", "interval_day_time", "-"},
- {"DTIColumnArithmeticDTIColumnNoConvert", "Subtract", "interval_day_time", "interval_day_time", "-"},
-
- {"DTIColumnArithmeticDTIScalarNoConvert", "Add", "interval_day_time", "timestamp", "+"},
- {"DTIScalarArithmeticDTIColumnNoConvert", "Add", "interval_day_time", "timestamp", "+"},
- {"DTIColumnArithmeticDTIColumnNoConvert", "Add", "interval_day_time", "timestamp", "+"},
-
- {"DTIColumnArithmeticDTIScalarNoConvert", "Add", "timestamp", "interval_day_time", "+"},
- {"DTIScalarArithmeticDTIColumnNoConvert", "Add", "timestamp", "interval_day_time", "+"},
- {"DTIColumnArithmeticDTIColumnNoConvert", "Add", "timestamp", "interval_day_time", "+"},
-
- {"DTIColumnArithmeticDTIScalarNoConvert", "Subtract", "timestamp", "interval_day_time", "-"},
-
{"DTIScalarArithmeticDTIColumnNoConvert", "Subtract", "timestamp", "interval_day_time", "-"}, - {"DTIColumnArithmeticDTIColumnNoConvert", "Subtract", "timestamp", "interval_day_time", "-"}, - - {"DTIColumnArithmeticDTIScalarNoConvert", "Subtract", "timestamp", "timestamp", "-"}, - {"DTIScalarArithmeticDTIColumnNoConvert", "Subtract", "timestamp", "timestamp", "-"}, - {"DTIColumnArithmeticDTIColumnNoConvert", "Subtract", "timestamp", "timestamp", "-"}, - - // The following datetime/interval arithmetic functions require type conversion for one or both operands - {"ColumnArithmeticColumnWithConvert", "Subtract", "date", "date", "-", "TimestampUtils.daysToNanoseconds", "TimestampUtils.daysToNanoseconds"}, - {"ScalarArithmeticColumnWithConvert", "Subtract", "date", "date", "-", "TimestampUtils.daysToNanoseconds", "TimestampUtils.daysToNanoseconds"}, - {"ColumnArithmeticScalarWithConvert", "Subtract", "date", "date", "-", "TimestampUtils.daysToNanoseconds", "TimestampUtils.daysToNanoseconds"}, - - {"ColumnArithmeticColumnWithConvert", "Subtract", "date", "timestamp", "-", "TimestampUtils.daysToNanoseconds", ""}, - {"ScalarArithmeticColumnWithConvert", "Subtract", "date", "timestamp", "-", "TimestampUtils.daysToNanoseconds", ""}, - {"ColumnArithmeticScalarWithConvert", "Subtract", "date", "timestamp", "-", "TimestampUtils.daysToNanoseconds", ""}, - - {"ColumnArithmeticColumnWithConvert", "Subtract", "timestamp", "date", "-", "", "TimestampUtils.daysToNanoseconds"}, - {"ScalarArithmeticColumnWithConvert", "Subtract", "timestamp", "date", "-", "", "TimestampUtils.daysToNanoseconds"}, - {"ColumnArithmeticScalarWithConvert", "Subtract", "timestamp", "date", "-", "", "TimestampUtils.daysToNanoseconds"}, - - {"ColumnArithmeticColumnWithConvert", "Add", "date", "interval_day_time", "+", "TimestampUtils.daysToNanoseconds", ""}, - {"ScalarArithmeticColumnWithConvert", "Add", "date", "interval_day_time", "+", "TimestampUtils.daysToNanoseconds", ""}, - {"ColumnArithmeticScalarWithConvert", "Add", "date", "interval_day_time", "+", "TimestampUtils.daysToNanoseconds", ""}, - - {"ColumnArithmeticColumnWithConvert", "Subtract", "date", "interval_day_time", "-", "TimestampUtils.daysToNanoseconds", ""}, - {"ScalarArithmeticColumnWithConvert", "Subtract", "date", "interval_day_time", "-", "TimestampUtils.daysToNanoseconds", ""}, - {"ColumnArithmeticScalarWithConvert", "Subtract", "date", "interval_day_time", "-", "TimestampUtils.daysToNanoseconds", ""}, - - {"ColumnArithmeticColumnWithConvert", "Add", "interval_day_time", "date", "+", "", "TimestampUtils.daysToNanoseconds"}, - {"ScalarArithmeticColumnWithConvert", "Add", "interval_day_time", "date", "+", "", "TimestampUtils.daysToNanoseconds"}, - {"ColumnArithmeticScalarWithConvert", "Add", "interval_day_time", "date", "+", "", "TimestampUtils.daysToNanoseconds"}, - - // Most year-month interval arithmetic needs its own generation - {"DateTimeColumnArithmeticIntervalColumnWithConvert", "Add", "date", "interval_year_month", "+", "", "dtm.addMonthsToDays"}, - {"DateTimeScalarArithmeticIntervalColumnWithConvert", "Add", "date", "interval_year_month", "+", "", "dtm.addMonthsToDays"}, - {"DateTimeColumnArithmeticIntervalScalarWithConvert", "Add", "date", "interval_year_month", "+", "", "dtm.addMonthsToDays"}, - - {"DateTimeColumnArithmeticIntervalColumnWithConvert", "Subtract", "date", "interval_year_month", "-", "", "dtm.addMonthsToDays"}, - {"DateTimeScalarArithmeticIntervalColumnWithConvert", "Subtract", "date", "interval_year_month", "-", "", "dtm.addMonthsToDays"}, 
- {"DateTimeColumnArithmeticIntervalScalarWithConvert", "Subtract", "date", "interval_year_month", "-", "", "dtm.addMonthsToDays"}, - - {"DateTimeColumnArithmeticIntervalColumnWithConvert", "Add", "timestamp", "interval_year_month", "+", "", "dtm.addMonthsToNanosUtc"}, - {"DateTimeScalarArithmeticIntervalColumnWithConvert", "Add", "timestamp", "interval_year_month", "+", "", "dtm.addMonthsToNanosUtc"}, - {"DateTimeColumnArithmeticIntervalScalarWithConvert", "Add", "timestamp", "interval_year_month", "+", "", "dtm.addMonthsToNanosUtc"}, - - {"DateTimeColumnArithmeticIntervalColumnWithConvert", "Subtract", "timestamp", "interval_year_month", "-", "", "dtm.addMonthsToNanosUtc"}, - {"DateTimeScalarArithmeticIntervalColumnWithConvert", "Subtract", "timestamp", "interval_year_month", "-", "", "dtm.addMonthsToNanosUtc"}, - {"DateTimeColumnArithmeticIntervalScalarWithConvert", "Subtract", "timestamp", "interval_year_month", "-", "", "dtm.addMonthsToNanosUtc"}, - - {"IntervalColumnArithmeticDateTimeColumnWithConvert", "Add", "interval_year_month", "date", "+", "", "dtm.addMonthsToDays"}, - {"IntervalScalarArithmeticDateTimeColumnWithConvert", "Add", "interval_year_month", "date", "+", "", "dtm.addMonthsToDays"}, - {"IntervalColumnArithmeticDateTimeScalarWithConvert", "Add", "interval_year_month", "date", "+", "", "dtm.addMonthsToDays"}, - - {"IntervalColumnArithmeticDateTimeColumnWithConvert", "Add", "interval_year_month", "timestamp", "+", "", "dtm.addMonthsToNanosUtc"}, - {"IntervalScalarArithmeticDateTimeColumnWithConvert", "Add", "interval_year_month", "timestamp", "+", "", "dtm.addMonthsToNanosUtc"}, - {"IntervalColumnArithmeticDateTimeScalarWithConvert", "Add", "interval_year_month", "timestamp", "+", "", "dtm.addMonthsToNanosUtc"}, - + // Arithmetic on two TimestampColumnVector base classes. + {"TimestampArithmeticTimestampBase", "Add", "Col", "Column"}, + {"TimestampArithmeticTimestampBase", "Add", "Scalar", "Column"}, + {"TimestampArithmeticTimestampBase", "Add", "Col", "Scalar"}, + + {"TimestampArithmeticTimestampBase", "Subtract", "Col", "Column"}, + {"TimestampArithmeticTimestampBase", "Subtract", "Scalar", "Column"}, + {"TimestampArithmeticTimestampBase", "Subtract", "Col", "Scalar"}, + + // Arithmetic on two type interval_day_time (TimestampColumnVector storing nanosecond interval + // in 2 longs) produces a interval_day_time. + {"TimestampArithmeticTimestamp", "Add", "interval_day_time", "Col", "interval_day_time", "Scalar"}, + {"TimestampArithmeticTimestamp", "Add", "interval_day_time", "Scalar", "interval_day_time", "Column"}, + {"TimestampArithmeticTimestamp", "Add", "interval_day_time", "Col", "interval_day_time", "Column"}, + + {"TimestampArithmeticTimestamp", "Subtract", "interval_day_time", "Col", "interval_day_time", "Scalar"}, + {"TimestampArithmeticTimestamp", "Subtract", "interval_day_time", "Scalar", "interval_day_time", "Column"}, + {"TimestampArithmeticTimestamp", "Subtract", "interval_day_time", "Col", "interval_day_time", "Column"}, + + // A type timestamp (TimestampColumnVector) plus/minus a type interval_day_time (TimestampColumnVector + // storing nanosecond interval in 2 longs) produces a timestamp. 
+ {"TimestampArithmeticTimestamp", "Add", "interval_day_time", "Col", "timestamp", "Scalar"}, + {"TimestampArithmeticTimestamp", "Add", "interval_day_time", "Scalar", "timestamp", "Column"}, + {"TimestampArithmeticTimestamp", "Add", "interval_day_time", "Col", "timestamp", "Column"}, + + {"TimestampArithmeticTimestamp", "Add", "timestamp", "Col", "interval_day_time", "Scalar"}, + {"TimestampArithmeticTimestamp", "Add", "timestamp", "Scalar", "interval_day_time", "Column"}, + {"TimestampArithmeticTimestamp", "Add", "timestamp", "Col", "interval_day_time", "Column"}, + + {"TimestampArithmeticTimestamp", "Subtract", "timestamp", "Col", "interval_day_time", "Scalar"}, + {"TimestampArithmeticTimestamp", "Subtract", "timestamp", "Scalar", "interval_day_time", "Column"}, + {"TimestampArithmeticTimestamp", "Subtract", "timestamp", "Col", "interval_day_time", "Column"}, + + // A type timestamp (TimestampColumnVector) minus a type timestamp produces a + // type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). + {"TimestampArithmeticTimestamp", "Subtract", "timestamp", "Col", "timestamp", "Scalar"}, + {"TimestampArithmeticTimestamp", "Subtract", "timestamp", "Scalar", "timestamp", "Column"}, + {"TimestampArithmeticTimestamp", "Subtract", "timestamp", "Col", "timestamp", "Column"}, + + // Arithmetic on a TimestampColumnVector and date base classes. + {"DateArithmeticTimestampBase", "Add", "Col", "Column"}, + {"DateArithmeticTimestampBase", "Add", "Scalar", "Column"}, + {"DateArithmeticTimestampBase", "Add", "Col", "Scalar"}, + + {"DateArithmeticTimestampBase", "Subtract", "Col", "Column"}, + {"DateArithmeticTimestampBase", "Subtract", "Scalar", "Column"}, + {"DateArithmeticTimestampBase", "Subtract", "Col", "Scalar"}, + + {"TimestampArithmeticDateBase", "Add", "Col", "Column"}, + {"TimestampArithmeticDateBase", "Add", "Scalar", "Column"}, + {"TimestampArithmeticDateBase", "Add", "Col", "Scalar"}, + + {"TimestampArithmeticDateBase", "Subtract", "Col", "Column"}, + {"TimestampArithmeticDateBase", "Subtract", "Scalar", "Column"}, + {"TimestampArithmeticDateBase", "Subtract", "Col", "Scalar"}, + + // Arithmetic with a type date (LongColumnVector storing epoch days) and type interval_day_time (TimestampColumnVector storing + // nanosecond interval in 2 longs) produces a type timestamp (TimestampColumnVector). + {"DateArithmeticTimestamp", "Add", "date", "Col", "interval_day_time", "Column"}, + {"DateArithmeticTimestamp", "Add", "date", "Scalar", "interval_day_time", "Column"}, + {"DateArithmeticTimestamp", "Add", "date", "Col", "interval_day_time", "Scalar"}, + + {"DateArithmeticTimestamp", "Subtract", "date", "Col", "interval_day_time", "Column"}, + {"DateArithmeticTimestamp", "Subtract", "date", "Scalar", "interval_day_time", "Column"}, + {"DateArithmeticTimestamp", "Subtract", "date", "Col", "interval_day_time", "Scalar"}, + + {"TimestampArithmeticDate", "Add", "interval_day_time", "Col", "date", "Column"}, + {"TimestampArithmeticDate", "Add", "interval_day_time", "Scalar", "date", "Column"}, + {"TimestampArithmeticDate", "Add", "interval_day_time", "Col", "date", "Scalar"}, + + // Subtraction with a type date (LongColumnVector storing epoch days) and type timestamp produces a type timestamp (TimestampColumnVector). 
+ {"DateArithmeticTimestamp", "Subtract", "date", "Col", "timestamp", "Column"}, + {"DateArithmeticTimestamp", "Subtract", "date", "Scalar", "timestamp", "Column"}, + {"DateArithmeticTimestamp", "Subtract", "date", "Col", "timestamp", "Scalar"}, + + {"TimestampArithmeticDate", "Subtract", "timestamp", "Col", "date", "Column"}, + {"TimestampArithmeticDate", "Subtract", "timestamp", "Scalar", "date", "Column"}, + {"TimestampArithmeticDate", "Subtract", "timestamp", "Col", "date", "Scalar"}, + + // Arithmetic with a type date (LongColumnVector storing epoch days) and type interval_year_month (LongColumnVector storing + // months) produces a type date via a calendar calculation. + {"DateArithmeticIntervalYearMonth", "Add", "+", "date", "Col", "interval_year_month", "Column"}, + {"DateArithmeticIntervalYearMonth", "Add", "+", "date", "Scalar", "interval_year_month", "Column"}, + {"DateArithmeticIntervalYearMonth", "Add", "+", "date", "Col", "interval_year_month", "Scalar"}, + + {"DateArithmeticIntervalYearMonth", "Subtract", "-", "date", "Col", "interval_year_month", "Column"}, + {"DateArithmeticIntervalYearMonth", "Subtract", "-", "date", "Scalar", "interval_year_month", "Column"}, + {"DateArithmeticIntervalYearMonth", "Subtract", "-", "date", "Col", "interval_year_month", "Scalar"}, + + {"IntervalYearMonthArithmeticDate", "Add", "+", "interval_year_month", "Col", "date", "Column"}, + {"IntervalYearMonthArithmeticDate", "Add", "+", "interval_year_month", "Scalar", "date", "Column"}, + {"IntervalYearMonthArithmeticDate", "Add", "+", "interval_year_month", "Col", "date", "Scalar"}, + + // Arithmetic with a type timestamp (TimestampColumnVector) and type interval_year_month (LongColumnVector storing + // months) produces a type timestamp via a calendar calculation. 
+ {"TimestampArithmeticIntervalYearMonth", "Add", "+", "timestamp", "Col", "interval_year_month", "Column"}, + {"TimestampArithmeticIntervalYearMonth", "Add", "+", "timestamp", "Scalar", "interval_year_month", "Column"}, + {"TimestampArithmeticIntervalYearMonth", "Add", "+", "timestamp", "Col", "interval_year_month", "Scalar"}, + + {"TimestampArithmeticIntervalYearMonth", "Subtract", "-", "timestamp", "Col", "interval_year_month", "Column"}, + {"TimestampArithmeticIntervalYearMonth", "Subtract", "-", "timestamp", "Scalar", "interval_year_month", "Column"}, + {"TimestampArithmeticIntervalYearMonth", "Subtract", "-", "timestamp", "Col", "interval_year_month", "Scalar"}, + + {"IntervalYearMonthArithmeticTimestamp", "Add","+", "interval_year_month", "Col", "timestamp", "Column"}, + {"IntervalYearMonthArithmeticTimestamp", "Add","+", "interval_year_month", "Scalar", "timestamp", "Column"}, + {"IntervalYearMonthArithmeticTimestamp", "Add","+", "interval_year_month", "Col", "timestamp", "Scalar"}, + + // Long/double arithmetic {"ColumnArithmeticScalar", "Add", "long", "long", "+"}, {"ColumnArithmeticScalar", "Subtract", "long", "long", "-"}, {"ColumnArithmeticScalar", "Multiply", "long", "long", "*"}, @@ -251,46 +318,151 @@ {"ScalarCompareColumn", "Greater", "double", "long", ">"}, {"ScalarCompareColumn", "GreaterEqual", "double", "long", ">="}, - {"TimestampColumnCompareTimestampScalar", "Equal"}, - {"TimestampColumnCompareTimestampScalar", "NotEqual"}, - {"TimestampColumnCompareTimestampScalar", "Less"}, - {"TimestampColumnCompareTimestampScalar", "LessEqual"}, - {"TimestampColumnCompareTimestampScalar", "Greater"}, - {"TimestampColumnCompareTimestampScalar", "GreaterEqual"}, - - {"TimestampColumnCompareScalar", "Equal", "long"}, - {"TimestampColumnCompareScalar", "Equal", "double"}, - {"TimestampColumnCompareScalar", "NotEqual", "long"}, - {"TimestampColumnCompareScalar", "NotEqual", "double"}, - {"TimestampColumnCompareScalar", "Less", "long"}, - {"TimestampColumnCompareScalar", "Less", "double"}, - {"TimestampColumnCompareScalar", "LessEqual", "long"}, - {"TimestampColumnCompareScalar", "LessEqual", "double"}, - {"TimestampColumnCompareScalar", "Greater", "long"}, - {"TimestampColumnCompareScalar", "Greater", "double"}, - {"TimestampColumnCompareScalar", "GreaterEqual", "long"}, - {"TimestampColumnCompareScalar", "GreaterEqual", "double"}, - - {"TimestampScalarCompareTimestampColumn", "Equal"}, - {"TimestampScalarCompareTimestampColumn", "NotEqual"}, - {"TimestampScalarCompareTimestampColumn", "Less"}, - {"TimestampScalarCompareTimestampColumn", "LessEqual"}, - {"TimestampScalarCompareTimestampColumn", "Greater"}, - {"TimestampScalarCompareTimestampColumn", "GreaterEqual"}, - - {"ScalarCompareTimestampColumn", "Equal", "long"}, - {"ScalarCompareTimestampColumn", "Equal", "double"}, - {"ScalarCompareTimestampColumn", "NotEqual", "long"}, - {"ScalarCompareTimestampColumn", "NotEqual", "double"}, - {"ScalarCompareTimestampColumn", "Less", "long"}, - {"ScalarCompareTimestampColumn", "Less", "double"}, - {"ScalarCompareTimestampColumn", "LessEqual", "long"}, - {"ScalarCompareTimestampColumn", "LessEqual", "double"}, - {"ScalarCompareTimestampColumn", "Greater", "long"}, - {"ScalarCompareTimestampColumn", "Greater", "double"}, - {"ScalarCompareTimestampColumn", "GreaterEqual", "long"}, - {"ScalarCompareTimestampColumn", "GreaterEqual", "double"}, - + // Base compare timestamp to timestamp used by Timestamp and IntervalDayTime. 
+ {"TimestampCompareTimestampBase", "Equal", "==", "Col", "Column"}, + {"TimestampCompareTimestampBase", "NotEqual", "!=", "Col", "Column"}, + {"TimestampCompareTimestampBase", "Less", "<", "Col", "Column"}, + {"TimestampCompareTimestampBase", "LessEqual", "<=", "Col", "Column"}, + {"TimestampCompareTimestampBase", "Greater", ">", "Col", "Column"}, + {"TimestampCompareTimestampBase", "GreaterEqual", ">=", "Col", "Column"}, + + {"TimestampCompareTimestampBase", "Equal", "==", "Col", "Scalar"}, + {"TimestampCompareTimestampBase", "NotEqual", "!=", "Col", "Scalar"}, + {"TimestampCompareTimestampBase", "Less", "<", "Col", "Scalar"}, + {"TimestampCompareTimestampBase", "LessEqual", "<=", "Col", "Scalar"}, + {"TimestampCompareTimestampBase", "Greater", ">", "Col", "Scalar"}, + {"TimestampCompareTimestampBase", "GreaterEqual", ">=", "Col", "Scalar"}, + + {"TimestampCompareTimestampBase", "Equal", "==", "Scalar", "Column"}, + {"TimestampCompareTimestampBase", "NotEqual", "!=", "Scalar", "Column"}, + {"TimestampCompareTimestampBase", "Less", "<", "Scalar", "Column"}, + {"TimestampCompareTimestampBase", "LessEqual", "<=", "Scalar", "Column"}, + {"TimestampCompareTimestampBase", "Greater", ">", "Scalar", "Column"}, + {"TimestampCompareTimestampBase", "GreaterEqual", ">=", "Scalar", "Column"}, + + // Compare timestamp to timestamp. + {"TimestampCompareTimestamp", "Equal", "timestamp", "Col", "Column"}, + {"TimestampCompareTimestamp", "NotEqual", "timestamp", "Col", "Column"}, + {"TimestampCompareTimestamp", "Less", "timestamp", "Col", "Column"}, + {"TimestampCompareTimestamp", "LessEqual", "timestamp", "Col", "Column"}, + {"TimestampCompareTimestamp", "Greater", "timestamp", "Col", "Column"}, + {"TimestampCompareTimestamp", "GreaterEqual", "timestamp", "Col", "Column"}, + + {"TimestampCompareTimestamp", "Equal", "timestamp", "Col", "Scalar"}, + {"TimestampCompareTimestamp", "NotEqual", "timestamp", "Col", "Scalar"}, + {"TimestampCompareTimestamp", "Less", "timestamp", "Col", "Scalar"}, + {"TimestampCompareTimestamp", "LessEqual", "timestamp", "Col", "Scalar"}, + {"TimestampCompareTimestamp", "Greater", "timestamp", "Col", "Scalar"}, + {"TimestampCompareTimestamp", "GreaterEqual", "timestamp", "Col", "Scalar"}, + + {"TimestampCompareTimestamp", "Equal", "timestamp", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "NotEqual", "timestamp", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "Less", "timestamp", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "LessEqual", "timestamp", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "Greater", "timestamp", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "GreaterEqual", "timestamp", "Scalar", "Column"}, + + {"TimestampCompareTimestamp", "Equal", "interval_day_time", "Col", "Column"}, + {"TimestampCompareTimestamp", "NotEqual", "interval_day_time", "Col", "Column"}, + {"TimestampCompareTimestamp", "Less", "interval_day_time", "Col", "Column"}, + {"TimestampCompareTimestamp", "LessEqual", "interval_day_time", "Col", "Column"}, + {"TimestampCompareTimestamp", "Greater", "interval_day_time", "Col", "Column"}, + {"TimestampCompareTimestamp", "GreaterEqual", "interval_day_time", "Col", "Column"}, + + {"TimestampCompareTimestamp", "Equal", "interval_day_time", "Col", "Scalar"}, + {"TimestampCompareTimestamp", "NotEqual", "interval_day_time", "Col", "Scalar"}, + {"TimestampCompareTimestamp", "Less", "interval_day_time", "Col", "Scalar"}, + {"TimestampCompareTimestamp", "LessEqual", "interval_day_time", "Col", "Scalar"}, + 
{"TimestampCompareTimestamp", "Greater", "interval_day_time", "Col", "Scalar"}, + {"TimestampCompareTimestamp", "GreaterEqual", "interval_day_time", "Col", "Scalar"}, + + {"TimestampCompareTimestamp", "Equal", "interval_day_time", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "NotEqual", "interval_day_time", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "Less", "interval_day_time", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "LessEqual", "interval_day_time", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "Greater", "interval_day_time", "Scalar", "Column"}, + {"TimestampCompareTimestamp", "GreaterEqual", "interval_day_time", "Scalar", "Column"}, + + // Compare timestamp to integer seconds or double seconds with fractional nanoseonds. + {"TimestampCompareLongDouble", "Equal", "long", "==", "Col", "Column"}, + {"TimestampCompareLongDouble", "Equal", "double", "==", "Col", "Column"}, + {"TimestampCompareLongDouble", "NotEqual", "long", "!=", "Col", "Column"}, + {"TimestampCompareLongDouble", "NotEqual", "double", "!=", "Col", "Column"}, + {"TimestampCompareLongDouble", "Less", "long", "<", "Col", "Column"}, + {"TimestampCompareLongDouble", "Less", "double", "<", "Col", "Column"}, + {"TimestampCompareLongDouble", "LessEqual", "long", "<=", "Col", "Column"}, + {"TimestampCompareLongDouble", "LessEqual", "double", "<=", "Col", "Column"}, + {"TimestampCompareLongDouble", "Greater", "long", ">", "Col", "Column"}, + {"TimestampCompareLongDouble", "Greater", "double", ">", "Col", "Column"}, + {"TimestampCompareLongDouble", "GreaterEqual", "long", ">=", "Col", "Column"}, + {"TimestampCompareLongDouble", "GreaterEqual", "double", ">=", "Col", "Column"}, + + {"LongDoubleCompareTimestamp", "Equal", "long", "==", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "Equal", "double", "==", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "NotEqual", "long", "!=", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "NotEqual", "double", "!=", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "Less", "long", "<", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "Less", "double", "<", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "LessEqual", "long", "<=", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "LessEqual", "double", "<=", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "Greater", "long", ">", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "Greater", "double", ">", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "GreaterEqual", "long", ">=", "Col", "Column"}, + {"LongDoubleCompareTimestamp", "GreaterEqual", "double", ">=", "Col", "Column"}, + + {"TimestampCompareLongDouble", "Equal", "long", "==", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "Equal", "double", "==", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "NotEqual", "long", "!=", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "NotEqual", "double", "!=", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "Less", "long", "<", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "Less", "double", "<", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "LessEqual", "long", "<=", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "LessEqual", "double", "<=", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "Greater", "long", ">", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "Greater", "double", ">", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "GreaterEqual", "long", ">=", "Col", "Scalar"}, + {"TimestampCompareLongDouble", "GreaterEqual", "double", ">=", "Col", "Scalar"}, 
+ + {"LongDoubleCompareTimestamp", "Equal", "long", "==", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "Equal", "double", "==", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "NotEqual", "long", "!=", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "NotEqual", "double", "!=", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "Less", "long", "<", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "Less", "double", "<", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "LessEqual", "long", "<=", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "LessEqual", "double", "<=", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "Greater", "long", ">", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "Greater", "double", ">", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "GreaterEqual", "long", ">=", "Col", "Scalar"}, + {"LongDoubleCompareTimestamp", "GreaterEqual", "double", ">=", "Col", "Scalar"}, + + {"TimestampCompareLongDouble", "Equal", "long", "==", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "Equal", "double", "==", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "NotEqual", "long", "!=", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "NotEqual", "double", "!=", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "Less", "long", "<", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "Less", "double", "<", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "LessEqual", "long", "<=", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "LessEqual", "double", "<=", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "Greater", "long", ">", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "Greater", "double", ">", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "GreaterEqual", "long", ">=", "Scalar", "Column"}, + {"TimestampCompareLongDouble", "GreaterEqual", "double", ">=", "Scalar", "Column"}, + + {"LongDoubleCompareTimestamp", "Equal", "long", "==", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "Equal", "double", "==", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "NotEqual", "long", "!=", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "NotEqual", "double", "!=", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "Less", "long", "<", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "Less", "double", "<", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "LessEqual", "long", "<=", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "LessEqual", "double", "<=", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "Greater", "long", ">", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "Greater", "double", ">", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "GreaterEqual", "long", ">=", "Scalar", "Column"}, + {"LongDoubleCompareTimestamp", "GreaterEqual", "double", ">=", "Scalar", "Column"}, + + // Filter long/double. 
{"FilterColumnCompareScalar", "Equal", "long", "double", "=="}, {"FilterColumnCompareScalar", "Equal", "double", "double", "=="}, {"FilterColumnCompareScalar", "NotEqual", "long", "double", "!="}, @@ -343,46 +515,154 @@ {"FilterScalarCompareColumn", "GreaterEqual", "long", "long", ">="}, {"FilterScalarCompareColumn", "GreaterEqual", "double", "long", ">="}, - {"FilterTimestampColumnCompareTimestampScalar", "Equal"}, - {"FilterTimestampColumnCompareTimestampScalar", "NotEqual"}, - {"FilterTimestampColumnCompareTimestampScalar", "Less"}, - {"FilterTimestampColumnCompareTimestampScalar", "LessEqual"}, - {"FilterTimestampColumnCompareTimestampScalar", "Greater"}, - {"FilterTimestampColumnCompareTimestampScalar", "GreaterEqual"}, - - {"FilterTimestampColumnCompareScalar", "Equal", "long"}, - {"FilterTimestampColumnCompareScalar", "Equal", "double"}, - {"FilterTimestampColumnCompareScalar", "NotEqual", "long"}, - {"FilterTimestampColumnCompareScalar", "NotEqual", "double"}, - {"FilterTimestampColumnCompareScalar", "Less", "long"}, - {"FilterTimestampColumnCompareScalar", "Less", "double"}, - {"FilterTimestampColumnCompareScalar", "LessEqual", "long"}, - {"FilterTimestampColumnCompareScalar", "LessEqual", "double"}, - {"FilterTimestampColumnCompareScalar", "Greater", "long"}, - {"FilterTimestampColumnCompareScalar", "Greater", "double"}, - {"FilterTimestampColumnCompareScalar", "GreaterEqual", "long"}, - {"FilterTimestampColumnCompareScalar", "GreaterEqual", "double"}, - - {"FilterTimestampScalarCompareTimestampColumn", "Equal"}, - {"FilterTimestampScalarCompareTimestampColumn", "NotEqual"}, - {"FilterTimestampScalarCompareTimestampColumn", "Less"}, - {"FilterTimestampScalarCompareTimestampColumn", "LessEqual"}, - {"FilterTimestampScalarCompareTimestampColumn", "Greater"}, - {"FilterTimestampScalarCompareTimestampColumn", "GreaterEqual"}, - - {"FilterScalarCompareTimestampColumn", "Equal", "long"}, - {"FilterScalarCompareTimestampColumn", "Equal", "double"}, - {"FilterScalarCompareTimestampColumn", "NotEqual", "long"}, - {"FilterScalarCompareTimestampColumn", "NotEqual", "double"}, - {"FilterScalarCompareTimestampColumn", "Less", "long"}, - {"FilterScalarCompareTimestampColumn", "Less", "double"}, - {"FilterScalarCompareTimestampColumn", "LessEqual", "long"}, - {"FilterScalarCompareTimestampColumn", "LessEqual", "double"}, - {"FilterScalarCompareTimestampColumn", "Greater", "long"}, - {"FilterScalarCompareTimestampColumn", "Greater", "double"}, - {"FilterScalarCompareTimestampColumn", "GreaterEqual", "long"}, - {"FilterScalarCompareTimestampColumn", "GreaterEqual", "double"}, - + // Base filter timestamp against timestamp used by Timestamp and IntervalDayTime. 
+ {"FilterTimestampCompareTimestampBase", "Equal", "==", "Col", "Column"}, + {"FilterTimestampCompareTimestampBase", "NotEqual", "!=", "Col", "Column"}, + {"FilterTimestampCompareTimestampBase", "Less", "<", "Col", "Column"}, + {"FilterTimestampCompareTimestampBase", "LessEqual", "<=", "Col", "Column"}, + {"FilterTimestampCompareTimestampBase", "Greater", ">", "Col", "Column"}, + {"FilterTimestampCompareTimestampBase", "GreaterEqual", ">=", "Col", "Column"}, + + {"FilterTimestampCompareTimestampBase", "Equal", "==", "Col", "Scalar"}, + {"FilterTimestampCompareTimestampBase", "NotEqual", "!=", "Col", "Scalar"}, + {"FilterTimestampCompareTimestampBase", "Less", "<", "Col", "Scalar"}, + {"FilterTimestampCompareTimestampBase", "LessEqual", "<=", "Col", "Scalar"}, + {"FilterTimestampCompareTimestampBase", "Greater", ">", "Col", "Scalar"}, + {"FilterTimestampCompareTimestampBase", "GreaterEqual", ">=", "Col", "Scalar"}, + + {"FilterTimestampCompareTimestampBase", "Equal", "==", "Scalar", "Column"}, + {"FilterTimestampCompareTimestampBase", "NotEqual", "!=", "Scalar", "Column"}, + {"FilterTimestampCompareTimestampBase", "Less", "<", "Scalar", "Column"}, + {"FilterTimestampCompareTimestampBase", "LessEqual", "<=", "Scalar", "Column"}, + {"FilterTimestampCompareTimestampBase", "Greater", ">", "Scalar", "Column"}, + {"FilterTimestampCompareTimestampBase", "GreaterEqual", ">=", "Scalar", "Column"}, + + // Filter timestamp against timestamp, or interval day time against interval day time. + + {"FilterTimestampCompareTimestamp", "Equal", "timestamp", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "NotEqual", "timestamp", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "Less", "timestamp", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "LessEqual", "timestamp", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "Greater", "timestamp", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "GreaterEqual", "timestamp", "Col", "Column"}, + + {"FilterTimestampCompareTimestamp", "Equal", "timestamp", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "NotEqual", "timestamp", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "Less", "timestamp", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "LessEqual", "timestamp", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "Greater", "timestamp", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "GreaterEqual", "timestamp", "Col", "Scalar"}, + + {"FilterTimestampCompareTimestamp", "Equal", "timestamp", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "NotEqual", "timestamp", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "Less", "timestamp", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "LessEqual", "timestamp", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "Greater", "timestamp", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "GreaterEqual", "timestamp", "Scalar", "Column"}, + + {"FilterTimestampCompareTimestamp", "Equal", "interval_day_time", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "NotEqual", "interval_day_time", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "Less", "interval_day_time", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "LessEqual", "interval_day_time", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "Greater", "interval_day_time", "Col", "Column"}, + {"FilterTimestampCompareTimestamp", "GreaterEqual", "interval_day_time", "Col", "Column"}, + + {"FilterTimestampCompareTimestamp", 
"Equal", "interval_day_time", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "NotEqual", "interval_day_time", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "Less", "interval_day_time", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "LessEqual", "interval_day_time", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "Greater", "interval_day_time", "Col", "Scalar"}, + {"FilterTimestampCompareTimestamp", "GreaterEqual", "interval_day_time", "Col", "Scalar"}, + + {"FilterTimestampCompareTimestamp", "Equal", "interval_day_time", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "NotEqual", "interval_day_time", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "Less", "interval_day_time", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "LessEqual", "interval_day_time", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "Greater", "interval_day_time", "Scalar", "Column"}, + {"FilterTimestampCompareTimestamp", "GreaterEqual", "interval_day_time", "Scalar", "Column"}, + + // Filter timestamp against long (seconds) or double (seconds with fractional + // nanoseconds). + + {"FilterTimestampCompareLongDouble", "Equal", "long", "==", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "Equal", "double", "==", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "NotEqual", "long", "!=", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "NotEqual", "double", "!=", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "Less", "long", "<", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "Less", "double", "<", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "LessEqual", "long", "<=", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "LessEqual", "double", "<=", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "Greater", "long", ">", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "Greater", "double", ">", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "GreaterEqual", "long", ">=", "Col", "Column"}, + {"FilterTimestampCompareLongDouble", "GreaterEqual", "double", ">=", "Col", "Column"}, + + {"FilterLongDoubleCompareTimestamp", "Equal", "long", "==", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Equal", "double", "==", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "NotEqual", "long", "!=", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "NotEqual", "double", "!=", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Less", "long", "<", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Less", "double", "<", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "LessEqual", "long", "<=", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "LessEqual", "double", "<=", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Greater", "long", ">", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Greater", "double", ">", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "GreaterEqual", "long", ">=", "Col", "Column"}, + {"FilterLongDoubleCompareTimestamp", "GreaterEqual", "double", ">=", "Col", "Column"}, + + {"FilterTimestampCompareLongDouble", "Equal", "long", "==", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "Equal", "double", "==", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "NotEqual", "long", "!=", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "NotEqual", "double", "!=", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "Less", "long", "<", "Col", "Scalar"}, 
+ {"FilterTimestampCompareLongDouble", "Less", "double", "<", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "LessEqual", "long", "<=", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "LessEqual", "double", "<=", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "Greater", "long", ">", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "Greater", "double", ">", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "GreaterEqual", "long", ">=", "Col", "Scalar"}, + {"FilterTimestampCompareLongDouble", "GreaterEqual", "double", ">=", "Col", "Scalar"}, + + {"FilterLongDoubleCompareTimestamp", "Equal", "long", "==", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "Equal", "double", "==", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "NotEqual", "long", "!=", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "NotEqual", "double", "!=", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "Less", "long", "<", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "Less", "double", "<", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "LessEqual", "long", "<=", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "LessEqual", "double", "<=", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "Greater", "long", ">", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "Greater", "double", ">", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "GreaterEqual", "long", ">=", "Col", "Scalar"}, + {"FilterLongDoubleCompareTimestamp", "GreaterEqual", "double", ">=", "Col", "Scalar"}, + + {"FilterTimestampCompareLongDouble", "Equal", "long", "==", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "Equal", "double", "==", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "NotEqual", "long", "!=", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "NotEqual", "double", "!=", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "Less", "long", "<", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "Less", "double", "<", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "LessEqual", "long", "<=", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "LessEqual", "double", "<=", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "Greater", "long", ">", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "Greater", "double", ">", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "GreaterEqual", "long", ">=", "Scalar", "Column"}, + {"FilterTimestampCompareLongDouble", "GreaterEqual", "double", ">=", "Scalar", "Column"}, + + {"FilterLongDoubleCompareTimestamp", "Equal", "long", "==", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Equal", "double", "==", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "NotEqual", "long", "!=", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "NotEqual", "double", "!=", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Less", "long", "<", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Less", "double", "<", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "LessEqual", "long", "<=", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "LessEqual", "double", "<=", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Greater", "long", ">", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "Greater", "double", ">", "Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "GreaterEqual", "long", ">=", 
"Scalar", "Column"}, + {"FilterLongDoubleCompareTimestamp", "GreaterEqual", "double", ">=", "Scalar", "Column"}, + + // String group comparison. {"FilterStringGroupColumnCompareStringGroupScalarBase", "Equal", "=="}, {"FilterStringGroupColumnCompareStringGroupScalarBase", "NotEqual", "!="}, {"FilterStringGroupColumnCompareStringGroupScalarBase", "Less", "<"}, @@ -476,26 +756,28 @@ {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "Greater", ">"}, {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "GreaterEqual", ">="}, - {"FilterDecimalColumnCompareScalar", "Equal", "=="}, - {"FilterDecimalColumnCompareScalar", "NotEqual", "!="}, - {"FilterDecimalColumnCompareScalar", "Less", "<"}, - {"FilterDecimalColumnCompareScalar", "LessEqual", "<="}, - {"FilterDecimalColumnCompareScalar", "Greater", ">"}, - {"FilterDecimalColumnCompareScalar", "GreaterEqual", ">="}, - - {"FilterDecimalScalarCompareColumn", "Equal", "=="}, - {"FilterDecimalScalarCompareColumn", "NotEqual", "!="}, - {"FilterDecimalScalarCompareColumn", "Less", "<"}, - {"FilterDecimalScalarCompareColumn", "LessEqual", "<="}, - {"FilterDecimalScalarCompareColumn", "Greater", ">"}, - {"FilterDecimalScalarCompareColumn", "GreaterEqual", ">="}, - - {"FilterDecimalColumnCompareColumn", "Equal", "=="}, - {"FilterDecimalColumnCompareColumn", "NotEqual", "!="}, - {"FilterDecimalColumnCompareColumn", "Less", "<"}, - {"FilterDecimalColumnCompareColumn", "LessEqual", "<="}, - {"FilterDecimalColumnCompareColumn", "Greater", ">"}, - {"FilterDecimalColumnCompareColumn", "GreaterEqual", ">="}, + + {"FilterDecimalColumnCompareDecimalScalar", "Equal", "=="}, + {"FilterDecimalColumnCompareDecimalScalar", "NotEqual", "!="}, + {"FilterDecimalColumnCompareDecimalScalar", "Less", "<"}, + {"FilterDecimalColumnCompareDecimalScalar", "LessEqual", "<="}, + {"FilterDecimalColumnCompareDecimalScalar", "Greater", ">"}, + {"FilterDecimalColumnCompareDecimalScalar", "GreaterEqual", ">="}, + + {"FilterDecimalScalarCompareDecimalColumn", "Equal", "=="}, + {"FilterDecimalScalarCompareDecimalColumn", "NotEqual", "!="}, + {"FilterDecimalScalarCompareDecimalColumn", "Less", "<"}, + {"FilterDecimalScalarCompareDecimalColumn", "LessEqual", "<="}, + {"FilterDecimalScalarCompareDecimalColumn", "Greater", ">"}, + {"FilterDecimalScalarCompareDecimalColumn", "GreaterEqual", ">="}, + + {"FilterDecimalColumnCompareDecimalColumn", "Equal", "=="}, + {"FilterDecimalColumnCompareDecimalColumn", "NotEqual", "!="}, + {"FilterDecimalColumnCompareDecimalColumn", "Less", "<"}, + {"FilterDecimalColumnCompareDecimalColumn", "LessEqual", "<="}, + {"FilterDecimalColumnCompareDecimalColumn", "Greater", ">"}, + {"FilterDecimalColumnCompareDecimalColumn", "GreaterEqual", ">="}, + {"StringGroupScalarCompareStringGroupColumnBase", "Equal", "=="}, {"StringGroupScalarCompareStringGroupColumnBase", "NotEqual", "!="}, @@ -573,6 +855,9 @@ {"FilterDecimalColumnBetween", ""}, {"FilterDecimalColumnBetween", "!"}, + {"FilterTimestampColumnBetween", ""}, + {"FilterTimestampColumnBetween", "!"}, + {"ColumnCompareColumn", "Equal", "long", "double", "=="}, {"ColumnCompareColumn", "Equal", "double", "double", "=="}, {"ColumnCompareColumn", "NotEqual", "long", "double", "!="}, @@ -593,58 +878,34 @@ {"ColumnCompareColumn", "Greater", "double", "long", ">"}, {"ColumnCompareColumn", "GreaterEqual", "double", "long", ">="}, - // Interval comparisons + // Interval year month comparisons {"DTIScalarCompareColumn", "Equal", "interval_year_month"}, - {"DTIScalarCompareColumn", "Equal", 
"interval_day_time"}, {"DTIScalarCompareColumn", "NotEqual", "interval_year_month"}, - {"DTIScalarCompareColumn", "NotEqual", "interval_day_time"}, {"DTIScalarCompareColumn", "Less", "interval_year_month"}, - {"DTIScalarCompareColumn", "Less", "interval_day_time"}, {"DTIScalarCompareColumn", "LessEqual", "interval_year_month"}, - {"DTIScalarCompareColumn", "LessEqual", "interval_day_time"}, {"DTIScalarCompareColumn", "Greater", "interval_year_month"}, - {"DTIScalarCompareColumn", "Greater", "interval_day_time"}, {"DTIScalarCompareColumn", "GreaterEqual", "interval_year_month"}, - {"DTIScalarCompareColumn", "GreaterEqual", "interval_day_time"}, {"DTIColumnCompareScalar", "Equal", "interval_year_month"}, - {"DTIColumnCompareScalar", "Equal", "interval_day_time"}, {"DTIColumnCompareScalar", "NotEqual", "interval_year_month"}, - {"DTIColumnCompareScalar", "NotEqual", "interval_day_time"}, {"DTIColumnCompareScalar", "Less", "interval_year_month"}, - {"DTIColumnCompareScalar", "Less", "interval_day_time"}, {"DTIColumnCompareScalar", "LessEqual", "interval_year_month"}, - {"DTIColumnCompareScalar", "LessEqual", "interval_day_time"}, {"DTIColumnCompareScalar", "Greater", "interval_year_month"}, - {"DTIColumnCompareScalar", "Greater", "interval_day_time"}, {"DTIColumnCompareScalar", "GreaterEqual", "interval_year_month"}, - {"DTIColumnCompareScalar", "GreaterEqual", "interval_day_time"}, {"FilterDTIScalarCompareColumn", "Equal", "interval_year_month"}, - {"FilterDTIScalarCompareColumn", "Equal", "interval_day_time"}, {"FilterDTIScalarCompareColumn", "NotEqual", "interval_year_month"}, - {"FilterDTIScalarCompareColumn", "NotEqual", "interval_day_time"}, {"FilterDTIScalarCompareColumn", "Less", "interval_year_month"}, - {"FilterDTIScalarCompareColumn", "Less", "interval_day_time"}, {"FilterDTIScalarCompareColumn", "LessEqual", "interval_year_month"}, - {"FilterDTIScalarCompareColumn", "LessEqual", "interval_day_time"}, {"FilterDTIScalarCompareColumn", "Greater", "interval_year_month"}, - {"FilterDTIScalarCompareColumn", "Greater", "interval_day_time"}, {"FilterDTIScalarCompareColumn", "GreaterEqual", "interval_year_month"}, - {"FilterDTIScalarCompareColumn", "GreaterEqual", "interval_day_time"}, {"FilterDTIColumnCompareScalar", "Equal", "interval_year_month"}, - {"FilterDTIColumnCompareScalar", "Equal", "interval_day_time"}, {"FilterDTIColumnCompareScalar", "NotEqual", "interval_year_month"}, - {"FilterDTIColumnCompareScalar", "NotEqual", "interval_day_time"}, {"FilterDTIColumnCompareScalar", "Less", "interval_year_month"}, - {"FilterDTIColumnCompareScalar", "Less", "interval_day_time"}, {"FilterDTIColumnCompareScalar", "LessEqual", "interval_year_month"}, - {"FilterDTIColumnCompareScalar", "LessEqual", "interval_day_time"}, {"FilterDTIColumnCompareScalar", "Greater", "interval_year_month"}, - {"FilterDTIColumnCompareScalar", "Greater", "interval_day_time"}, {"FilterDTIColumnCompareScalar", "GreaterEqual", "interval_year_month"}, - {"FilterDTIColumnCompareScalar", "GreaterEqual", "interval_day_time"}, // Date comparisons {"DTIScalarCompareColumn", "Equal", "date"}, @@ -741,24 +1002,12 @@ // Casts {"ColumnUnaryFunc", "Cast", "long", "double", "", "", "(long)", "", ""}, {"ColumnUnaryFunc", "Cast", "double", "long", "", "", "(double)", "", ""}, - {"ColumnUnaryFunc", "CastTimestampToLongVia", "long", "long", "MathExpr.fromTimestamp", "", - "", "", "timestamp"}, - {"ColumnUnaryFunc", "CastTimestampToDoubleVia", "double", "long", - "MathExpr.fromTimestampToDouble", "", "", "", "timestamp"}, 
{"ColumnUnaryFunc", "CastDoubleToBooleanVia", "long", "double", "MathExpr.toBool", "", "", "", ""}, {"ColumnUnaryFunc", "CastLongToBooleanVia", "long", "long", "MathExpr.toBool", "", "", "", ""}, {"ColumnUnaryFunc", "CastDateToBooleanVia", "long", "long", "MathExpr.toBool", "", "", "", "date"}, - {"ColumnUnaryFunc", "CastTimestampToBooleanVia", "long", "long", "MathExpr.toBool", "", - "", "", "timestamp"}, - {"ColumnUnaryFunc", "CastLongToTimestampVia", "long", "long", "MathExpr.longToTimestamp", "", - "", "", ""}, - {"ColumnUnaryFunc", "CastMillisecondsLongToTimestampVia", "long", "long", "MathExpr.millisecondsLongToTimestamp", "", - "", "", ""}, - {"ColumnUnaryFunc", "CastDoubleToTimestampVia", "long", "double", - "MathExpr.doubleToTimestamp", "", "", "", ""}, // Boolean to long is done with an IdentityExpression // Boolean to double is done with standard Long to Double cast @@ -803,6 +1052,11 @@ {"VectorUDAFMinMaxString", "VectorUDAFMaxString", ">", "max", "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: string)"}, + {"VectorUDAFMinMaxTimestamp", "VectorUDAFMaxTimestamp", "<", "max", + "_FUNC_(expr) - Returns the maximum value of expr (vectorized, type: timestamp)"}, + {"VectorUDAFMinMaxTimestamp", "VectorUDAFMinTimestamp", ">", "min", + "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: timestamp)"}, + //template, , {"VectorUDAFSum", "VectorUDAFSumLong", "long"}, {"VectorUDAFSum", "VectorUDAFSumDouble", "double"}, @@ -947,26 +1201,36 @@ private void generate() throws Exception { generateColumnCompareScalar(tdesc); } else if (tdesc[0].equals("ScalarCompareColumn")) { generateScalarCompareColumn(tdesc); - } else if (tdesc[0].equals("TimestampScalarCompareTimestampColumn")) { - generateTimestampScalarCompareTimestampColumn(tdesc); - } else if (tdesc[0].equals("ScalarCompareTimestampColumn")) { - generateScalarCompareTimestampColumn(tdesc); - } else if (tdesc[0].equals("TimestampColumnCompareTimestampScalar")) { - generateTimestampColumnCompareTimestampScalar(tdesc); - } else if (tdesc[0].equals("TimestampColumnCompareScalar")) { - generateTimestampColumnCompareScalar(tdesc); + + } else if (tdesc[0].equals("TimestampCompareTimestampBase")) { + generateTimestampCompareTimestampBase(tdesc); + + } else if (tdesc[0].equals("TimestampCompareTimestamp")) { + generateTimestampCompareTimestamp(tdesc); + + } else if (tdesc[0].equals("TimestampCompareLongDouble")) { + generateTimestampCompareLongDouble(tdesc); + + } else if (tdesc[0].equals("LongDoubleCompareTimestamp")) { + generateLongDoubleCompareTimestamp(tdesc); + } else if (tdesc[0].equals("FilterColumnCompareScalar")) { generateFilterColumnCompareScalar(tdesc); } else if (tdesc[0].equals("FilterScalarCompareColumn")) { generateFilterScalarCompareColumn(tdesc); - } else if (tdesc[0].equals("FilterTimestampColumnCompareTimestampScalar")) { - generateFilterTimestampColumnCompareTimestampScalar(tdesc); - } else if (tdesc[0].equals("FilterTimestampColumnCompareScalar")) { - generateFilterTimestampColumnCompareScalar(tdesc); - } else if (tdesc[0].equals("FilterTimestampScalarCompareTimestampColumn")) { - generateFilterTimestampScalarCompareTimestampColumn(tdesc); - } else if (tdesc[0].equals("FilterScalarCompareTimestampColumn")) { - generateFilterScalarCompareTimestampColumn(tdesc); + + } else if (tdesc[0].equals("FilterTimestampCompareTimestampBase")) { + generateFilterTimestampCompareTimestampBase(tdesc); + + } else if (tdesc[0].equals("FilterTimestampCompareTimestamp")) { + 
generateFilterTimestampCompareTimestamp(tdesc); + + } else if (tdesc[0].equals("FilterTimestampCompareLongDouble")) { + generateFilterTimestampCompareLongDouble(tdesc); + + } else if (tdesc[0].equals("FilterLongDoubleCompareTimestamp")) { + generateFilterLongDoubleCompareTimestamp(tdesc); + } else if (tdesc[0].equals("FilterColumnBetween")) { generateFilterColumnBetween(tdesc); } else if (tdesc[0].equals("ScalarArithmeticColumn") || tdesc[0].equals("ScalarDivideColumn")) { @@ -988,7 +1252,9 @@ private void generate() throws Exception { } else if (tdesc[0].equals("VectorUDAFMinMaxString")) { generateVectorUDAFMinMaxString(tdesc); } else if (tdesc[0].equals("VectorUDAFMinMaxDecimal")) { - generateVectorUDAFMinMaxDecimal(tdesc); + generateVectorUDAFMinMaxObject(tdesc); + } else if (tdesc[0].equals("VectorUDAFMinMaxTimestamp")) { + generateVectorUDAFMinMaxObject(tdesc); } else if (tdesc[0].equals("VectorUDAFSum")) { generateVectorUDAFSum(tdesc); } else if (tdesc[0].equals("VectorUDAFAvg")) { @@ -1009,7 +1275,9 @@ private void generate() throws Exception { generateFilterTruncStringColumnBetween(tdesc); } else if (tdesc[0].equals("FilterDecimalColumnBetween")) { generateFilterDecimalColumnBetween(tdesc); - } else if (tdesc[0].equals("StringGroupColumnCompareStringGroupScalarBase")) { + } else if (tdesc[0].equals("FilterTimestampColumnBetween")) { + generateFilterTimestampColumnBetween(tdesc); + } else if (tdesc[0].equals("StringGroupColumnCompareStringGroupScalarBase")) { generateStringGroupColumnCompareStringGroupScalarBase(tdesc); } else if (tdesc[0].equals("StringGroupColumnCompareStringScalar")) { generateStringGroupColumnCompareStringScalar(tdesc); @@ -1037,12 +1305,12 @@ private void generate() throws Exception { generateIfExprScalarColumn(tdesc); } else if (tdesc[0].equals("IfExprScalarScalar")) { generateIfExprScalarScalar(tdesc); - } else if (tdesc[0].equals("FilterDecimalColumnCompareScalar")) { - generateFilterDecimalColumnCompareScalar(tdesc); - } else if (tdesc[0].equals("FilterDecimalScalarCompareColumn")) { - generateFilterDecimalScalarCompareColumn(tdesc); - } else if (tdesc[0].equals("FilterDecimalColumnCompareColumn")) { - generateFilterDecimalColumnCompareColumn(tdesc); + } else if (tdesc[0].equals("FilterDecimalColumnCompareDecimalScalar")) { + generateFilterDecimalColumnCompareDecimalScalar(tdesc); + } else if (tdesc[0].equals("FilterDecimalScalarCompareDecimalColumn")) { + generateFilterDecimalScalarCompareDecimalColumn(tdesc); + } else if (tdesc[0].equals("FilterDecimalColumnCompareDecimalColumn")) { + generateFilterDecimalColumnCompareDecimalColumn(tdesc); } else if (tdesc[0].equals("FilterDTIScalarCompareColumn")) { generateFilterDTIScalarCompareColumn(tdesc); } else if (tdesc[0].equals("FilterDTIColumnCompareScalar")) { @@ -1057,24 +1325,37 @@ private void generate() throws Exception { generateScalarArithmeticColumn(tdesc); } else if (tdesc[0].equals("DTIColumnArithmeticDTIColumnNoConvert")) { generateColumnArithmeticColumn(tdesc); - } else if (tdesc[0].equals("ColumnArithmeticColumnWithConvert")) { - generateColumnArithmeticColumnWithConvert(tdesc); - } else if (tdesc[0].equals("ScalarArithmeticColumnWithConvert")) { - generateScalarArithmeticColumnWithConvert(tdesc); - } else if (tdesc[0].equals("ColumnArithmeticScalarWithConvert")) { - generateColumnArithmeticScalarWithConvert(tdesc); - } else if (tdesc[0].equals("DateTimeColumnArithmeticIntervalColumnWithConvert")) { - generateDateTimeColumnArithmeticIntervalColumnWithConvert(tdesc); - } else if 
(tdesc[0].equals("DateTimeScalarArithmeticIntervalColumnWithConvert")) { - generateDateTimeScalarArithmeticIntervalColumnWithConvert(tdesc); - } else if (tdesc[0].equals("DateTimeColumnArithmeticIntervalScalarWithConvert")) { - generateDateTimeColumnArithmeticIntervalScalarWithConvert(tdesc); - } else if (tdesc[0].equals("IntervalColumnArithmeticDateTimeColumnWithConvert")) { - generateDateTimeColumnArithmeticIntervalColumnWithConvert(tdesc); - } else if (tdesc[0].equals("IntervalScalarArithmeticDateTimeColumnWithConvert")) { - generateDateTimeScalarArithmeticIntervalColumnWithConvert(tdesc); - } else if (tdesc[0].equals("IntervalColumnArithmeticDateTimeScalarWithConvert")) { - generateDateTimeColumnArithmeticIntervalScalarWithConvert(tdesc); + + } else if (tdesc[0].equals("DateArithmeticIntervalYearMonth")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + + } else if (tdesc[0].equals("IntervalYearMonthArithmeticDate")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + + } else if (tdesc[0].equals("TimestampArithmeticIntervalYearMonth")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + + } else if (tdesc[0].equals("IntervalYearMonthArithmeticTimestamp")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + + } else if (tdesc[0].equals("TimestampArithmeticTimestampBase")) { + generateTimestampArithmeticTimestampBase(tdesc); + + } else if (tdesc[0].equals("TimestampArithmeticTimestamp")) { + generateTimestampArithmeticTimestamp(tdesc); + + } else if (tdesc[0].equals("DateArithmeticTimestampBase")) { + generateDateArithmeticTimestampBase(tdesc); + + } else if (tdesc[0].equals("DateArithmeticTimestamp")) { + generateDateArithmeticTimestamp(tdesc); + + } else if (tdesc[0].equals("TimestampArithmeticDateBase")) { + generateTimestampArithmeticDateBase(tdesc); + + } else if (tdesc[0].equals("TimestampArithmeticDate")) { + generateTimestampArithmeticDate(tdesc); + } else { continue; } @@ -1140,6 +1421,20 @@ private void generateFilterDecimalColumnBetween(String[] tdesc) throws IOExcepti className, templateString); } + private void generateFilterTimestampColumnBetween(String[] tdesc) throws IOException { + String optionalNot = tdesc[1]; + String className = "FilterTimestampColumn" + (optionalNot.equals("!") ? "Not" : "") + + "Between"; + // Read the template into a string, expand it, and write it. 
+    File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<OptionalNot>", optionalNot);
+
+    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+        className, templateString);
+  }
+
   private void generateFilterColumnBetween(String[] tdesc) throws Exception {
     String operandType = tdesc[1];
     String optionalNot = tdesc[2];
@@ -1211,7 +1506,7 @@ private void generateVectorUDAFMinMaxString(String[] tdesc) throws Exception {
         className, templateString);
   }
 
-  private void generateVectorUDAFMinMaxDecimal(String[] tdesc) throws Exception {
+  private void generateVectorUDAFMinMaxObject(String[] tdesc) throws Exception {
     String className = tdesc[1];
     String operatorSymbol = tdesc[2];
     String descName = tdesc[3];
@@ -1609,7 +1904,7 @@ private void generateColumnUnaryMinus(String[] tdesc) throws Exception {
     String vectorExprArgType = operandType;
     if (operandType.equals("long")) {
       // interval types can use long version
-      vectorExprArgType = "int_interval_family";
+      vectorExprArgType = "int_interval_year_month";
     }
     // Expand, and write result
     templateString = templateString.replaceAll("<ClassName>", className);
@@ -1646,8 +1941,8 @@ private void generateIfExprColumnScalar(String[] tdesc) throws Exception {
 
     // Toss in timestamp and date.
     if (operandType2.equals("long") && operandType3.equals("long")) {
-      vectorExprArgType2 = "int_datetime_interval_family";
-      vectorExprArgType3 = "int_datetime_interval_family";
+      vectorExprArgType2 = "int_date_interval_year_month";
+      vectorExprArgType3 = "int_date_interval_year_month";
     }
     templateString = templateString.replaceAll("<VectorExprArgType2>", vectorExprArgType2);
     templateString = templateString.replaceAll("<VectorExprArgType3>", vectorExprArgType3);
@@ -1679,8 +1974,8 @@ private void generateIfExprScalarColumn(String[] tdesc) throws Exception {
 
     // Toss in timestamp and date.
     if (operandType2.equals("long") && operandType3.equals("long")) {
-      vectorExprArgType2 = "int_datetime_interval_family";
-      vectorExprArgType3 = "int_datetime_interval_family";
+      vectorExprArgType2 = "int_date_interval_year_month";
+      vectorExprArgType3 = "int_date_interval_year_month";
     }
     templateString = templateString.replaceAll("<VectorExprArgType2>", vectorExprArgType2);
     templateString = templateString.replaceAll("<VectorExprArgType3>", vectorExprArgType3);
@@ -1711,8 +2006,8 @@ private void generateIfExprScalarScalar(String[] tdesc) throws Exception {
 
     // Toss in timestamp and date.
     if (operandType2.equals("long") && operandType3.equals("long")) {
-      vectorExprArgType2 = "int_datetime_interval_family";
-      vectorExprArgType3 = "int_datetime_interval_family";
+      vectorExprArgType2 = "int_date_interval_year_month";
+      vectorExprArgType3 = "int_date_interval_year_month";
     }
     templateString = templateString.replaceAll("<VectorExprArgType2>", vectorExprArgType2);
     templateString = templateString.replaceAll("<VectorExprArgType3>", vectorExprArgType3);
@@ -1843,12 +2138,10 @@ private void generateColumnCompareOperatorColumn(String[] tdesc, boolean filter,
     String vectorExprArgType1 = operandType1;
     String vectorExprArgType2 = operandType2;
 
-    // For column to column only, we toss in timestamp and date.
-    // But {timestamp|date} and scalar must be handled separately.
+    // For column to column only, we toss in date and interval_year_month.
     if (operandType1.equals("long") && operandType2.equals("long")) {
-      // Let comparisons occur for DATE and TIMESTAMP, too.
- vectorExprArgType1 = "int_datetime_interval_family"; - vectorExprArgType2 = "int_datetime_interval_family"; + vectorExprArgType1 = "int_date_interval_year_month"; + vectorExprArgType2 = "int_date_interval_year_month"; } templateString = templateString.replaceAll("", vectorExprArgType1); templateString = templateString.replaceAll("", vectorExprArgType2); @@ -1870,25 +2163,51 @@ private void generateColumnCompareOperatorColumn(String[] tdesc, boolean filter, } } - private void generateTimestampScalarCompareTimestampColumn(String[] tdesc) throws Exception { + // ----------------------------------------------------------------------------------------------- + // + // Filter timestamp against timestamp, long (seconds), and double (seconds with fractional + // nanoseconds). + // + // Filter TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn + // Filter TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Column + //* Filter {Long|Double}Col {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn + // + // Filter TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampScalar + // Filter TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Scalar + //* Filter {Long|Double}Col {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampScalar + // + // Filter TimestampScalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn + // Filter TimestampScalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Column + //* Filter {Long|Double}Scalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn + // + // ----------------------------------------------------------------------------------------------- + + private void generateFilterTimestampCompareTimestampBase(String[] tdesc) throws Exception { String operatorName = tdesc[1]; - String className = "TimestampScalar" + operatorName + "TimestampColumn"; - String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.LongScalar" + operatorName + "LongColumn"; + String operatorSymbol = tdesc[2]; + String className = "FilterTimestamp" + tdesc[3] + operatorName + "Timestamp" + tdesc[4] + "Base"; + //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "FilterTimestamp" + (tdesc[3].equals("Col") ? 
"Column" : tdesc[3]) + "CompareTimestamp" + + tdesc[4] + "Base"; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", baseClassName); + templateString = templateString.replaceAll("", operatorSymbol); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } - private void generateTimestampColumnCompareTimestampScalar(String[] tdesc) throws Exception { + private void generateFilterTimestampCompareTimestamp(String[] tdesc) throws Exception { String operatorName = tdesc[1]; - String className = "TimestampCol" + operatorName + "TimestampScalar"; - String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.LongCol" + operatorName + "LongScalar"; + String operandType = tdesc[2]; + String camelCaseOperandType = getCamelCaseType(operandType); + String className = "Filter" + camelCaseOperandType + tdesc[3] + operatorName + camelCaseOperandType + tdesc[4]; + String baseClassName = "FilterTimestamp" + tdesc[3] + operatorName + "Timestamp" + tdesc[4] + "Base"; //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "Filter" + camelCaseOperandType + (tdesc[3].equals("Col") ? "Column" : tdesc[3]) + "Compare" + camelCaseOperandType + + tdesc[4]; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); templateString = templateString.replaceAll("", baseClassName); @@ -1896,106 +2215,210 @@ private void generateTimestampColumnCompareTimestampScalar(String[] tdesc) throw className, templateString); } - private void generateFilterTimestampColumnCompareTimestampScalar(String[] tdesc) throws Exception { + private void generateFilterTimestampCompareLongDouble(String[] tdesc) throws Exception { String operatorName = tdesc[1]; - String className = "FilterTimestampCol" + operatorName + "TimestampScalar"; - String baseClassName = "FilterLongCol" + operatorName + "LongScalar"; + String operandType = tdesc[2]; + String camelCaseOperandType = getCamelCaseType(operandType); + String operatorSymbol = tdesc[3]; + String inputColumnVectorType2 = this.getColumnVectorType(operandType); + + String className = "FilterTimestamp" + tdesc[4] + operatorName + camelCaseOperandType + tdesc[5]; + + // Timestamp Scalar case becomes use long/double scalar class. + String baseClassName; + if (tdesc[4].equals("Scalar")) { + baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.gen." + + "Filter" + camelCaseOperandType + "Scalar" + operatorName + camelCaseOperandType + "Column"; + } else { + baseClassName = ""; + } + //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "FilterTimestamp" + (tdesc[4].equals("Col") ? 
"Column" : tdesc[4]) + "CompareLongDouble" + + tdesc[5]; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", baseClassName); + if (baseClassName.length() > 0) { + templateString = templateString.replaceAll("", baseClassName); + } + templateString = templateString.replaceAll("", operandType); + templateString = templateString.replaceAll("", operatorSymbol); + templateString = templateString.replaceAll("", inputColumnVectorType2); + templateString = templateString.replaceAll("", timestampLongDoubleMethod(operandType)); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } - private void generateFilterTimestampScalarCompareTimestampColumn(String[] tdesc) throws Exception { + private void generateFilterLongDoubleCompareTimestamp(String[] tdesc) throws Exception { String operatorName = tdesc[1]; - String className = "FilterTimestampScalar" + operatorName + "TimestampColumn"; - String baseClassName = "FilterLongScalar" + operatorName + "LongColumn"; + String operandType = tdesc[2]; + String camelCaseOperandType = getCamelCaseType(operandType); + String operatorSymbol = tdesc[3]; + String inputColumnVectorType1 = this.getColumnVectorType(operandType); + + String className = "Filter" + getCamelCaseType(operandType) + tdesc[4] + operatorName + "Timestamp" + tdesc[5]; + + // Timestamp Scalar case becomes use long/double scalar class. + String baseClassName; + if (tdesc[5].equals("Scalar")) { + baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.gen." + + "Filter" + camelCaseOperandType + "Col" + operatorName + camelCaseOperandType + "Scalar"; + } else { + baseClassName = ""; + } + //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "FilterLongDouble" + (tdesc[4].equals("Col") ? 
"Column" : tdesc[4]) + "CompareTimestamp" + + tdesc[5]; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", baseClassName); + if (baseClassName.length() > 0) { + templateString = templateString.replaceAll("", baseClassName); + } + templateString = templateString.replaceAll("", operandType); + templateString = templateString.replaceAll("", operatorSymbol); + templateString = templateString.replaceAll("", inputColumnVectorType1); + templateString = templateString.replaceAll("", timestampLongDoubleMethod(operandType)); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } - private String timestampScalarConversion(String operandType) { + private String timestampLongDoubleMethod(String operandType) { if (operandType.equals("long")) { - return "secondsToNanoseconds"; + return "getTimestampSeconds"; } else if (operandType.equals("double")) { - return "doubleToNanoseconds"; + return "getTimestampSecondsWithFractionalNanos"; } else { return "unknown"; } } - private void generateScalarCompareTimestampColumn(String[] tdesc) throws Exception { + // ----------------------------------------------------------------------------------------------- + // + // Compare timestamp against timestamp, long (seconds), and double (seconds with fractional + // nanoseconds). + // + // TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn + // TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Column + //* {Long|Double}Col {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn + // + // TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampScalar + // TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Scalar + //* {Long|Double}Col {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampScalar + // + // TimestampScalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn + // TimestampScalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Column + //* {Long|Double}Scalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn + // + // ----------------------------------------------------------------------------------------------- + + private void generateTimestampCompareTimestampBase(String[] tdesc) throws Exception { String operatorName = tdesc[1]; - String operandType = tdesc[2]; - String className = getCamelCaseType(operandType) + "Scalar" + operatorName + "TimestampColumn"; - String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.LongScalar" + operatorName + "LongColumn"; + String operatorSymbol = tdesc[2]; + String className = "Timestamp" + tdesc[3] + operatorName + "Timestamp" + tdesc[4] + "Base"; + //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "Timestamp" + (tdesc[3].equals("Col") ? 
"Column" : tdesc[3]) + "CompareTimestamp" + + tdesc[4] + "Base"; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", baseClassName); - templateString = templateString.replaceAll("", operandType); - templateString = templateString.replaceAll("", timestampScalarConversion(operandType)); + templateString = templateString.replaceAll("", operatorSymbol); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } - private void generateTimestampColumnCompareScalar(String[] tdesc) throws Exception { + private void generateTimestampCompareTimestamp(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType = tdesc[2]; - String className = "TimestampCol" + operatorName + getCamelCaseType(operandType) + "Scalar"; - String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.LongCol" + operatorName + "LongScalar"; + String camelCaseOperandType = getCamelCaseType(operandType); + String className = camelCaseOperandType + tdesc[3] + operatorName + camelCaseOperandType + tdesc[4]; + String baseClassName = "Timestamp" + tdesc[3] + operatorName + "Timestamp" + tdesc[4] + "Base"; //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = camelCaseOperandType + (tdesc[3].equals("Col") ? "Column" : tdesc[3]) + "Compare" + camelCaseOperandType + + tdesc[4]; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); templateString = templateString.replaceAll("", baseClassName); - templateString = templateString.replaceAll("", operandType); - templateString = templateString.replaceAll("", timestampScalarConversion(operandType)); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } - private void generateFilterTimestampColumnCompareScalar(String[] tdesc) throws Exception { + private void generateTimestampCompareLongDouble(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType = tdesc[2]; - String className = "FilterTimestampCol" + operatorName + getCamelCaseType(operandType) + "Scalar"; - String baseClassName = "FilterLongCol" + operatorName + "LongScalar"; + String camelCaseOperandType = getCamelCaseType(operandType); + String operatorSymbol = tdesc[3]; + String inputColumnVectorType2 = this.getColumnVectorType(operandType); + + String className = "Timestamp" + tdesc[4] + operatorName + getCamelCaseType(operandType) + tdesc[5]; + + + // Timestamp Scalar case becomes use long/double scalar class. + String baseClassName; + if (tdesc[4].equals("Scalar")) { + baseClassName = camelCaseOperandType + "Scalar" + operatorName + camelCaseOperandType + "Column"; + } else { + baseClassName = ""; + } + //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "Timestamp" + (tdesc[4].equals("Col") ? 
"Column" : tdesc[4]) + "CompareLongDouble" + + tdesc[5]; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", baseClassName); + if (baseClassName.length() > 0) { + templateString = templateString.replaceAll("", baseClassName); + } templateString = templateString.replaceAll("", operandType); - templateString = templateString.replaceAll("", timestampScalarConversion(operandType)); + templateString = templateString.replaceAll("", operatorSymbol); + templateString = templateString.replaceAll("", inputColumnVectorType2); + templateString = templateString.replaceAll("", timestampLongDoubleMethod(operandType)); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } - private void generateFilterScalarCompareTimestampColumn(String[] tdesc) throws Exception { + private void generateLongDoubleCompareTimestamp(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType = tdesc[2]; - String className = "Filter" + getCamelCaseType(operandType) + "Scalar" + operatorName + "TimestampColumn"; - String baseClassName = "FilterLongScalar" + operatorName + "LongColumn"; + String camelCaseOperandType = getCamelCaseType(operandType); + String operatorSymbol = tdesc[3]; + String inputColumnVectorType1 = this.getColumnVectorType(operandType); + + String className = getCamelCaseType(operandType) + tdesc[4] + operatorName + "Timestamp" + tdesc[5]; + + // Timestamp Scalar case becomes use long/double scalar class. + String baseClassName; + if (tdesc[5].equals("Scalar")) { + baseClassName = camelCaseOperandType + "Col" + operatorName + camelCaseOperandType + "Scalar"; + } else { + baseClassName = ""; + } + //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "LongDouble" + (tdesc[4].equals("Col") ? 
"Column" : tdesc[4]) + "CompareTimestamp" + + tdesc[5]; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", baseClassName); + if (baseClassName.length() > 0) { + templateString = templateString.replaceAll("", baseClassName); + } templateString = templateString.replaceAll("", operandType); - templateString = templateString.replaceAll("", timestampScalarConversion(operandType)); + templateString = templateString.replaceAll("", operatorSymbol); + templateString = templateString.replaceAll("", inputColumnVectorType1); + templateString = templateString.replaceAll("", timestampLongDoubleMethod(operandType)); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } + // ----------------------------------------------------------------------------------------------- + // + // ----------------------------------------------------------------------------------------------- private void generateColumnArithmeticOperatorColumn(String[] tdesc, String returnType, String className) throws Exception { @@ -2098,7 +2521,7 @@ private void generateColumnArithmeticOperatorScalar(String[] tdesc, String retur className, templateString); String testScalarType = operandType2; - if (isDateTimeIntervalType(testScalarType)) { + if (isDateIntervalType(testScalarType)) { testScalarType = "long"; } @@ -2180,7 +2603,7 @@ private void generateScalarArithmeticOperatorColumn(String[] tdesc, String retur className, templateString); String testScalarType = operandType1; - if (isDateTimeIntervalType(testScalarType)) { + if (isDateIntervalType(testScalarType)) { testScalarType = "long"; } @@ -2297,19 +2720,19 @@ private void generateScalarArithmeticColumn(String[] tdesc) throws Exception { generateScalarArithmeticOperatorColumn(tdesc, returnType, className); } - private void generateFilterDecimalColumnCompareScalar(String[] tdesc) throws IOException { + private void generateFilterDecimalColumnCompareDecimalScalar(String[] tdesc) throws IOException { String operatorName = tdesc[1]; String className = "FilterDecimalCol" + operatorName + "DecimalScalar"; generateDecimalColumnCompare(tdesc, className); } - private void generateFilterDecimalScalarCompareColumn(String[] tdesc) throws IOException { + private void generateFilterDecimalScalarCompareDecimalColumn(String[] tdesc) throws IOException { String operatorName = tdesc[1]; String className = "FilterDecimalScalar" + operatorName + "DecimalColumn"; generateDecimalColumnCompare(tdesc, className); } - private void generateFilterDecimalColumnCompareColumn(String[] tdesc) throws IOException { + private void generateFilterDecimalColumnCompareDecimalColumn(String[] tdesc) throws IOException { String operatorName = tdesc[1]; String className = "FilterDecimalCol" + operatorName + "DecimalColumn"; generateDecimalColumnCompare(tdesc, className); @@ -2395,310 +2818,332 @@ private void generateFilterDTIColumnCompareScalar(String[] tdesc) throws Excepti className, templateString); } - private void generateColumnArithmeticColumnWithConvert(String[] tdesc) throws Exception { + // DateColumnArithmeticIntervalYearMonthColumn.txt + // DateScalarArithmeticIntervalYearMonthColumn.txt + // DateColumnArithmeticIntervalYearMonthScalar.txt + // + // IntervalYearMonthColumnArithmeticDateColumn.txt + // 
IntervalYearMonthScalarArithmeticDateColumn.txt + // IntervalYearMonthColumnArithmeticDateScalar.txt + // + // TimestampColumnArithmeticIntervalYearMonthColumn.txt + // TimestampScalarArithmeticIntervalYearMonthColumn.txt + // TimestampColumnArithmeticIntervalYearMonthScalar.txt + // + // IntervalYearMonthColumnArithmeticTimestampColumn.txt + // IntervalYearMonthScalarArithmeticTimestampColumn.txt + // IntervalYearMonthColumnArithmeticTimestampScalar.txt + // + private void generateDateTimeArithmeticIntervalYearMonth(String[] tdesc) throws Exception { String operatorName = tdesc[1]; - String operandType1 = tdesc[2]; - String operandType2 = tdesc[3]; - String operatorSymbol = tdesc[4]; - String typeConversion1 = tdesc[5]; - String typeConversion2 = tdesc[6]; - String className = getCamelCaseType(operandType1) - + "Col" + operatorName + getCamelCaseType(operandType2) + "Column"; - String returnType = getArithmeticReturnType(operandType1, operandType2); - String outputColumnVectorType = this.getColumnVectorType(returnType); - String inputColumnVectorType1 = this.getColumnVectorType(operandType1); - String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - // For date/timestamp/interval, this should be "long" - String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1); - String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2); - String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType); + String operatorSymbol = tdesc[2]; + String operandType1 = tdesc[3]; + String colOrScalar1 = tdesc[4]; + String operandType2 = tdesc[5]; + String colOrScalar2 = tdesc[6]; + String className = getCamelCaseType(operandType1) + colOrScalar1 + operatorName + + getCamelCaseType(operandType2) + colOrScalar2; //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = getCamelCaseType(operandType1) + (colOrScalar1.equals("Col") ? 
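+        // ("Col" is the abbreviation used in generated class names; the template
+        //  files on disk spell out "Column", hence this mapping)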
"Column" : colOrScalar1) + "Arithmetic" + + getCamelCaseType(operandType2) + colOrScalar2; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", inputColumnVectorType1); - templateString = templateString.replaceAll("", inputColumnVectorType2); - templateString = templateString.replaceAll("", outputColumnVectorType); - templateString = templateString.replaceAll("", operatorName); templateString = templateString.replaceAll("", operatorSymbol); - templateString = templateString.replaceAll("", operandType1); - templateString = templateString.replaceAll("", operandType2); - templateString = templateString.replaceAll("", returnType); - templateString = templateString.replaceAll("", vectorOperandType1); - templateString = templateString.replaceAll("", vectorOperandType2); - templateString = templateString.replaceAll("", vectorReturnType); - templateString = templateString.replaceAll("", typeConversion1); - templateString = templateString.replaceAll("", typeConversion2); - templateString = templateString.replaceAll("", getCamelCaseType(vectorReturnType)); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); - testCodeGen.addColumnColumnOperationTestCases( + String inputColumnVectorType1 = this.getColumnVectorType(operandType1); + String inputColumnVectorType2 = this.getColumnVectorType(operandType2); + + if (colOrScalar1.equals("Col") && colOrScalar1.equals("Column")) { + testCodeGen.addColumnColumnOperationTestCases( + className, + inputColumnVectorType1, + inputColumnVectorType2, + "long"); + } else if (colOrScalar1.equals("Col") && colOrScalar1.equals("Scalar")) { + String testScalarType = operandType2; + if (isDateIntervalType(testScalarType)) { + testScalarType = "long"; + } + testCodeGen.addColumnScalarOperationTestCases( + true, className, inputColumnVectorType1, - inputColumnVectorType2, - outputColumnVectorType); + "long", + testScalarType); + } else if (colOrScalar1.equals("Scalar") && colOrScalar1.equals("Column")) { + String testScalarType = operandType1; + if (isDateIntervalType(testScalarType)) { + testScalarType = "long"; + } + + testCodeGen.addColumnScalarOperationTestCases( + false, + className, + inputColumnVectorType2, + "long", + testScalarType); + } + } + + private String getTimestampHiveType(String operandType) { + if (operandType.equals("timestamp")) { + return "Timestamp"; + } else if (operandType.equals("interval_day_time")) { + return "HiveIntervalDayTime"; + } else { + return "Unknown"; + } + } + + private String getPisaTimestampConversion(String operandType) { + if (operandType.equals("timestamp")) { + return "new PisaTimestamp(value)"; + } else if (operandType.equals("interval_day_time")) { + return "value.pisaTimestampUpdate(new PisaTimestamp())"; + } else { + return "Unknown"; + } } - private void generateScalarArithmeticColumnWithConvert(String[] tdesc) throws Exception { + private String replaceTimestampScalar(String templateString, int argNum, String operandType) { + + if (!operandType.equals("timestamp") && !operandType.equals("interval_day_time")) { + return templateString; + } + + String scalarHiveTimestampTypePattern = ""; + String pisaTimestampConversionPattern = ""; + + templateString = templateString.replaceAll(scalarHiveTimestampTypePattern, getTimestampHiveType(operandType)); + templateString = 
templateString.replaceAll(pisaTimestampConversionPattern, getPisaTimestampConversion(operandType)); + + return templateString; + } + + // TimestampColumnArithmeticTimestampColumnBase.txt + // TimestampScalarArithmeticTimestampColumnBase.txt + // TimestampColumnArithmeticTimestampScalarBase.txt + // + private void generateTimestampArithmeticTimestampBase(String[] tdesc) throws Exception { String operatorName = tdesc[1]; - String operandType1 = tdesc[2]; - String operandType2 = tdesc[3]; - String operatorSymbol = tdesc[4]; - String typeConversion1 = tdesc[5]; - String typeConversion2 = tdesc[6]; - String className = getCamelCaseType(operandType1) - + "Scalar" + operatorName + getCamelCaseType(operandType2) + "Column"; - String returnType = getArithmeticReturnType(operandType1, operandType2); - String outputColumnVectorType = this.getColumnVectorType( - returnType == null ? "long" : returnType); - String inputColumnVectorType = this.getColumnVectorType(operandType2); - String inputColumnVectorType1 = this.getColumnVectorType(operandType1); - String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - // For date/timestamp/interval, this should be "long" - String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1); - String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2); - String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType); + String colOrScalar1 = tdesc[2]; + String colOrScalar2 = tdesc[3]; + + String baseClassName = "Timestamp" + colOrScalar1 + operatorName + + "Timestamp" + colOrScalar2 + "Base"; //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "Timestamp" + (colOrScalar1.equals("Col") ? 
"Column" : colOrScalar1) + "Arithmetic" + + "Timestamp" + colOrScalar2 + "Base"; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); - templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", inputColumnVectorType); - templateString = templateString.replaceAll("", outputColumnVectorType); - templateString = templateString.replaceAll("", operatorName); - templateString = templateString.replaceAll("", operatorSymbol); - templateString = templateString.replaceAll("", operandType1); - templateString = templateString.replaceAll("", operandType2); - templateString = templateString.replaceAll("", returnType); - templateString = templateString.replaceAll("", vectorOperandType1); - templateString = templateString.replaceAll("", vectorOperandType2); - templateString = templateString.replaceAll("", vectorReturnType); - templateString = templateString.replaceAll("", typeConversion1); - templateString = templateString.replaceAll("", typeConversion2); - templateString = templateString.replaceAll("", getCamelCaseType(vectorReturnType)); - writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, - className, templateString); - - String testScalarType = operandType1; - if (isDateTimeIntervalType(testScalarType)) { - testScalarType = "long"; - } + templateString = templateString.replaceAll("", baseClassName); + templateString = templateString.replaceAll("", operatorName.toLowerCase()); - testCodeGen.addColumnScalarOperationTestCases( - false, - className, - inputColumnVectorType, - outputColumnVectorType, - testScalarType); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + baseClassName, templateString); } - private void generateColumnArithmeticScalarWithConvert(String[] tdesc) throws Exception { + // TimestampColumnArithmeticTimestampColumn.txt + // TimestampScalarArithmeticTimestampColumn.txt + // TimestampColumnArithmeticTimestampScalar.txt + // + private void generateTimestampArithmeticTimestamp(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; - String operandType2 = tdesc[3]; - String operatorSymbol = tdesc[4]; - String typeConversion1 = tdesc[5]; - String typeConversion2 = tdesc[6]; - String className = getCamelCaseType(operandType1) - + "Col" + operatorName + getCamelCaseType(operandType2) + "Scalar"; - String returnType = getArithmeticReturnType(operandType1, operandType2); - String outputColumnVectorType = this.getColumnVectorType(returnType); - String inputColumnVectorType = this.getColumnVectorType(operandType1); - String inputColumnVectorType1 = this.getColumnVectorType(operandType1); - String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - // For date/timestamp/interval, this should be "long" - String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1); - String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2); - String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType); + String colOrScalar1 = tdesc[3]; + String operandType2 = tdesc[4]; + String colOrScalar2 = tdesc[5]; + + String className = getCamelCaseType(operandType1) + colOrScalar1 + operatorName + + getCamelCaseType(operandType2) + colOrScalar2; + String baseClassName = "Timestamp" + colOrScalar1 + operatorName + + "Timestamp" + colOrScalar2 + "Base"; //Read the template into a 
string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "Timestamp" + (colOrScalar1.equals("Col") ? "Column" : colOrScalar1) + "Arithmetic" + + "Timestamp" + colOrScalar2; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", inputColumnVectorType); - templateString = templateString.replaceAll("", outputColumnVectorType); - templateString = templateString.replaceAll("", operatorName); - templateString = templateString.replaceAll("", operatorSymbol); + templateString = templateString.replaceAll("", baseClassName); templateString = templateString.replaceAll("", operandType1); templateString = templateString.replaceAll("", operandType2); - templateString = templateString.replaceAll("", returnType); - templateString = templateString.replaceAll("", vectorOperandType1); - templateString = templateString.replaceAll("", vectorOperandType2); - templateString = templateString.replaceAll("", vectorReturnType); - templateString = templateString.replaceAll("", typeConversion1); - templateString = templateString.replaceAll("", typeConversion2); + if (colOrScalar1.equals("Scalar")) { + templateString = replaceTimestampScalar(templateString, 1, operandType1); + } + if (colOrScalar2.equals("Scalar")) { + templateString = replaceTimestampScalar(templateString, 2, operandType2); + } + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); - String testScalarType = operandType2; - if (isDateTimeIntervalType(testScalarType)) { - testScalarType = "long"; - } + String inputColumnVectorType1 = this.getColumnVectorType(operandType1); + String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - testCodeGen.addColumnScalarOperationTestCases( - true, + /* UNDONE: Col Col, vs Scalar Col vs Col Scalar + testCodeGen.addColumnColumnOperationTestCases( className, - inputColumnVectorType, - outputColumnVectorType, - testScalarType); + inputColumnVectorType1, + inputColumnVectorType2, + "long"); + */ } - private void generateDateTimeColumnArithmeticIntervalColumnWithConvert(String[] tdesc) throws Exception { + // DateColumnArithmeticTimestampColumnBase.txt + // DateScalarArithmeticTimestampColumnBase.txt + // DateColumnArithmeticTimestampScalarBase.txt + // + private void generateDateArithmeticTimestampBase(String[] tdesc) throws Exception { + String operatorName = tdesc[1]; + String colOrScalar1 = tdesc[2]; + String colOrScalar2 = tdesc[3]; + + String baseClassName = "Date" + colOrScalar1 + operatorName + + "Timestamp" + colOrScalar2 + "Base"; + + //Read the template into a string; + String fileName = "Date" + (colOrScalar1.equals("Col") ? 
"Column" : colOrScalar1) + "Arithmetic" + + "Timestamp" + colOrScalar2 + "Base"; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("", baseClassName); + templateString = templateString.replaceAll("", operatorName.toLowerCase()); + + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + baseClassName, templateString); + } + + // DateColumnArithmeticTimestampColumn.txt + // DateScalarArithmeticTimestampColumn.txt + // DateColumnArithmeticTimestampScalar.txt + // + private void generateDateArithmeticTimestamp(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; - String operandType2 = tdesc[3]; - String operatorSymbol = tdesc[4]; - String typeConversion = tdesc[5]; - String operatorFunction = tdesc[6]; - String className = getCamelCaseType(operandType1) - + "Col" + operatorName + getCamelCaseType(operandType2) + "Column"; - String returnType = getArithmeticReturnType(operandType1, operandType2); - String outputColumnVectorType = this.getColumnVectorType(returnType); - String inputColumnVectorType1 = this.getColumnVectorType(operandType1); - String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - // For date/timestamp/interval, this should be "long" - String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1); - String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2); - String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType); + String colOrScalar1 = tdesc[3]; + String operandType2 = tdesc[4]; + String colOrScalar2 = tdesc[5]; + + String className = getCamelCaseType(operandType1) + colOrScalar1 + operatorName + + getCamelCaseType(operandType2) + colOrScalar2; + String baseClassName = "Date" + colOrScalar1 + operatorName + + "Timestamp" + colOrScalar2 + "Base"; //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "Date" + (colOrScalar1.equals("Col") ? 
"Column" : colOrScalar1) + "Arithmetic" + + "Timestamp" + colOrScalar2; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", inputColumnVectorType1); - templateString = templateString.replaceAll("", inputColumnVectorType2); - templateString = templateString.replaceAll("", outputColumnVectorType); - templateString = templateString.replaceAll("", operatorName); - templateString = templateString.replaceAll("", operatorSymbol); + templateString = templateString.replaceAll("", baseClassName); templateString = templateString.replaceAll("", operandType1); templateString = templateString.replaceAll("", operandType2); - templateString = templateString.replaceAll("", returnType); - templateString = templateString.replaceAll("", vectorOperandType1); - templateString = templateString.replaceAll("", vectorOperandType2); - templateString = templateString.replaceAll("", vectorReturnType); - templateString = templateString.replaceAll("", typeConversion); - templateString = templateString.replaceAll("", operatorFunction); - templateString = templateString.replaceAll("", getCamelCaseType(vectorReturnType)); + if (colOrScalar1.equals("Scalar")) { + templateString = replaceTimestampScalar(templateString, 1, operandType1); + } + if (colOrScalar2.equals("Scalar")) { + templateString = replaceTimestampScalar(templateString, 2, operandType2); + } + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); + String inputColumnVectorType1 = this.getColumnVectorType(operandType1); + String inputColumnVectorType2 = this.getColumnVectorType(operandType2); + + /* UNDONE: Col Col, vs Scalar Col vs Col Scalar testCodeGen.addColumnColumnOperationTestCases( className, inputColumnVectorType1, inputColumnVectorType2, - outputColumnVectorType); + "long"); + */ } - private void generateDateTimeScalarArithmeticIntervalColumnWithConvert(String[] tdesc) throws Exception { + // TimestampColumnArithmeticDateColumnBase.txt + // TimestampScalarArithmeticDateColumnBase.txt + // TimestampColumnArithmeticDateScalarBase.txt + // + private void generateTimestampArithmeticDateBase(String[] tdesc) throws Exception { String operatorName = tdesc[1]; - String operandType1 = tdesc[2]; - String operandType2 = tdesc[3]; - String operatorSymbol = tdesc[4]; - String typeConversion = tdesc[5]; - String operatorFunction = tdesc[6]; - String className = getCamelCaseType(operandType1) - + "Scalar" + operatorName + getCamelCaseType(operandType2) + "Column"; - String returnType = getArithmeticReturnType(operandType1, operandType2); - String outputColumnVectorType = this.getColumnVectorType( - returnType == null ? 
"long" : returnType); - String inputColumnVectorType = this.getColumnVectorType(operandType2); - String inputColumnVectorType1 = this.getColumnVectorType(operandType1); - String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - // For date/timestamp/interval, this should be "long" - String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1); - String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2); - String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType); + String colOrScalar1 = tdesc[2]; + String colOrScalar2 = tdesc[3]; + + String baseClassName = "Timestamp" + colOrScalar1 + operatorName + + "Date" + colOrScalar2 + "Base"; //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "Timestamp" + (colOrScalar1.equals("Col") ? "Column" : colOrScalar1) + "Arithmetic" + + "Date" + colOrScalar2 + "Base"; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); - templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", inputColumnVectorType); - templateString = templateString.replaceAll("", outputColumnVectorType); - templateString = templateString.replaceAll("", operatorName); - templateString = templateString.replaceAll("", operatorSymbol); - templateString = templateString.replaceAll("", operandType1); - templateString = templateString.replaceAll("", operandType2); - templateString = templateString.replaceAll("", returnType); - templateString = templateString.replaceAll("", vectorOperandType1); - templateString = templateString.replaceAll("", vectorOperandType2); - templateString = templateString.replaceAll("", vectorReturnType); - templateString = templateString.replaceAll("", typeConversion); - templateString = templateString.replaceAll("", operatorFunction); - templateString = templateString.replaceAll("", getCamelCaseType(vectorReturnType)); - writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, - className, templateString); - - String testScalarType = operandType1; - if (isDateTimeIntervalType(testScalarType)) { - testScalarType = "long"; - } + templateString = templateString.replaceAll("", baseClassName); + templateString = templateString.replaceAll("", operatorName.toLowerCase()); - testCodeGen.addColumnScalarOperationTestCases( - false, - className, - inputColumnVectorType, - outputColumnVectorType, - testScalarType); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + baseClassName, templateString); } - private void generateDateTimeColumnArithmeticIntervalScalarWithConvert(String[] tdesc) throws Exception { + // TimestampColumnArithmeticDateColumn.txt + // TimestampScalarArithmeticDateColumn.txt + // TimestampColumnArithmeticDateScalar.txt + // + private void generateTimestampArithmeticDate(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; - String operandType2 = tdesc[3]; - String operatorSymbol = tdesc[4]; - String typeConversion = tdesc[5]; - String operatorFunction = tdesc[6]; - String className = getCamelCaseType(operandType1) - + "Col" + operatorName + getCamelCaseType(operandType2) + "Scalar"; - String returnType = getArithmeticReturnType(operandType1, operandType2); - String outputColumnVectorType = 
this.getColumnVectorType(returnType); - String inputColumnVectorType = this.getColumnVectorType(operandType1); - String inputColumnVectorType1 = this.getColumnVectorType(operandType1); - String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - // For date/timestamp/interval, this should be "long" - String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1); - String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2); - String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType); + String colOrScalar1 = tdesc[3]; + String operandType2 = tdesc[4]; + String colOrScalar2 = tdesc[5]; + + String className = getCamelCaseType(operandType1) + colOrScalar1 + operatorName + + getCamelCaseType(operandType2) + colOrScalar2; + String baseClassName = "Timestamp" + colOrScalar1 + operatorName + + "Date" + colOrScalar2 + "Base"; //Read the template into a string; - File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String fileName = "Timestamp" + (colOrScalar1.equals("Col") ? "Column" : colOrScalar1) + "Arithmetic" + + "Date" + colOrScalar2; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, fileName + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", inputColumnVectorType); - templateString = templateString.replaceAll("", outputColumnVectorType); - templateString = templateString.replaceAll("", operatorName); - templateString = templateString.replaceAll("", operatorSymbol); + templateString = templateString.replaceAll("", baseClassName); templateString = templateString.replaceAll("", operandType1); templateString = templateString.replaceAll("", operandType2); - templateString = templateString.replaceAll("", returnType); - templateString = templateString.replaceAll("", vectorOperandType1); - templateString = templateString.replaceAll("", vectorOperandType2); - templateString = templateString.replaceAll("", vectorReturnType); - templateString = templateString.replaceAll("", typeConversion); - templateString = templateString.replaceAll("", operatorFunction); + if (colOrScalar1.equals("Scalar")) { + templateString = replaceTimestampScalar(templateString, 1, operandType1); + } + if (colOrScalar2.equals("Scalar")) { + templateString = replaceTimestampScalar(templateString, 2, operandType2); + } + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); - String testScalarType = operandType2; - if (isDateTimeIntervalType(testScalarType)) { - testScalarType = "long"; - } + String inputColumnVectorType1 = this.getColumnVectorType(operandType1); + String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - testCodeGen.addColumnScalarOperationTestCases( - true, + /* UNDONE: Col Col, vs Scalar Col vs Col Scalar + testCodeGen.addColumnColumnOperationTestCases( className, - inputColumnVectorType, - outputColumnVectorType, - testScalarType); + inputColumnVectorType1, + inputColumnVectorType2, + "long"); + */ } - private static boolean isDateTimeIntervalType(String type) { + private static boolean isDateIntervalType(String type) { return (type.equals("date") - || type.equals("timestamp") - || type.equals("interval_year_month") + || type.equals("interval_year_month")); + } + + private static boolean isTimestampIntervalType(String type) { + return (type.equals("timestamp") || 
type.equals("interval_day_time")); } @@ -2821,12 +3266,14 @@ private String getArithmeticReturnType(String operandType1, private String getColumnVectorType(String primitiveType) throws Exception { if(primitiveType.equals("double")) { return "DoubleColumnVector"; - } else if (primitiveType.equals("long") || isDateTimeIntervalType(primitiveType)) { + } else if (primitiveType.equals("long") || isDateIntervalType(primitiveType)) { return "LongColumnVector"; } else if (primitiveType.equals("decimal")) { return "DecimalColumnVector"; } else if (primitiveType.equals("string")) { return "BytesColumnVector"; + } else if (isTimestampIntervalType(primitiveType)) { + return "TimestampColumnVector"; } throw new Exception("Unimplemented primitive column vector type: " + primitiveType); } diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java common/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java index e8dc21b..e262f01 100644 --- common/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java +++ common/src/java/org/apache/hadoop/hive/common/type/HiveIntervalDayTime.java @@ -117,6 +117,12 @@ public void set(long seconds, int nanos) { normalizeSecondsAndNanos(); } + public void set(PisaTimestamp pisaTimestamp) { + this.totalSeconds = pisaTimestamp.getEpochSeconds(); + this.nanos = pisaTimestamp.getSignedNanos(); + normalizeSecondsAndNanos(); + } + public void set(BigDecimal totalSecondsBd) { long totalSeconds = totalSecondsBd.longValue(); BigDecimal fractionalSecs = totalSecondsBd.remainder(BigDecimal.ONE); @@ -132,6 +138,11 @@ public HiveIntervalDayTime negate() { return new HiveIntervalDayTime(-getTotalSeconds(), -getNanos()); } + public PisaTimestamp pisaTimestampUpdate(PisaTimestamp pisaTimestamp) { + // NOTE: Our nanos here are *SIGNED*. 
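+    // For example, an interval of -1.5 seconds is held here as totalSeconds = -1,
+    // nanos = -500000000, while the java.sql.Timestamp-style convention represents
+    // the same span as seconds = -2, nanos = +500000000. The update call below is
+    // expected to normalize between the two conventions.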
+ return pisaTimestamp.updateFromEpochSecondsAndSignedNanos(totalSeconds, nanos); + } + @Override public int compareTo(HiveIntervalDayTime other) { long cmp = this.totalSeconds - other.totalSeconds; diff --git orc/src/java/org/apache/orc/TypeDescription.java orc/src/java/org/apache/orc/TypeDescription.java index f97a113..bd900ac 100644 --- orc/src/java/org/apache/orc/TypeDescription.java +++ orc/src/java/org/apache/orc/TypeDescription.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -282,9 +283,10 @@ private ColumnVector createColumn(int maxSize) { case SHORT: case INT: case LONG: - case TIMESTAMP: case DATE: return new LongColumnVector(); + case TIMESTAMP: + return new TimestampColumnVector(); case FLOAT: case DOUBLE: return new DoubleColumnVector(); diff --git orc/src/java/org/apache/orc/impl/WriterImpl.java orc/src/java/org/apache/orc/impl/WriterImpl.java index 5157d4d..ecac520 100644 --- orc/src/java/org/apache/orc/impl/WriterImpl.java +++ orc/src/java/org/apache/orc/impl/WriterImpl.java @@ -60,6 +60,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.io.Text; @@ -1732,17 +1733,20 @@ void recordPosition(PositionRecorder recorder) throws IOException { void writeBatch(ColumnVector vector, int offset, int length) throws IOException { super.writeBatch(vector, offset, length); - LongColumnVector vec = (LongColumnVector) vector; + TimestampColumnVector vec = (TimestampColumnVector) vector; if (vector.isRepeating) { if (vector.noNulls || !vector.isNull[0]) { - long value = vec.vector[0]; - long valueMillis = value / MILLIS_PER_NANO; - indexStatistics.updateTimestamp(valueMillis); + long millis = vec.getEpochMilliseconds(0); + int adjustedNanos = vec.getSignedNanos(0); + if (adjustedNanos < 0) { + adjustedNanos += NANOS_PER_SECOND; + } + indexStatistics.updateTimestamp(millis); if (createBloomFilter) { - bloomFilter.addLong(valueMillis); + bloomFilter.addLong(millis); } - final long secs = value / NANOS_PER_SECOND - base_timestamp; - final long nano = formatNanos((int) (value % NANOS_PER_SECOND)); + final long secs = vec.getEpochSeconds(0) - base_timestamp; + final long nano = formatNanos(adjustedNanos); for(int i=0; i < length; ++i) { seconds.write(secs); nanos.write(nano); @@ -1751,18 +1755,17 @@ void writeBatch(ColumnVector vector, int offset, } else { for(int i=0; i < length; ++i) { if (vec.noNulls || !vec.isNull[i + offset]) { - long value = vec.vector[i + offset]; - long valueMillis = value / MILLIS_PER_NANO; - long valueSecs = value /NANOS_PER_SECOND - base_timestamp; - int valueNanos = (int) (value % NANOS_PER_SECOND); - if (valueNanos < 0) { - valueNanos += NANOS_PER_SECOND; + long secs = vec.getEpochSeconds(i + offset) - base_timestamp; + long millis = vec.getEpochMilliseconds(i + offset); + int adjustedNanos = vec.getSignedNanos(i + offset); + if (adjustedNanos < 0) 
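+          // getSignedNanos() is negative for a pre-epoch fractional second;
+          // shift it into [0, 1e9) because the nanos stream encodes a
+          // non-negative sub-second part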
{
+            adjustedNanos += NANOS_PER_SECOND;
           }
-          seconds.write(valueSecs);
-          nanos.write(formatNanos(valueNanos));
-          indexStatistics.updateTimestamp(valueMillis);
+          seconds.write(secs);
+          nanos.write(formatNanos(adjustedNanos));
+          indexStatistics.updateTimestamp(millis);
           if (createBloomFilter) {
-            bloomFilter.addLong(valueMillis);
+            bloomFilter.addLong(millis);
           }
         }
       }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIColumnNoConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIColumnNoConvert.txt
index f2ec645..fe8f535 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIColumnNoConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIColumnNoConvert.txt
@@ -34,6 +34,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends LongColLongColumn {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(int colNum1, int colNum2, int outputColumn) {
     super(colNum1, colNum2, outputColumn);
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIScalarNoConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIScalarNoConvert.txt
index 1a360b8..293369f 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIScalarNoConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIScalarNoConvert.txt
@@ -29,6 +29,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends LongColLongScalar {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(int colNum, long value, int outputColumn) {
     super(colNum, value, outputColumn);
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt
index 9d692cb..60884cd 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt
@@ -29,6 +29,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends <BaseClassName> {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(int colNum, long value, int outputColumn) {
     super(colNum, value, outputColumn);
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIScalarArithmeticDTIColumnNoConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DTIScalarArithmeticDTIColumnNoConvert.txt
index 753ea71..04607f6 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIScalarArithmeticDTIColumnNoConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIScalarArithmeticDTIColumnNoConvert.txt
@@ -34,6 +34,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends LongScalarLongColumn {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(long value, int colNum, int outputColumn) {
     super(value, colNum, outputColumn);
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt
index fdd453a..d518c44 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt
@@ -34,6 +34,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends <BaseClassName> {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(long value, int colNum, int outputColumn) {
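+    // the long value parameter carries the scalar date (epoch days) or
+    // interval_year_month (months), so the inherited long comparison logic
+    // applies to it unchanged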
super(value, colNum, outputColumn); } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt new file mode 100644 index 0000000..845bc5f --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateColumnArithmeticIntervalYearMonthColumn.txt, which covers binary arithmetic + * expressions between date and interval year month columns. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; + + // Input #2 is type interval_year_month (months). + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + + // Output is type date. + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + long[] vector2 = inputColVector2.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. 
In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputVector[0] = dtm.addMonthsToDays(vector1[0], (int) vector2[0]); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector1[0], (int) vector2[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector1[0], (int) vector2[i]); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector1[i], (int) vector2[0]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector1[i], (int) vector2[0]); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector1[i], (int) vector2[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector1[i], (int) vector2[i]); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. + */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt new file mode 100644 index 0000000..86a95c9 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateColumnArithmeticIntervalYearMonthScalar.txt, which covers binary arithmetic + * expressions between a date column and a interval year month scalar. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private long value; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum, long value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + + // Output is type date. + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; + int n = batch.size; + long[] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputVector[0] = dtm.addMonthsToDays(vector[0], (int) value); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
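
The per-row work in this template is dtm.addMonthsToDays(vector[i], (int) value). Month arithmetic cannot be plain addition on epochDays because month lengths vary, so it is natural for the helper to delegate to a Calendar. A minimal standalone sketch of what such a helper can look like (hypothetical class and method names, not the DateTimeMath source; a real implementation must also choose its time zone deliberately):

    import java.util.Calendar;
    import java.util.TimeZone;
    import java.util.concurrent.TimeUnit;

    public class AddMonthsToDaysSketch {

      // Hypothetical stand-in for DateTimeMath.addMonthsToDays: widen epoch days
      // to milliseconds, let Calendar handle month lengths and leap years, then
      // narrow back to epoch days. A negative 'months' subtracts.
      public static long addMonthsToDays(long epochDays, int months) {
        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        cal.setTimeInMillis(TimeUnit.DAYS.toMillis(epochDays));
        cal.add(Calendar.MONTH, months);
        return TimeUnit.MILLISECONDS.toDays(cal.getTimeInMillis());
      }

      public static void main(String[] args) {
        long jan31 = 12448;                            // 2004-01-31 in epoch days
        System.out.println(addMonthsToDays(jan31, 1)); // 12477 == 2004-02-29
      }
    }
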
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt new file mode 100644 index 0000000..6241ee2 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateColumnArithmeticTimestampColumn.txt, which covers binary arithmetic + * expressions between a date column and a timestamp column. 
+ */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum1, int colNum2, int outputColumn) { + super(colNum1, colNum2, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumnBase.txt new file mode 100644 index 0000000..a61b769 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumnBase.txt @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template DateColumnArithmeticTimestampColumnBase.txt, a base class + * which covers binary arithmetic expressions between a date column and timestamp column. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type Date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; + + // Input #2 is type timestamp/interval_day_time. + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + + // Output is type timestamp. 
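
Input #2 and the output both use TimestampColumnVector, whose per-row values are handed around here as PisaTimestamp objects built from an epoch-seconds count plus a signed nanoseconds part (see updateFromEpochSecondsAndSignedNanos earlier in this patch). A toy model of that two-long representation and the normalization it needs, assuming a layout of whole seconds plus nanos-of-second (illustrative only, not the PisaTimestamp source):

    // Toy two-long timestamp: epoch seconds plus nanos-of-second, normalized so
    // 0 <= nanos < 1_000_000_000 even for negative (pre-1970) instants.
    public final class TwoLongTimestamp {
      private static final long NANOS_PER_SECOND = 1_000_000_000L;

      private long epochSeconds;
      private long nanos; // always in [0, NANOS_PER_SECOND)

      public TwoLongTimestamp(long epochSeconds, long nanos) {
        this.epochSeconds = epochSeconds;
        this.nanos = nanos;
        normalize();
      }

      // Analog of building a timestamp from a date widened to milliseconds.
      public static TwoLongTimestamp fromEpochMillis(long millis) {
        return new TwoLongTimestamp(Math.floorDiv(millis, 1000L),
            Math.floorMod(millis, 1000L) * 1_000_000L);
      }

      public TwoLongTimestamp add(TwoLongTimestamp other) {
        return new TwoLongTimestamp(epochSeconds + other.epochSeconds,
            nanos + other.nanos);
      }

      // Carry overflowed or negative nanos into the seconds field.
      private void normalize() {
        epochSeconds += Math.floorDiv(nanos, NANOS_PER_SECOND);
        nanos = Math.floorMod(nanos, NANOS_PER_SECOND);
      }
    }
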
+ TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + inputColVector2.asScratchPisaTimestamp(0), + 0); + } else if (inputColVector1.isRepeating) { + PisaTimestamp value1 = + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value1, + inputColVector2.asScratchPisaTimestamp(i), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value1, + inputColVector2.asScratchPisaTimestamp(i), + i); + } + } + } else if (inputColVector2.isRepeating) { + PisaTimestamp value2 = inputColVector2.asScratchPisaTimestamp(0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value2, + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value2, + i); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + inputColVector2.asScratchPisaTimestamp(i), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + inputColVector2.asScratchPisaTimestamp(i), + i); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt new file mode 100644 index 0000000..b813d11 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hive.common.util.DateUtils; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateColumnArithmeticTimestampScalar.txt, which covers binary arithmetic + * expressions between a date column and a timestamp scalar. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum, value, int outputColumn) { + super(colNum, , outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalarBase.txt new file mode 100644 index 0000000..d64fba0 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalarBase.txt @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template DateColumnArithmeticTimestampScalarBase.txt, a base class + * which covers binary arithmetic expressions between a date column and a timestamp scalar. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (int colNum, PisaTimestamp value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum]; + + // Output is type timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + value, + 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
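
Worth noting in these branches: scratchPisaTimestamp is one mutable instance owned by the expression and overwritten for every row, so the hot loop allocates no objects, and updateFromTimestampMilliseconds evidently returns the scratch instance itself, which is why the call can be nested directly in the arithmetic. A generic sketch of the pattern (hypothetical MutableTimestamp; the day-to-millis multiplication stands in for DateWritable.daysToMillis and ignores any time-zone adjustment it performs):

    final class MutableTimestamp {
      long epochSeconds;
      int nanos;

      // Overwrite in place and return 'this' so call sites can chain.
      MutableTimestamp setFromMillis(long millis) {
        epochSeconds = Math.floorDiv(millis, 1000L);
        nanos = (int) (Math.floorMod(millis, 1000L) * 1_000_000L);
        return this;
      }
    }

    final class ScratchReuseSketch {
      private final MutableTimestamp scratch = new MutableTimestamp();

      void process(long[] epochDays, int n) {
        for (int i = 0; i < n; i++) {
          // One object for the whole batch: no per-row garbage.
          consume(scratch.setFromMillis(epochDays[i] * 86_400_000L));
        }
      }

      private void consume(MutableTimestamp t) { /* write to the output vector */ }
    }
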
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt new file mode 100644 index 0000000..653565e --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + + +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateTimeScalarArithmeticIntervalYearMonthColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. 
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private long value; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (long value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type Interval_Year_Month (months). + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + + // Output is type Date. + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; + int n = batch.size; + long[] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputVector[0] = dtm.addMonthsToDays(value, (int) vector[0]); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt new file mode 100644 index 0000000..e93bed5 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateScalarArithmeticTimestampColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (long value, int colNum, int outputColumn) { + super(value, colNum, outputColumn); + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumnBase.txt new file mode 100644 index 0000000..a1f4e6f --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumnBase.txt @@ -0,0 +1,147 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template DateTimeScalarArithmeticTimestampColumnBase.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + + public (long value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp().updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) value)); + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type timestamp/interval_day_time. + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector2.noNulls; + outputColVector.isRepeating = inputColVector2.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector2.isRepeating) { + outputColVector.( + value, + inputColVector2.asScratchPisaTimestamp(0), + 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
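
Every scalar/column evaluate method in these templates shares one skeleton: an empty-batch early exit, then three branches (repeating input, no nulls, has nulls), each split again by whether batch.selected is in use. Distilled over a plain long[] payload, with op() as a placeholder for the generated operator (a sketch of the control flow, not Hive code):

    final class ColScalarLoopSketch {

      static void evaluate(long[] in, boolean[] isNull, boolean noNulls,
          boolean isRepeating, int[] sel, boolean selectedInUse, int n,
          long scalar, long[] out, boolean[] outIsNull) {
        if (n == 0) {
          return;                       // empty batch
        }
        if (isRepeating) {
          out[0] = op(in[0], scalar);
          outIsNull[0] = isNull[0];     // entry 0 is always copied over
        } else if (noNulls) {
          if (selectedInUse) {
            for (int j = 0; j != n; j++) {
              int i = sel[j];
              out[i] = op(in[i], scalar);
            }
          } else {
            for (int i = 0; i != n; i++) {
              out[i] = op(in[i], scalar);
            }
          }
        } else {                        // has nulls: compute anyway, copy flags
          if (selectedInUse) {
            for (int j = 0; j != n; j++) {
              int i = sel[j];
              out[i] = op(in[i], scalar);
              outIsNull[i] = isNull[i];
            }
          } else {
            for (int i = 0; i != n; i++) {
              out[i] = op(in[i], scalar);
            }
            System.arraycopy(isNull, 0, outIsNull, 0, n);
          }
        }
      }

      private static long op(long a, long b) {
        return a + b;                   // placeholder arithmetic
      }
    }
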
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector2.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, + inputColVector2.asScratchPisaTimestamp(i), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, + inputColVector2.asScratchPisaTimestamp(i), + i); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, + inputColVector2.asScratchPisaTimestamp(i), + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, + inputColVector2.asScratchPisaTimestamp(i), + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalColumnWithConvert.txt deleted file mode 100644 index cd7a1e7..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalColumnWithConvert.txt +++ /dev/null @@ -1,175 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template DateTimeColumnArithmeticIntervalColumnWithConvert.txt, which covers binary arithmetic - * expressions between columns. 
- */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum1; - private int colNum2; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public (int colNum1, int colNum2, int outputColumn) { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector1 = () batch.cols[colNum1]; - inputColVector2 = () batch.cols[colNum2]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - int n = batch.size; - [] vector1 = inputColVector1.vector; - [] vector2 = inputColVector2.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first - NullUtil.propagateNullsColCol( - inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - - /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or - * more inputs are null. This is to improve speed by avoiding - * conditional checks in the inner loop. - */ - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputVector[0] = ((vector1[0]), (int) vector2[0]); - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector1[0]), (int) vector2[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector1[0]), (int) vector2[i]); - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector1[i]), (int) vector2[0]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector1[i]), (int) vector2[0]); - } - } - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector1[i]), (int) vector2[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector1[i]), (int) vector2[i]); - } - } - } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and - * NaN for double. This is to prevent possible later zero-divide errors - * in complex arithmetic expressions like col2 / (col1 - 1) - * in the case when some col1 entries are null. 
- */ - NullUtil.setNullDataEntries(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} - diff --git ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt deleted file mode 100644 index abee249..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt +++ /dev/null @@ -1,152 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template ColumnArithmeticScalarWithConvert.txt, which covers binary arithmetic - * expressions between a column and a scalar. 
- */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private value; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public (int colNum, value, int outputColumn) { - this.colNum = colNum; - this.value = value; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector = () batch.cols[colNum]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; - [] vector = inputColVector.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.isRepeating) { - outputVector[0] = ((vector[0]), (int) value); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector[i]), (int) value); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector[i]), (int) value); - } - } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector[i]), (int) value); - outputIsNull[i] = inputIsNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector[i]), (int) value); - } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); - } - } - - NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public getValue() { - return value; - } - - public void setValue( value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt deleted file mode 100644 index 93a441a..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt +++ /dev/null @@ -1,165 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.*; - - -/* - * Because of the templatized nature of the code, either or both - * of these ColumnVector imports may be needed. Listing both of them - * rather than using ....vectorization.*; - */ -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template DateTimeScalarArithmeticIntervalColumnWithConvert.txt. - * Implements a vectorized arithmetic operator with a scalar on the left and a - * column vector on the right. The result is output to an output column vector. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private value; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public ( value, int colNum, int outputColumn) { - this.colNum = colNum; - this.value = (value); - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - /** - * Method to evaluate scalar-column operation in vectorized fashion. - * - * @batch a package of rows with each column stored in a vector - */ - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector = () batch.cols[colNum]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; - [] vector = inputColVector.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.isRepeating) { - outputVector[0] = (value, (int) vector[0]); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
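
Both this removed template and its replacements above keep one property: the scalar operand is converted exactly once, in the constructor, so evaluate() only converts the column side per row. A minimal sketch of that hoisting under the same assumptions as above (hypothetical names; the constant factor ignores DateWritable's time-zone handling):

    final class ScalarPrecomputeSketch {
      private final long scalarMillis; // converted once, reused for every batch

      ScalarPrecomputeSketch(long scalarEpochDays) {
        this.scalarMillis = scalarEpochDays * 86_400_000L;
      }

      long applyToRow(long rowMillis) {
        return rowMillis - scalarMillis; // placeholder for the generated operator
      }
    }
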
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (value, (int) vector[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (value, (int) vector[i]); - } - } - } else { /* there are nulls */ - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (value, (int) vector[i]); - outputIsNull[i] = inputIsNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (value, (int) vector[i]); - } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); - } - } - - NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public getValue() { - return value; - } - - public void setValue( value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDTIColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDTIColumnCompareScalar.txt index 55193ac..2351230 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDTIColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDTIColumnCompareScalar.txt @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; - import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareColumn.txt deleted file mode 100644 index 353e849..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareColumn.txt +++ /dev/null @@ -1,445 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; - -/** - * Generated from template FilterDecimalColumnCompareColumn.txt, which covers binary comparison - * filter expressions between two columns. Output is not produced in a separate column. - * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum1; - private int colNum2; - - public (int colNum1, int colNum2) { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1]; - DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; - int[] sel = batch.selected; - boolean[] nullPos1 = inputColVector1.isNull; - boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - HiveDecimalWritable[] vector1 = inputColVector1.vector; - HiveDecimalWritable[] vector2 = inputColVector2.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - // handle case where neither input has nulls - if (inputColVector1.noNulls && inputColVector2.noNulls) { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - - /* Either all must remain selected or all will be eliminated. - * Repeating property will not change. 
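That invariant is what makes the repeating/repeating case cheap: a single comparison decides the fate of the whole batch. As a sketch (a hypothetical helper; the generated classes inline this logic):

    // Returns the new batch size: either every selected row survives or none does.
    static int filterBothRepeating(long left0, long right0, int batchSize) {
      // '<' stands in for the comparison operator the template expands in place.
      return (left0 < right0) ? batchSize : 0;
    }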
- */ - if (!(vector1[0].compareTo(vector2[0]) 0)) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - - // handle case where only input 2 has nulls - } else if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (nullPos2[0] || - !(vector1[0].compareTo(vector2[0]) 0)) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - - // no need to check for nulls in input 1 - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (nullPos2[0]) { - - // no values will qualify because every comparison will be with NULL - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - - // handle case where only input 1 has nulls - } else if (inputColVector2.noNulls) { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (nullPos1[0] || - 
!(vector1[0].compareTo(vector2[0]) 0)) { - batch.size = 0; - return; - } - } else if (inputColVector1.isRepeating) { - if (nullPos1[0]) { - - // if repeating value is null then every comparison will fail so nothing qualifies - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - - // handle case where both inputs have nulls - } else { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (nullPos1[0] || nullPos2[0] || - !(vector1[0].compareTo(vector2[0]) 0)) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - if (nullPos1[0]) { - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (nullPos2[0]) { - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i] && !nullPos2[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i] && !nullPos2[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - 
if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - } - - @Override - public String getOutputType() { - return "boolean"; - } - - @Override - public int getOutputColumn() { - return -1; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("decimal"), - VectorExpressionDescriptor.ArgumentType.getType("decimal")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt new file mode 100644 index 0000000..a2352c6 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt @@ -0,0 +1,445 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +/** + * Generated from template FilterDecimalColumnCompareColumn.txt, which covers binary comparison + * filter expressions between two columns. Output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
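Every Filter* template in this patch implements that javadoc the same way: the predicate runs row by row, and the indices of surviving rows are compacted back into the batch's selected array. A dependency-free sketch of the idiom (the IntPredicate parameter is an illustration, not part of the real API):

    import java.util.function.IntPredicate;

    final class SelectedVectorSketch {
      // Compacts passing row indices into sel[] and returns the new batch size.
      static int compact(int[] sel, boolean selectedInUse, int n, IntPredicate pass) {
        int newSize = 0;
        if (selectedInUse) {
          for (int j = 0; j != n; j++) {
            int i = sel[j];
            if (pass.test(i)) {
              sel[newSize++] = i;       // keep row i
            }
          }
        } else {
          for (int i = 0; i != n; i++) {
            if (pass.test(i)) {
              sel[newSize++] = i;       // sel[] comes into use here
            }
          }
        }
        return newSize;                 // caller updates batch.size and selectedInUse
      }
    }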
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1]; + DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; + HiveDecimalWritable[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. + */ + if (!(vector1[0].compareTo(vector2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(vector1[0].compareTo(vector2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every 
comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(vector1[0].compareTo(vector2[0]) 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(vector1[0].compareTo(vector2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if 
(vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("decimal"), + VectorExpressionDescriptor.ArgumentType.getType("decimal")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt new file mode 100644 index 0000000..bdd39b9 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.common.type.HiveDecimal; + +/** + * This is a generated class to evaluate a comparison on a vector of decimal + * values. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private HiveDecimal value; + + public (int colNum, HiveDecimal value) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + HiveDecimalWritable[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!(DecimalUtil.compare(vector[0], value) 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (DecimalUtil.compare(vector[i], value) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (DecimalUtil.compare(vector[i], value) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!nullPos[0]) { + if (!(DecimalUtil.compare(vector[0], value) 0)) { + + // Entire batch is filtered out. 
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (DecimalUtil.compare(vector[i], value) 0) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (DecimalUtil.compare(vector[i], value) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("decimal"), + VectorExpressionDescriptor.ArgumentType.getType("decimal")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareScalar.txt deleted file mode 100644 index bdd39b9..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareScalar.txt +++ /dev/null @@ -1,160 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.common.type.HiveDecimal; - -/** - * This is a generated class to evaluate a comparison on a vector of decimal - * values. 
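Both the new and the old decimal scalar templates funnel every test through DecimalUtil.compare and then apply the expanded operator to the sign of the result. The same semantics, sketched with java.math.BigDecimal standing in for HiveDecimalWritable and HiveDecimal:

    import java.math.BigDecimal;

    final class DecimalCompareSketch {
      public static void main(String[] args) {
        BigDecimal columnValue = new BigDecimal("1.50");
        BigDecimal scalar = new BigDecimal("2");
        // The template tests 'compare(...) <op> 0'; '< 0' corresponds to LESS THAN.
        System.out.println(columnValue.compareTo(scalar) < 0);   // prints true
      }
    }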
- */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private HiveDecimal value; - - public (int colNum, HiveDecimal value) { - this.colNum = colNum; - this.value = value; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); - } - DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; - int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!(DecimalUtil.compare(vector[0], value) 0)) { - - // Entire batch is filtered out. - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (DecimalUtil.compare(vector[i], value) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (DecimalUtil.compare(vector[i], value) 0) { - sel[newSize++] = i; - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { - if (!(DecimalUtil.compare(vector[0], value) 0)) { - - // Entire batch is filtered out. - batch.size = 0; - } - } else { - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos[i]) { - if (DecimalUtil.compare(vector[i], value) 0) { - sel[newSize++] = i; - } - } - } - - // Change the selected vector - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos[i]) { - if (DecimalUtil.compare(vector[i], value) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - - @Override - public int getOutputColumn() { - return -1; - } - - @Override - public String getOutputType() { - return "boolean"; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("decimal"), - VectorExpressionDescriptor.ArgumentType.getType("decimal")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareColumn.txt deleted file mode 100644 index 0608016..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareColumn.txt +++ /dev/null @@ -1,160 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.common.type.HiveDecimal; - -/** - * This is a generated class to evaluate a comparison on a vector of decimal - * values. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private HiveDecimal value; - - public (HiveDecimal value, int colNum) { - this.colNum = colNum; - this.value = value; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); - } - DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; - int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!(DecimalUtil.compare(value, vector[0]) 0)) { - - // Entire batch is filtered out. - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (DecimalUtil.compare(value, vector[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (DecimalUtil.compare(value, vector[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { - if (!(DecimalUtil.compare(value, vector[0]) 0)) { - - // Entire batch is filtered out. 
- batch.size = 0; - } - } else { - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos[i]) { - if (DecimalUtil.compare(value, vector[i]) 0) { - sel[newSize++] = i; - } - } - } - - // Change the selected vector - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos[i]) { - if (DecimalUtil.compare(value, vector[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - - @Override - public int getOutputColumn() { - return -1; - } - - @Override - public String getOutputType() { - return "boolean"; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("decimal"), - VectorExpressionDescriptor.ArgumentType.getType("decimal")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt new file mode 100644 index 0000000..0608016 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.common.type.HiveDecimal; + +/** + * This is a generated class to evaluate a comparison on a vector of decimal + * values. 
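The Scalar*CompareColumn variants differ from their column-on-the-left counterparts in two small but important ways: the operand order of the compare call flips (the operators are not symmetric), and a repeating NULL input short-circuits the batch, because a SQL comparison against NULL is never true. Both points in a sketch (hypothetical helpers):

    // Operand order matters: 'scalar < column' is not the same test as 'column < scalar'.
    static boolean scalarOnLeft(long value, long col) {
      return value < col;               // '<' stands in for the expanded operator
    }

    // New batch size when the repeating input may be null.
    static int filterRepeatingNullable(boolean isNull0, boolean passes0, int batchSize) {
      if (isNull0) {
        return 0;                       // NULL never satisfies a comparison
      }
      return passes0 ? batchSize : 0;
    }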
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private HiveDecimal value; + + public (HiveDecimal value, int colNum) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + HiveDecimalWritable[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!(DecimalUtil.compare(value, vector[0]) 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (DecimalUtil.compare(value, vector[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (DecimalUtil.compare(value, vector[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!nullPos[0]) { + if (!(DecimalUtil.compare(value, vector[0]) 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (DecimalUtil.compare(value, vector[i]) 0) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (DecimalUtil.compare(value, vector[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("decimal"), + VectorExpressionDescriptor.ArgumentType.getType("decimal")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterIntervalDayTimeColumnCompareIntervalDayTimeColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterIntervalDayTimeColumnCompareIntervalDayTimeColumn.txt new file mode 100644 index 0000000..8d9bdf1 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterIntervalDayTimeColumnCompareIntervalDayTimeColumn.txt @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Generated from template FilterIntervalDayTimeColumnCompareIntervalDayTimeColumn.txt, which
+ * covers comparison filter expressions between two interval_day_time columns; output is not
+ * produced in a separate column.
+ * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering.
+ */
+public class <ClassName> extends <BaseClassName> {
+
+  public <ClassName>(int colNum1, int colNum2) {
+    super(colNum1, colNum2);
+  }
+
+  public <ClassName>() {
+    super();
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.FILTER)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"),
+            VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterIntervalDayTimeColumnCompareIntervalDayTimeScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterIntervalDayTimeColumnCompareIntervalDayTimeScalar.txt
new file mode 100644
index 0000000..7022b4f
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterIntervalDayTimeColumnCompareIntervalDayTimeScalar.txt
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Generated from template FilterIntervalDayTimeColumnCompareIntervalDayTimeScalar.txt, which
+ * covers comparison filter expressions between an interval_day_time column and a scalar of the
+ * same type; output is not produced in a separate column.
+ * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public class extends { + + public (int colNum, HiveIntervalDayTime value) { + super(colNum, value.pisaTimestampUpdate(new PisaTimestamp())); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterIntervalDayTimeScalarCompareIntervalDayTimeColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterIntervalDayTimeScalarCompareIntervalDayTimeColumn.txt new file mode 100644 index 0000000..d227bf0 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterIntervalDayTimeScalarCompareIntervalDayTimeColumn.txt @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterIntervalDayTimeScalarCompareColumn.txt, which covers comparison + * expressions between a datetime/interval column and a scalar of the same type, however output is not + * produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
+ */ +public class extends { + + public (HiveIntervalDayTime value, int colNum) { + super(value.pisaTimestampUpdate(new PisaTimestamp()), colNum); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt new file mode 100644 index 0000000..0c8321f --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt @@ -0,0 +1,185 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterColumnCompareColumn.txt, which covers binary comparison + * expressions between two columns, however output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + inputColVector1 = () batch.cols[colNum1]; + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + [] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // filter rows with NULL on left input + int newSize; + newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // filter rows with NULL on right input + newSize = NullUtil.filterNulls(batch.cols[colNum2], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // All rows with nulls have been filtered out, so just do normal filter for non-null case + if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + if (!(vector1[0] inputColVector2.(0))) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + value1 = vector1[0]; + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (value1 inputColVector2.(i)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (value1 inputColVector2.(i)) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + value2 = inputColVector2.(0); + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] value2) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i] value2) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] inputColVector2.(i)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i] inputColVector2.(i)) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff 
--git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampScalar.txt new file mode 100644 index 0000000..7e4d55e --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampScalar.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterColumnCompareScalar.txt, which covers binary comparison + * expressions between a column and a scalar, however output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum, Timestamp value) { + super(colNum, new PisaTimestamp(value).()); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt new file mode 100644 index 0000000..ba6ca66 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterScalarCompareTimestampColumn.txt, which covers comparison + * expressions between a long/double scalar and a timestamp column, however output is not produced + * in a separate column. The selected vector of the input {@link VectorizedRowBatch} is updated + * for in-place filtering. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private value; + + public ( value, int colNum) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!(value inputColVector.(0))) { + //Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (value inputColVector.(i)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (value inputColVector.(i)) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + if (!(value inputColVector.(0))) { + //Entire batch is filtered out. 
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (value inputColVector.(i)) { + sel[newSize++] = i; + } + } + } + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (value inputColVector.(i)) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareTimestampColumn.txt deleted file mode 100644 index e0e5022..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareTimestampColumn.txt +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; - -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Generated from template FilterScalarCompareTimestampColumn.txt, which covers comparison - * expressions between a long or double scalar and a column, however output is not produced in a separate column. - * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. - * Note: For timestamp and long or double we implicitly interpret the long as the number - * of seconds or double as seconds and fraction since the epoch. 
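
[Review note: illustration only, not part of this patch]
The deleted template's Note above states the convention that the new LongDouble templates keep: a long scalar is interpreted as seconds since the epoch, a double as seconds with a fractional nanosecond part. A minimal self-contained sketch of that interpretation in plain Java (class and method names are hypothetical, not the patch's API):

    import java.sql.Timestamp;

    public class EpochSecondsCompareSketch {

        // Interpret a double as seconds (with fraction) since the epoch and
        // compare it to a java.sql.Timestamp.
        static int compareSecondsToTimestamp(double seconds, Timestamp ts) {
            // getTime() carries milliseconds; add the sub-millisecond nanos.
            double tsSeconds = ts.getTime() / 1000.0
                + (ts.getNanos() % 1_000_000) / 1_000_000_000.0;
            return Double.compare(seconds, tsSeconds);
        }

        public static void main(String[] args) {
            Timestamp ts = new Timestamp(1500L); // 1.5 seconds after the epoch
            System.out.println(compareSecondsToTimestamp(1.5, ts)); // 0 (equal)
            System.out.println(compareSecondsToTimestamp(2.0, ts)); // 1 (greater)
        }
    }
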
- */
-public class <ClassName> extends <BaseClassName> {
-
-  public <ClassName>(<OperandType> value, int colNum) {
-    super(TimestampUtils.<TimestampScalarConversion>(value), colNum);
-  }
-
-  public <ClassName>() {
-    super();
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.FILTER)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"),
-            VectorExpressionDescriptor.ArgumentType.getType("timestamp"))
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.SCALAR,
-            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
-  }
-}
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
new file mode 100644
index 0000000..12f73da
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Generated from template FilterTimestampColumnBetween.txt, which covers [NOT] BETWEEN filter
+ * expressions where a column is [NOT] between one scalar and another.
+ * Output is not produced in a separate column. The selected vector of the input
+ * {@link VectorizedRowBatch} is updated for in-place filtering.
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + + // The comparison is of the form "column BETWEEN leftValue AND rightValue" + private PisaTimestamp leftValue; + private PisaTimestamp rightValue; + private PisaTimestamp scratchValue; + + public (int colNum, Timestamp leftValue, Timestamp rightValue) { + this.colNum = colNum; + this.leftValue = new PisaTimestamp(leftValue); + this.rightValue = new PisaTimestamp(rightValue); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. + // Repeating property will not change. + if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. + // Repeating property will not change. + if (!nullPos[0]) { + if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { + + // Entire batch is filtered out. 
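
[Review note: illustration only, not part of this patch]
The repeating branch here tests element 0 once: the filter keeps rows satisfying leftValue <= col <= rightValue (both bounds inclusive), so a repeating value outside either bound empties the batch in the assignment that follows. A reduced sketch of the same inclusive BETWEEN check over a plain long[] column with a selected vector (hypothetical mini-batch, not the Hive classes):

    public class BetweenFilterSketch {
        public static void main(String[] args) {
            long[] col = {5, 10, 15, 20, 25};
            int[] sel = {0, 1, 2, 3, 4}; // selected row indices (batch.selected analogue)
            int size = 5;                // batch.size analogue
            long left = 10, right = 20;  // inclusive bounds

            int newSize = 0;
            for (int j = 0; j < size; j++) {
                int i = sel[j];
                if (left <= col[i] && col[i] <= right) {
                    sel[newSize++] = i;  // keep row i, compacting in place
                }
            }
            size = newSize; // rows 1, 2, 3 survive
            for (int j = 0; j < size; j++) {
                System.out.println(sel[j]);
            }
        }
    }
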
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt new file mode 100644 index 0000000..d10be96 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterColumnCompareColumn.txt, which covers binary comparison + * expressions between two columns, however output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
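
[Review note: illustration only, not part of this patch]
The template below first strips rows that are NULL in either input with NullUtil.filterNulls, so the comparison loops that follow can run without per-row null checks. A plain-Java sketch of that pre-pass (hypothetical mini-batch types, not the Hive classes):

    public class FilterNullsFirstSketch {

        // Drop rows whose isNull flag is set, compacting the selected vector;
        // returns the new batch size (mirrors the role of NullUtil.filterNulls).
        static int filterNulls(boolean[] isNull, int[] sel, int size) {
            int newSize = 0;
            for (int j = 0; j < size; j++) {
                int i = sel[j];
                if (!isNull[i]) {
                    sel[newSize++] = i;
                }
            }
            return newSize;
        }

        public static void main(String[] args) {
            boolean[] leftNull  = {false, true, false, false};
            boolean[] rightNull = {false, false, false, true};
            int[] sel = {0, 1, 2, 3};
            int size = 4;
            size = filterNulls(leftNull, sel, size);  // drops row 1
            size = filterNulls(rightNull, sel, size); // drops row 3
            // the remaining comparison loop needs no null checks
            for (int j = 0; j < size; j++) {
                System.out.println(sel[j]); // prints 0 then 2
            }
        }
    }
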
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + inputColVector2 = () batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + [] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // filter rows with NULL on left input + int newSize; + newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // filter rows with NULL on right input + newSize = NullUtil.filterNulls(batch.cols[colNum2], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // All rows with nulls have been filtered out, so just do normal filter for non-null case + if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + if (!(inputColVector1.(0) vector2[0])) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + value1 = inputColVector1.(0); + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (value1 vector2[i]) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (value1 vector2[i]) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + value2 = vector2[0]; + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.(i) value2) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.(i) value2) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.(i) vector2[i]) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.(i) vector2[i]) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff 
--git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt new file mode 100644 index 0000000..31c3f6b --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterTimestampColumnCompareScalar.txt, which covers comparison + * expressions between a timestamp column and a long/double scalar, however output is not produced + * in a separate column. The selected vector of the input {@link VectorizedRowBatch} is updated + * for in-place filtering. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private value; + + public (int colNum, value) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!(inputColVector.(0) value)) { + //Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (inputColVector.(i) value) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector.(i) value) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + if (!(inputColVector.(0) value)) { + //Entire batch is filtered out. 
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (inputColVector.(i) value) { + sel[newSize++] = i; + } + } + } + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (inputColVector.(i) value) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareScalar.txt deleted file mode 100644 index 0c37b4d..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareScalar.txt +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; - -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Generated from template FilterTimestampColumnCompareScalar.txt, which covers comparison - * expressions between a timestamp column and a long or double scalar, however output is not - * produced in a separate column. - * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. - * Note: For timestamp and long or double we implicitly interpret the long as the number - * of seconds or double as seconds and fraction since the epoch. 
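
[Review note: illustration only, not part of this patch]
A pattern worth calling out in the hunks above: when the batch was dense (selectedInUse == false) and the filter removed rows, the template sets batch.selectedInUse = true, because sel[] only becomes authoritative once newSize < n. A plain-Java sketch of that contract (hypothetical mini-batch, not the Hive classes):

    public class SelectedInUseSketch {
        public static void main(String[] args) {
            int n = 4;                     // batch.size analogue
            boolean selectedInUse = false; // dense batch: rows 0..n-1 implicitly selected
            int[] sel = new int[n];
            long[] col = {7, 3, 9, 1};

            int newSize = 0;
            for (int i = 0; i < n; i++) {  // dense path iterates 0..n-1 directly
                if (col[i] > 5) {
                    sel[newSize++] = i;
                }
            }
            if (newSize < n) {
                selectedInUse = true;      // sel[] is authoritative only from here on
            }
            System.out.println(selectedInUse + " size=" + newSize); // true size=2
        }
    }
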
- */ -public class extends { - - public (int colNum, value) { - super(colNum, TimestampUtils.(value)); - } - - public () { - super(); - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("timestamp"), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt new file mode 100644 index 0000000..746b297 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterTimestampColumnCompareTimestampColumn.txt, which covers comparison + * expressions between a datetime/interval column and a scalar of the same type, however output is not + * produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public class extends { + + public (int colNum1, int colNum2) { + super(colNum1, colNum2); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumnBase.txt new file mode 100644 index 0000000..b5a7a7a --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumnBase.txt @@ -0,0 +1,429 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +/** + * Generated from template FilterTimestampColumnCompareColumn.txt, which covers binary comparison + * filter expressions between two columns. Output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. 
+ */ + if (!(inputColVector1.compareTo(0, inputColVector2, 0) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(inputColVector1.compareTo(0, inputColVector2, 0) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + 
batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(inputColVector1.compareTo(0, inputColVector2, 0) 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(inputColVector1.compareTo(0, inputColVector2, 0) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if 
(batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt index d13fecf..f744d9b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt @@ -15,23 +15,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** - * Generated from template FilterTimestampColumnCompareTimestampScalar.txt, which covers comparison - * expressions between a timestamp column and a timestamp scalar, however output is not - * produced in a separate column. + * Generated from template FilterTimestampColumnCompareScalar.txt, which covers comparison + * expressions between a datetime/interval column and a scalar of the same type, however output is not + * produced in a separate column. * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. */ public class extends { - public (int colNum, long value) { - super(colNum, value); + public (int colNum, Timestamp value) { + super(colNum, new PisaTimestamp(value)); } public () { @@ -51,4 +53,4 @@ public class extends { VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); } -} \ No newline at end of file +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalarBase.txt new file mode 100644 index 0000000..c84b4bf --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalarBase.txt @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
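
[Review note: illustration only, not part of this patch]
The FilterTimestampColumnCompareTimestampColumnBase template above enumerates four cases (neither input nullable, only input 2, only input 1, both), and inside each case a repeating NULL operand short-circuits: every comparison would involve NULL, so nothing qualifies and the batch empties. A trivial plain-Java sketch of that short-circuit (hypothetical flags, not the Hive classes):

    public class RepeatingNullShortCircuitSketch {
        public static void main(String[] args) {
            boolean col1IsRepeating = true;
            boolean col1NullAtZero = true; // the single repeated value is NULL
            int batchSize = 1024;

            // Mirrors the template's early exit: a repeating NULL on either side
            // means no row can pass the filter.
            if (col1IsRepeating && col1NullAtZero) {
                batchSize = 0;
            }
            System.out.println(batchSize); // 0
        }
    }
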
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterColumnCompareScalar.txt, which covers binary comparison + * expressions between a column and a scalar, however output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + + public (int colNum, PisaTimestamp value) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!(inputColVector.compareTo(0, value) 0)) { + //Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (inputColVector.compareTo(i, value) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector.compareTo(i, value) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + if (!(inputColVector.compareTo(0, value) 0)) { + //Entire batch is filtered out. 
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (inputColVector.compareTo(i, value) 0) { + sel[newSize++] = i; + } + } + } + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (inputColVector.compareTo(i, value) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt new file mode 100644 index 0000000..c3cd3b4 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareLongDoubleColumn.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterScalarCompareColumn.txt, which covers binary comparison + * expressions between a scalar and a column, however output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
+ */
+public class <ClassName> extends <BaseClassName> {
+
+  private static final long serialVersionUID = 1L;
+
+  public <ClassName>(Timestamp value, int colNum) {
+    super(new PisaTimestamp(value).<GetTimestampLongDoubleMethod>(), colNum);
+  }
+
+  public <ClassName>() {
+    super();
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.FILTER)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("timestamp"),
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt
index a37db3d..05ab310 100644
--- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt
@@ -15,22 +15,25 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
-import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 
 /**
- * Generated from template FilterTimestampScalarCompareTimestampColumn.txt, which covers comparison
- * expressions between a timestamp scalar and a column, however output is not produced in a separate column.
+ * Generated from template FilterTimestampScalarCompareTimestampColumn.txt, which covers comparison
+ * expressions between a datetime/interval column and a scalar of the same type, however output is not
+ * produced in a separate column.
  * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering.
  */
 public class <ClassName> extends <BaseClassName> {
 
-  public <ClassName>(long value, int colNum) {
-    super(value, colNum);
+  public <ClassName>(Timestamp value, int colNum) {
+    super(new PisaTimestamp(value), colNum);
   }
 
   public <ClassName>() {
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumnBase.txt
new file mode 100644
index 0000000..608faef
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumnBase.txt
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.common.type.HiveDecimal; + +/** + * This is a generated class to evaluate a comparison on a vector of timestamp + * values. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + + public (PisaTimestamp value, int colNum) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!(inputColVector.compareTo(value, 0) 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector.compareTo(value, i) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector.compareTo(value, i) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!nullPos[0]) { + if (!(inputColVector.compareTo(value, 0) 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (inputColVector.compareTo(value, i) 0) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (inputColVector.compareTo(value, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeColumnWithConvert.txt deleted file mode 100644 index c182557..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeColumnWithConvert.txt +++ /dev/null @@ -1,177 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template IntervalColumnArithmeticDateTimeColumnWithConvert.txt, which covers binary arithmetic - * expressions between columns. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum1; - private int colNum2; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public (int colNum1, int colNum2, int outputColumn) { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector1 = () batch.cols[colNum1]; - inputColVector2 = () batch.cols[colNum2]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - int n = batch.size; - [] vector1 = inputColVector1.vector; - [] vector2 = inputColVector2.vector; - [] outputVector = outputColVector.vector; - - // arg1 is interval type, arg2 is datetime type - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first - NullUtil.propagateNullsColCol( - inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - - /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or - * more inputs are null. This is to improve speed by avoiding - * conditional checks in the inner loop. 
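
[Review note: illustration only, not part of this patch]
The comment above documents a deliberate trade-off that the surviving arithmetic templates keep: compute every lane, including NULL lanes, to keep the inner loop branch-free, then overwrite NULL positions with safe values (1 for long, NaN for double) so later expressions such as col2 / (col1 - 1) cannot divide by zero. A plain-Java sketch (hypothetical arrays, not the Hive classes):

    public class BranchFreeNullsSketch {
        public static void main(String[] args) {
            long[] a = {10, 20, 30};
            long[] b = {1, 0, 3};                 // row 1 is NULL; its data is garbage
            boolean[] isNull = {false, true, false};
            long[] out = new long[3];

            for (int i = 0; i < 3; i++) {         // branch-free: compute all lanes
                out[i] = a[i] + b[i];
            }
            for (int i = 0; i < 3; i++) {         // then sanitize the NULL lanes
                if (isNull[i]) {
                    out[i] = 1;                   // convention: 1 for long, NaN for double
                }
            }
            System.out.println(java.util.Arrays.toString(out)); // [11, 1, 33]
        }
    }
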
- */ - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputVector[0] = ((vector2[0]), (int) vector1[0]); - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector2[0]), (int) vector1[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector2[0]), (int) vector1[i]); - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector2[i]), (int) vector1[0]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector2[i]), (int) vector1[0]); - } - } - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector2[i]), (int) vector1[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector2[i]), (int) vector1[i]); - } - } - } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and - * NaN for double. This is to prevent possible later zero-divide errors - * in complex arithmetic expressions like col2 / (col1 - 1) - * in the case when some col1 entries are null. - */ - NullUtil.setNullDataEntries(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} - diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt deleted file mode 100644 index 8fa3563..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt +++ /dev/null @@ -1,154 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template IntervalColumnArithmeticDateTimeScalarWithConvert.txt, which covers binary arithmetic - * expressions between a column and a scalar. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private value; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public (int colNum, value, int outputColumn) { - this.colNum = colNum; - this.value = (value); - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector = () batch.cols[colNum]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; - [] vector = inputColVector.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - // arg1 is interval, arg2 is datetime - - if (inputColVector.isRepeating) { - outputVector[0] = (value, (int) vector[0]); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
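
[Review note: illustration only, not part of this patch]
The "always copy over entry 0" comment above reflects the repeating-vector convention used throughout: when isRepeating is set, readers only consult index 0, so writing the value and null flag for entry 0 alone is sufficient. A plain-Java sketch (hypothetical arrays, not the Hive classes):

    public class RepeatingEntryZeroSketch {
        public static void main(String[] args) {
            boolean inputIsRepeating = true;
            long[] vector = {42, 0, 0};            // repeating: only index 0 is meaningful
            boolean[] inputIsNull = {false, false, false};
            long[] out = new long[3];
            boolean[] outIsNull = new boolean[3];

            if (inputIsRepeating) {
                out[0] = vector[0] + 1;            // compute entry 0 only
                outIsNull[0] = inputIsNull[0];     // and always copy its null flag
            }
            System.out.println(out[0] + " " + outIsNull[0]); // 43 false
        }
    }
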
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = <OperatorFunction>(value, (int) vector[i]);
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = <OperatorFunction>(value, (int) vector[i]);
-        }
-      }
-    } else /* there are nulls */ {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = <OperatorFunction>(value, (int) vector[i]);
-          outputIsNull[i] = inputIsNull[i];
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = <OperatorFunction>(value, (int) vector[i]);
-        }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
-      }
-    }
-
-    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
-  }
-
-  @Override
-  public int getOutputColumn() {
-    return outputColumn;
-  }
-
-  @Override
-  public String getOutputType() {
-    return "<ReturnType>";
-  }
-
-  public int getColNum() {
-    return colNum;
-  }
-
-  public void setColNum(int colNum) {
-    this.colNum = colNum;
-  }
-
-  public <OperandType2> getValue() {
-    return value;
-  }
-
-  public void setValue(<OperandType2> value) {
-    this.value = value;
-  }
-
-  public void setOutputColumn(int outputColumn) {
-    this.outputColumn = outputColumn;
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"),
-            VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"))
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.COLUMN,
-            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
-  }
-}
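An aside on the angle-bracket tokens restored above (<ClassName>, <OperandType1>, <OperatorFunction>, ...): these template files are not valid Java as they stand; GenVectorCode substitutes the tokens from its templateExpansions table before the expanded source is compiled. A minimal sketch of that substitution step (illustrative; the real generator also derives class names and writes the expanded files to disk):

    import java.util.LinkedHashMap;
    import java.util.Map;

    // Illustrative token substitution over a template body; token names follow the
    // <TokenName> convention used in the templates above.
    public class TemplateExpansionSketch {
      static String expandTemplate(String templateText, Map<String, String> tokens) {
        String result = templateText;
        for (Map.Entry<String, String> e : tokens.entrySet()) {
          result = result.replace("<" + e.getKey() + ">", e.getValue());
        }
        return result;
      }

      public static void main(String[] args) {
        Map<String, String> tokens = new LinkedHashMap<>();
        tokens.put("ClassName", "IntervalYearMonthColAddIntervalYearMonthScalar");  // hypothetical expansion
        tokens.put("OperatorSymbol", "+");
        System.out.println(expandTemplate(
            "public class <ClassName> ... a <OperatorSymbol> b ...", tokens));
      }
    }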
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalDayTimeColumnCompareIntervalDayTimeColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalDayTimeColumnCompareIntervalDayTimeColumn.txt
new file mode 100644
index 0000000..bf62b78
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalDayTimeColumnCompareIntervalDayTimeColumn.txt
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+
+/**
+ * Generated from template IntervalDayTimeColumnCompareIntervalDayTimeColumn.txt, which covers comparison
+ * expressions between two interval_day_time columns. The boolean output
+ * is stored in a separate boolean column.
+ */
+public class <ClassName> extends <BaseClassName> {
+
+  private static final long serialVersionUID = 1L;
+
+  public <ClassName>(int colNum1, int colNum2, int outputColumn) {
+    super(colNum1, colNum2, outputColumn);
+  }
+
+  public <ClassName>() {
+    super();
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"),
+            VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
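For orientation, a hypothetical expansion of the template above (the class and base names are illustrative; the real generated names come from the expansion table):

    // Hypothetical expansion of <ClassName>/<BaseClassName> for a "less than" comparison.
    public class IntervalDayTimeColLessIntervalDayTimeColumn
        extends IntervalDayTimeColumnCompareIntervalDayTimeColumnBase {  // base name illustrative

      private static final long serialVersionUID = 1L;

      public IntervalDayTimeColLessIntervalDayTimeColumn(int colNum1, int colNum2, int outputColumn) {
        super(colNum1, colNum2, outputColumn);
      }

      public IntervalDayTimeColLessIntervalDayTimeColumn() {
        super();
      }
    }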
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalDayTimeColumnCompareIntervalDayTimeScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalDayTimeColumnCompareIntervalDayTimeScalar.txt
new file mode 100644
index 0000000..1abb4a3
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalDayTimeColumnCompareIntervalDayTimeScalar.txt
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+
+/**
+ * Generated from template IntervalDayTimeColumnCompareIntervalDayTimeScalar.txt, which covers comparison
+ * expressions between a datetime/interval column and a scalar of the same type. The boolean output
+ * is stored in a separate boolean column.
+ */
+public class <ClassName> extends <BaseClassName> {
+
+  private static final long serialVersionUID = 1L;
+
+  public <ClassName>(int colNum, HiveIntervalDayTime value, int outputColumn) {
+    super(colNum, value.pisaTimestampUpdate(new PisaTimestamp()), outputColumn);
+  }
+
+  public <ClassName>() {
+    super();
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"),
+            VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalDayTimeScalarCompareIntervalDayTimeColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalDayTimeScalarCompareIntervalDayTimeColumn.txt
new file mode 100644
index 0000000..26762ff
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalDayTimeScalarCompareIntervalDayTimeColumn.txt
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+
+/**
+ * Generated from template IntervalDayTimeScalarCompareIntervalDayTimeColumn.txt, which covers comparison
+ * expressions between a datetime/interval scalar and a column of the same type. The boolean output
+ * is stored in a separate boolean column.
+ */
+public class <ClassName> extends <BaseClassName> {
+
+  private static final long serialVersionUID = 1L;
+
+  public <ClassName>(HiveIntervalDayTime value, int colNum, int outputColumn) {
+    super(value.pisaTimestampUpdate(new PisaTimestamp()), colNum, outputColumn);
+  }
+
+  public <ClassName>() {
+    super();
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"),
+            VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
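The three compare templates above lean on the interval_day_time storage model: values live in a TimestampColumnVector as a normalized (seconds, nanos) duration, so ordering reduces to comparing the normalized pair. A toy model of that comparison, reduced to primitives (the generated classes do this through the column-vector accessors):

    // Toy comparison of two interval_day_time values normalized to (totalSeconds, nanos).
    static int compareIntervalDayTime(long seconds1, int nanos1, long seconds2, int nanos2) {
      if (seconds1 != seconds2) {
        return Long.compare(seconds1, seconds2);  // seconds dominate
      }
      return Integer.compare(nanos1, nanos2);     // nanoseconds break ties
    }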
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt
deleted file mode 100644
index 0464a5e..0000000
--- ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt
+++ /dev/null
@@ -1,167 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
-
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import org.apache.hadoop.hive.ql.exec.vector.*;
-
-
-/*
- * Because of the templatized nature of the code, either or both
- * of these ColumnVector imports may be needed. Listing both of them
- * rather than using ....vectorization.*;
- */
-import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
-import org.apache.hadoop.hive.ql.util.DateTimeMath;
-
-/**
- * Generated from template IntervalScalarArithmeticDateTimeColumnWithConvert.txt.
- * Implements a vectorized arithmetic operator with a scalar on the left and a
- * column vector on the right. The result is output to an output column vector.
- */
-public class <ClassName> extends VectorExpression {
-
-  private static final long serialVersionUID = 1L;
-
-  private int colNum;
-  private <OperandType1> value;
-  private int outputColumn;
-  private DateTimeMath dtm = new DateTimeMath();
-
-  public <ClassName>(<OperandType1> value, int colNum, int outputColumn) {
-    this.colNum = colNum;
-    this.value = value;
-    this.outputColumn = outputColumn;
-  }
-
-  public <ClassName>() {
-  }
-
-  @Override
-  /**
-   * Method to evaluate scalar-column operation in vectorized fashion.
-   *
-   * @batch a package of rows with each column stored in a vector
-   */
-  public void evaluate(VectorizedRowBatch batch) {
-
-    if (childExpressions != null) {
-      super.evaluateChildren(batch);
-    }
-
-    <InputColumnVectorType> inputColVector = (<InputColumnVectorType>) batch.cols[colNum];
-    <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn];
-    int[] sel = batch.selected;
-    boolean[] inputIsNull = inputColVector.isNull;
-    boolean[] outputIsNull = outputColVector.isNull;
-    outputColVector.noNulls = inputColVector.noNulls;
-    outputColVector.isRepeating = inputColVector.isRepeating;
-    int n = batch.size;
-    <OperandType2>[] vector = inputColVector.vector;
-    <ReturnType>[] outputVector = outputColVector.vector;
-
-    // return immediately if batch is empty
-    if (n == 0) {
-      return;
-    }
-
-    // arg1 is interval, arg2 is datetime
-
-    if (inputColVector.isRepeating) {
-      outputVector[0] = <OperatorFunction>(<TypeConversion2>(vector[0]), (int) value);
-
-      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
-      outputIsNull[0] = inputIsNull[0];
-    } else if (inputColVector.noNulls) {
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = <OperatorFunction>(<TypeConversion2>(vector[i]), (int) value);
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = <OperatorFunction>(<TypeConversion2>(vector[i]), (int) value);
-        }
-      }
-    } else { /* there are nulls */
-      if (batch.selectedInUse) {
-        for(int j = 0; j != n; j++) {
-          int i = sel[j];
-          outputVector[i] = <OperatorFunction>(<TypeConversion2>(vector[i]), (int) value);
-          outputIsNull[i] = inputIsNull[i];
-        }
-      } else {
-        for(int i = 0; i != n; i++) {
-          outputVector[i] = <OperatorFunction>(<TypeConversion2>(vector[i]), (int) value);
-        }
-        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
-      }
-    }
-
-    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
-  }
-
-  @Override
-  public int getOutputColumn() {
-    return outputColumn;
-  }
-
-  @Override
-  public String getOutputType() {
-    return "<ReturnType>";
-  }
-
-  public int getColNum() {
-    return colNum;
-  }
-
-  public void setColNum(int colNum) {
-    this.colNum = colNum;
-  }
-
-  public <OperandType1> getValue() {
-    return value;
-  }
-
-  public void setValue(<OperandType1> value) {
-    this.value = value;
-  }
-
-  public void setOutputColumn(int outputColumn) {
-    this.outputColumn = outputColumn;
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.getType("<OperandType1>"),
-            VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"))
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.SCALAR,
-            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
-  }
-}
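The IntervalYearMonth templates that follow all funnel month arithmetic through DateTimeMath.addMonthsToDays: month lengths vary, so the addition has to go through a calendar rather than plain integer math. A plausible shape for that helper (illustrative sketch, not the Hive source):

    import java.util.Calendar;
    import java.util.TimeZone;
    import java.util.concurrent.TimeUnit;

    public class AddMonthsSketch {
      // epochDays in, epochDays out; the Calendar handles variable month lengths and leap years.
      static long addMonthsToDaysSketch(long epochDays, int months) {
        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        cal.setTimeInMillis(TimeUnit.DAYS.toMillis(epochDays));
        cal.add(Calendar.MONTH, months);
        return TimeUnit.MILLISECONDS.toDays(cal.getTimeInMillis());
      }
    }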
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt
new file mode 100644
index 0000000..7ae84b7
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+
+/**
+ * Generated from template IntervalYearMonthColumnArithmeticDateColumn.txt, which covers binary arithmetic
+ * expressions between columns.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public <ClassName>(int colNum1, int colNum2, int outputColumn) {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #1 is type interval_year_month (months).
+    LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1];
+
+    // Input #2 is type date (epochDays).
+    LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2];
+
+    // Output is type date.
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+
+    int[] sel = batch.selected;
+    int n = batch.size;
+    long[] vector1 = inputColVector1.vector;
+    long[] vector2 = inputColVector2.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating =
+        inputColVector1.isRepeating && inputColVector2.isRepeating
+        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
+        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
+
+    // Handle nulls first
+    NullUtil.propagateNullsColCol(
+        inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+      outputVector[0] = dtm.addMonthsToDays(vector2[0], (int) vector1[0]);
+    } else if (inputColVector1.isRepeating) {
+      long value1 = vector1[0];
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = dtm.addMonthsToDays(vector2[i], (int) value1);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = dtm.addMonthsToDays(vector2[i], (int) value1);
+        }
+      }
+    } else if (inputColVector2.isRepeating) {
+      long value2 = vector2[0];
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = dtm.addMonthsToDays(value2, (int) vector1[i]);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = dtm.addMonthsToDays(value2, (int) vector1[i]);
+        }
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = dtm.addMonthsToDays(vector2[i], (int) vector1[i]);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = dtm.addMonthsToDays(vector2[i], (int) vector1[i]);
+        }
+      }
+    }
+
+    /* For the case when the output can have null values, follow
+     * the convention that the data values must be 1 for long and
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"),
+            VectorExpressionDescriptor.ArgumentType.getType("date"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
+
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt
new file mode 100644
index 0000000..2f2522d
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+
+/**
+ * Generated from template IntervalYearMonthColumnArithmeticDateScalar.txt, which covers binary arithmetic
+ * expressions between a column and a scalar.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private long value;
+  private int outputColumn;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public <ClassName>(int colNum, long value, int outputColumn) {
+    this.colNum = colNum;
+    this.value = value;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #1 is type interval_year_month (epochMonths).
+    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
+
+    // Output is type date.
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
+    int n = batch.size;
+    long[] vector = inputColVector.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (inputColVector.isRepeating) {
+      outputVector[0] = dtm.addMonthsToDays(value, (int) vector[0]);
+
+      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+      outputIsNull[0] = inputIsNull[0];
+    } else if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]);
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]);
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]);
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+    }
+
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"),
+            VectorExpressionDescriptor.ArgumentType.getType("date"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt
new file mode 100644
index 0000000..b3da89f
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt
@@ -0,0 +1,179 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+
+/**
+ * Generated from template IntervalYearMonthColumnArithmeticTimestampColumn.txt, which covers binary arithmetic
+ * expressions between columns.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+  private PisaTimestamp scratchPisaTimestamp;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public <ClassName>(int colNum1, int colNum2, int outputColumn) {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+    scratchPisaTimestamp = new PisaTimestamp();
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #1 is type interval_year_month (months).
+    LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1];
+
+    // Input #2 is type timestamp (PisaTimestamp).
+    TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2];
+
+    // Output is type timestamp.
+    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn];
+
+    int[] sel = batch.selected;
+    int n = batch.size;
+
+    long[] vector1 = inputColVector1.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating =
+        inputColVector1.isRepeating && inputColVector2.isRepeating
+        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
+        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
+
+    // Handle nulls first
+    NullUtil.propagateNullsColCol(
+        inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+      outputColVector.set(0,
+          dtm.addMonthsToPisaTimestamp(inputColVector2.asScratchPisaTimestamp(0), (int) vector1[0],
+              scratchPisaTimestamp));
+    } else if (inputColVector1.isRepeating) {
+      long value1 = vector1[0];
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(inputColVector2.asScratchPisaTimestamp(i), (int) value1,
+                  scratchPisaTimestamp));
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(inputColVector2.asScratchPisaTimestamp(i), (int) value1,
+                  scratchPisaTimestamp));
+        }
+      }
+    } else if (inputColVector2.isRepeating) {
+      PisaTimestamp value2 = inputColVector2.asScratchPisaTimestamp(0);
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(value2, (int) vector1[i],
+                  scratchPisaTimestamp));
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(value2, (int) vector1[i],
+                  scratchPisaTimestamp));
+        }
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(inputColVector2.asScratchPisaTimestamp(i), (int) vector1[i],
+                  scratchPisaTimestamp));
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(inputColVector2.asScratchPisaTimestamp(i), (int) vector1[i],
+                  scratchPisaTimestamp));
+        }
+      }
+    }
+
+    /* For the case when the output can have null values, follow
+     * the convention that the data values must be 1 for long and
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "timestamp";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"),
+            VectorExpressionDescriptor.ArgumentType.getType("timestamp"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
  }
+}
+
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt
new file mode 100644
index 0000000..81f2a77
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+
+/**
+ * Generated from template IntervalYearMonthColumnArithmeticTimestampScalar.txt, which covers binary arithmetic
+ * expressions between a column and a scalar.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private PisaTimestamp value;
+  private int outputColumn;
+  private PisaTimestamp scratchPisaTimestamp;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public <ClassName>(int colNum, Timestamp value, int outputColumn) {
+    this.colNum = colNum;
+    this.value = new PisaTimestamp(value);
+    this.outputColumn = outputColumn;
+    scratchPisaTimestamp = new PisaTimestamp();
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #1 is type interval_year_month (epochMonths).
+    LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum];
+
+    // Output is type timestamp.
+    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn];
+
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector1.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = inputColVector1.noNulls;
+    outputColVector.isRepeating = inputColVector1.isRepeating;
+    int n = batch.size;
+
+    long[] vector1 = inputColVector1.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (inputColVector1.isRepeating) {
+      outputColVector.set(0,
+          dtm.addMonthsToPisaTimestamp(value, (int) vector1[0],
+              scratchPisaTimestamp));
+
+      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+      outputIsNull[0] = inputIsNull[0];
+    } else if (inputColVector1.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(value, (int) vector1[i],
+                  scratchPisaTimestamp));
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(value, (int) vector1[i],
+                  scratchPisaTimestamp));
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(value, (int) vector1[i],
+                  scratchPisaTimestamp));
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(value, (int) vector1[i],
+                  scratchPisaTimestamp));
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+    }
+
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "timestamp";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"),
+            VectorExpressionDescriptor.ArgumentType.getType("timestamp"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt
new file mode 100644
index 0000000..3f4f05f
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt
@@ -0,0 +1,149 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+
+
+/*
+ * Because of the templatized nature of the code, either or both
+ * of these ColumnVector imports may be needed. Listing both of them
+ * rather than using ....vectorization.*;
+ */
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+
+/**
+ * Generated from template IntervalYearMonthScalarArithmeticDateColumn.txt.
+ * Implements a vectorized arithmetic operator with a scalar on the left and a
+ * column vector on the right. The result is output to an output column vector.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private long value;
+  private int outputColumn;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public <ClassName>(long value, int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.value = value;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  /**
+   * Method to evaluate scalar-column operation in vectorized fashion.
+   *
+   * @batch a package of rows with each column stored in a vector
+   */
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #2 is type date.
+    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
+
+    // Output is type date.
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
+    int n = batch.size;
+    long[] vector = inputColVector.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (inputColVector.isRepeating) {
+      outputVector[0] = dtm.addMonthsToDays(vector[0], (int) value);
+
+      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+      outputIsNull[0] = inputIsNull[0];
+    } else if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value);
+        }
+      }
+    } else { /* there are nulls */
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value);
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value);
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+    }
+
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"),
+            VectorExpressionDescriptor.ArgumentType.getType("date"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt
new file mode 100644
index 0000000..47d611e
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+/*
+ * Because of the templatized nature of the code, either or both
+ * of these ColumnVector imports may be needed. Listing both of them
+ * rather than using ....vectorization.*;
+ */
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+
+/**
+ * Generated from template IntervalYearMonthScalarArithmeticTimestampColumn.txt.
+ * Implements a vectorized arithmetic operator with a scalar on the left and a
+ * column vector on the right. The result is output to an output column vector.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private long value;
+  private int outputColumn;
+  private PisaTimestamp scratchPisaTimestamp;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public <ClassName>(long value, int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.value = value;
+    this.outputColumn = outputColumn;
+    scratchPisaTimestamp = new PisaTimestamp();
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  /**
+   * Method to evaluate scalar-column operation in vectorized fashion.
+   *
+   * @batch a package of rows with each column stored in a vector
+   */
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #2 is type timestamp.
+    TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum];
+
+    // Output is type timestamp.
+    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn];
+
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = inputColVector.noNulls;
+    outputColVector.isRepeating = inputColVector.isRepeating;
+    int n = batch.size;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (inputColVector.isRepeating) {
+      outputColVector.set(0,
+          dtm.addMonthsToPisaTimestamp(inputColVector.asScratchPisaTimestamp(0), (int) value,
+              scratchPisaTimestamp));
+
+      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+      outputIsNull[0] = inputIsNull[0];
+    } else if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(inputColVector.asScratchPisaTimestamp(i), (int) value,
+                  scratchPisaTimestamp));
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(inputColVector.asScratchPisaTimestamp(i), (int) value,
+                  scratchPisaTimestamp));
+        }
+      }
+    } else { /* there are nulls */
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(inputColVector.asScratchPisaTimestamp(i), (int) value,
+                  scratchPisaTimestamp));
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.set(i,
+              dtm.addMonthsToPisaTimestamp(inputColVector.asScratchPisaTimestamp(i), (int) value,
+                  scratchPisaTimestamp));
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+    }
+
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "timestamp";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"),
+            VectorExpressionDescriptor.ArgumentType.getType("timestamp"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt
new file mode 100644
index 0000000..e804e2a
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Generated from template LongDoubleColumnCompareTimestampColumn.txt, which covers binary comparison
+ * expressions between a long/double column and a timestamp column.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+
+  public <ClassName>(int colNum1, int colNum2, int outputColumn) {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    <InputColumnVectorType> inputColVector1 = (<InputColumnVectorType>) batch.cols[colNum1];
+    TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    <OperandType>[] vector1 = inputColVector1.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating =
+        inputColVector1.isRepeating && inputColVector2.isRepeating
+        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
+        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
+
+    // Handle nulls first
+    NullUtil.propagateNullsColCol(
+        inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+      outputVector[0] = vector1[0] <OperatorSymbol> inputColVector2.<GetTimestampLongDoubleMethod>(0) ? 1 : 0;
+    } else if (inputColVector1.isRepeating) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = vector1[0] <OperatorSymbol> inputColVector2.<GetTimestampLongDoubleMethod>(i) ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = vector1[0] <OperatorSymbol> inputColVector2.<GetTimestampLongDoubleMethod>(i) ? 1 : 0;
+        }
+      }
+    } else if (inputColVector2.isRepeating) {
+      <OperandType> value2 = inputColVector2.<GetTimestampLongDoubleMethod>(0);
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = vector1[i] <OperatorSymbol> value2 ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = vector1[i] <OperatorSymbol> value2 ? 1 : 0;
+        }
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = vector1[i] <OperatorSymbol> inputColVector2.<GetTimestampLongDoubleMethod>(i) ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = vector1[i] <OperatorSymbol> inputColVector2.<GetTimestampLongDoubleMethod>(i) ? 1 : 0;
+        }
+      }
+    }
+
+    /* For the case when the output can have null values, follow
+     * the convention that the data values must be 1 for long and
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"),
+            VectorExpressionDescriptor.ArgumentType.getType("timestamp"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
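These long/double-versus-timestamp comparisons rely on the interpretation spelled out in the deleted ScalarCompareTimestampColumn.txt below: a long operand is taken as whole seconds since the epoch, a double as seconds with a fractional part. Reduced to primitives, the "<" case is roughly:

    // Illustrative only: both sides brought to a common scale before comparing.
    static boolean longSecondsLessThanTimestamp(long seconds, long timestampNanos) {
      return seconds * 1_000_000_000L < timestampNanos;   // long = whole seconds
    }

    static boolean doubleSecondsLessThanTimestamp(double seconds, long timestampNanos) {
      return seconds * 1e9 < (double) timestampNanos;     // double = seconds.fraction
    }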
diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt
new file mode 100644
index 0000000..90720ba
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Generated from template LongDoubleColumnCompareTimestampScalar.txt, which covers binary comparison
+ * expressions between a column and a scalar. The boolean output is stored in a
+ * separate boolean column.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private <OperandType> value;
+  private int outputColumn;
+
+  public <ClassName>(int colNum, Timestamp value, int outputColumn) {
+    this.colNum = colNum;
+    this.value = new PisaTimestamp(value).<GetTimestampLongDoubleMethod>();
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    <InputColumnVectorType> inputColVector1 = (<InputColumnVectorType>) batch.cols[colNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] nullPos = inputColVector1.isNull;
+    boolean[] outNulls = outputColVector.isNull;
+    int n = batch.size;
+    <OperandType>[] vector1 = inputColVector1.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating = false;
+    outputColVector.noNulls = inputColVector1.noNulls;
+    if (inputColVector1.noNulls) {
+      if (inputColVector1.isRepeating) {
+        //All must be selected otherwise size would be zero
+        //Repeating property will not change.
+        outputVector[0] = vector1[0] <OperatorSymbol> value ? 1 : 0;
+        outputColVector.isRepeating = true;
+      } else if (batch.selectedInUse) {
+        for(int j=0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = vector1[i] <OperatorSymbol> value ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = vector1[i] <OperatorSymbol> value ? 1 : 0;
+        }
+      }
+    } else {
+      if (inputColVector1.isRepeating) {
+        //All must be selected otherwise size would be zero
+        //Repeating property will not change.
+        if (!nullPos[0]) {
+          outputVector[0] = vector1[0] <OperatorSymbol> value ? 1 : 0;
+          outNulls[0] = false;
+        } else {
+          outNulls[0] = true;
+        }
+        outputColVector.isRepeating = true;
+      } else if (batch.selectedInUse) {
+        for(int j=0; j != n; j++) {
+          int i = sel[j];
+          if (!nullPos[i]) {
+            outputVector[i] = vector1[i] <OperatorSymbol> value ? 1 : 0;
+            outNulls[i] = false;
+          } else {
+            //comparison with null is null
+            outNulls[i] = true;
+          }
+        }
+      } else {
+        System.arraycopy(nullPos, 0, outNulls, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!nullPos[i]) {
+            outputVector[i] = vector1[i] <OperatorSymbol> value ? 1 : 0;
+          }
+        }
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"),
+            VectorExpressionDescriptor.ArgumentType.getType("timestamp"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
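Like every scalar template in this patch, the compare above splits the batch into three cases: a repeating input (evaluate slot 0 once and keep isRepeating), a flat batch with no nulls (tight loop), and a flat batch with nulls (loop plus null copy). A compact restatement of the repeating shortcut:

    // A repeating vector stores one logical value for all rows, so slot 0 stands in for
    // every row and the output can stay repeating after a single evaluation.
    static long logicalValue(long[] vector, boolean isRepeating, int row) {
      return vector[isRepeating ? 0 : row];
    }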
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template ScalarCompareTimestamp.txt, which covers comparison + * expressions between a long/double scalar and a column. The boolean output is stored in a + * separate boolean column. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private value; + private int outputColumn; + + public ( value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = value inputColVector.(0) ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + outputVector[0] = value inputColVector.(0) ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = value inputColVector.(i) ? 1 : 0; + outNulls[i] = false; + } else { + //comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = value inputColVector.(i) ? 
1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareTimestampColumn.txt deleted file mode 100644 index 7867610..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareTimestampColumn.txt +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import java.sql.Timestamp; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.io.LongWritable; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Generated from template ScalarCompareTimestampColumn.txt, which covers comparison - * expressions between a timestamp column and a long or double scalar. The boolean output - * is stored in a separate boolean column. - * Note: For timestamp and long or double we implicitly interpret the long as the number - * of seconds or double as seconds and fraction since the epoch. 
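The note in this deleted template states the conversion rule the replacement templates keep: a long scalar is implicitly whole seconds since the epoch, and a double scalar is seconds with a fractional part. A sketch of both conversions into nanoseconds, assuming the new timestamp representation is nanosecond-based and ignoring overflow for very large inputs:

    class SecondsToNanosSketch {
      static final long NANOS_PER_SECOND = 1_000_000_000L;

      // long scalar: whole seconds since the epoch
      static long fromLongSeconds(long seconds) {
        return seconds * NANOS_PER_SECOND;
      }

      // double scalar: seconds with fractional nanoseconds
      static long fromDoubleSeconds(double seconds) {
        return Math.round(seconds * NANOS_PER_SECOND);
      }
    }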
- */ -public class extends { - - public ( value, int colNum, int outputColumn) { - super(TimestampUtils.(value), colNum, outputColumn); - } - - public () { - super(); - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("timestamp")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt new file mode 100644 index 0000000..b086a88 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticDateColumn.txt, which covers binary arithmetic + * expressions between columns. 
+ */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum1, int colNum2, int outputColumn) { + super(colNum1, colNum2, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumnBase.txt new file mode 100644 index 0000000..7f5496c --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumnBase.txt @@ -0,0 +1,172 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template TimestampColumnArithmeticDateColumnBase.txt, which covers binary arithmetic + * expressions between columns. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type timestamp (PisaTimestamp). + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + + // Input #2 is type date. + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + + // Output is type timestamp. 
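TimestampColumnArithmeticDateColumnBase allocates a single scratchPisaTimestamp up front and overwrites it for every row instead of creating a fresh object per conversion. A sketch of that allocation-free pattern with a stand-in mutable holder (PisaTimestamp's full API is not visible in this patch, so the holder here is illustrative):

    // Stand-in for a reusable mutable scratch object.
    class ScratchNanos {
      long nanos;
      ScratchNanos update(long millis) { this.nanos = millis * 1_000_000L; return this; }
    }

    class ScratchReuseSketch {
      private final ScratchNanos scratch = new ScratchNanos(); // one allocation per expression

      void convertBatch(long[] millisIn, long[] nanosOut, int n) {
        for (int i = 0; i != n; i++) {
          nanosOut[i] = scratch.update(millisIn[i]).nanos; // reuse; no per-row garbage
        }
      }
    }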
+ TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(0), + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + } else if (inputColVector1.isRepeating) { + PisaTimestamp value1 = inputColVector1.asScratchPisaTimestamp(0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value1, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value1, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else if (inputColVector2.isRepeating) { + PisaTimestamp value2 = + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), + value2, + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), + value2, + i); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
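The convention this comment describes exists so that a later expression consuming the output can never hit an accidental zero in a null slot (for example, a divide). A minimal sketch of what a setNullDataEntries-style pass does for a long output vector; the real NullUtil methods also handle the repeating and noNulls fast paths omitted here:

    class NullConventionSketch {
      static void setNullDataEntriesLong(long[] vector, boolean[] isNull,
          boolean selectedInUse, int[] sel, int n) {
        if (selectedInUse) {
          for (int j = 0; j != n; j++) {
            int i = sel[j];
            if (isNull[i]) {
              vector[i] = 1; // convention: 1 for long outputs, NaN for double outputs
            }
          }
        } else {
          for (int i = 0; i != n; i++) {
            if (isNull[i]) {
              vector[i] = 1;
            }
          }
        }
      }
    }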
+ */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt new file mode 100644 index 0000000..b8404db --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticDateScalar.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum, long value, int outputColumn) { + super(colNum, value, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalarBase.txt new file mode 100644 index 0000000..c2ddd67 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalarBase.txt @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template TimestampColumnArithmeticDateScalarBase.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (int colNum, long value, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp().updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) value)); + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type Timestamp (PisaTimestamp). + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(0), value, 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
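In TimestampColumnArithmeticDateScalarBase the date scalar (epoch days) is widened to milliseconds exactly once, in the constructor, rather than per row. A sketch of the day-to-millisecond arithmetic, assuming a plain UTC mapping (DateWritable.daysToMillis may additionally compensate for the local time zone):

    class DaysToMillisSketch {
      static final long MILLIS_PER_DAY = 24L * 60 * 60 * 1000;

      // Simplified, UTC-only stand-in for DateWritable.daysToMillis.
      static long daysToMillis(int epochDays) {
        return epochDays * MILLIS_PER_DAY;
      }

      public static void main(String[] args) {
        // 2015-01-01 is 16436 days after the epoch.
        System.out.println(daysToMillis(16436)); // prints 1420070400000
      }
    }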
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), value, i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), value, i); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), value, i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), value, i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt new file mode 100644 index 0000000..2f33920 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt @@ -0,0 +1,177 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticIntervalYearMonthColumn.txt, which covers binary arithmetic + * expressions between columns. 
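The column-column templates that follow all precompute the output's isRepeating flag the same way: the result repeats when both inputs repeat, or when either input is a repeating null (which forces every output row to null anyway). The rule, extracted as a sketch:

    class RepeatingRuleSketch {
      // Output repeats iff both inputs repeat, or either input is a repeating null.
      static boolean outputIsRepeating(
          boolean rep1, boolean noNulls1, boolean isNull1AtZero,
          boolean rep2, boolean noNulls2, boolean isNull2AtZero) {
        return rep1 && rep2
            || rep1 && !noNulls1 && isNull1AtZero
            || rep2 && !noNulls2 && isNull2AtZero;
      }
    }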
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type Timestamp (PisaTimestamp). + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + + // Input #2 is type Interval_Year_Month (months). + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.set(0, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(0), (int) vector2[0], + scratchPisaTimestamp)); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(0), (int) vector2[i], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(0), (int) vector2[i], + scratchPisaTimestamp)); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(i), (int) vector2[0], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(i), (int) vector2[0], + scratchPisaTimestamp)); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(i), (int) vector2[i], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(i), (int) vector2[i], + scratchPisaTimestamp)); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. 
This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. + */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt new file mode 100644 index 0000000..9f5c24e --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt @@ -0,0 +1,146 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticIntervalYearMonthScalar.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private long value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum, long value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type Timestamp (PisaTimestamp). 
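dtm.addMonthsToPisaTimestamp has to respect variable month lengths and leap years, which simple millisecond math cannot do; a Calendar handles those rules. A minimal sketch of Calendar-based month addition on epoch milliseconds (an assumption about DateTimeMath's internals, pinned to UTC here for determinism):

    import java.util.Calendar;
    import java.util.TimeZone;

    class AddMonthsSketch {
      // Calendar clamps day-of-month, so e.g. Jan 31 + 1 month -> Feb 28 (or 29).
      static long addMonths(long epochMillis, int months) {
        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        cal.setTimeInMillis(epochMillis);
        cal.add(Calendar.MONTH, months);
        return cal.getTimeInMillis();
      }
    }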
+ TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.set(0, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(0), (int) value, + scratchPisaTimestamp)); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.asScratchPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt new file mode 100644 index 0000000..dfd45ab --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticTimestampColumn.txt, which covers binary arithmetic + * expressions between columns. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum1, int colNum2, int outputColumn) { + super(colNum1, colNum2, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumnBase.txt new file mode 100644 index 0000000..0e52f6c --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumnBase.txt @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticTimestampColumnBase.txt, which covers binary arithmetic + * expressions between columns. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type timestamp/interval_day_time (PisaTimestamp). + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + + // Input #2 is type timestamp/interval_day_time (PisaTimestamp). + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + + // Output is type timestamp/interval_day_time (PisaTimestamp). + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. 
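Every evaluate() in these templates hoists the repeating and selection checks out of the hot path, expanding into the same four-way branch so the per-row work is a straight-line operation. The control-flow skeleton, reduced to a plain long addition and with the sel[] indirection elided:

    class FourWayBranchSketch {
      static void addSkeleton(long[] v1, boolean rep1, long[] v2, boolean rep2,
          long[] out, int n) {
        if (rep1 && rep2) {
          out[0] = v1[0] + v2[0]; // both repeat: one operation for the whole batch
        } else if (rep1) {
          for (int i = 0; i != n; i++) { out[i] = v1[0] + v2[i]; }
        } else if (rep2) {
          for (int i = 0; i != n; i++) { out[i] = v1[i] + v2[0]; }
        } else {
          for (int i = 0; i != n; i++) { out[i] = v1[i] + v2[i]; }
        }
      }
    }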
+ */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(0), inputColVector2.asScratchPisaTimestamp(0), 0); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.asScratchPisaTimestamp(0), inputColVector2.asScratchPisaTimestamp(i), i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(0), inputColVector2.asScratchPisaTimestamp(i), i); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), inputColVector2.asScratchPisaTimestamp(0), i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), inputColVector2.asScratchPisaTimestamp(0), i); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), inputColVector2.asScratchPisaTimestamp(i), i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), inputColVector2.asScratchPisaTimestamp(i), i); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. + */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt new file mode 100644 index 0000000..f8004ff --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hive.common.util.DateUtils; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticTimestampScalar.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum, value, int outputColumn) { + super(colNum, , outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalarBase.txt new file mode 100644 index 0000000..a0de1b3 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalarBase.txt @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticTimestampScalarBase.txt, which covers binary arithmetic + * expressions between a column and a scalar. 
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum, PisaTimestamp value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type timestamp/interval_day_time (PisaTimestamp). + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type timestamp/interval_day_time. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(0), value, 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), value, i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), value, i); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), value, i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.asScratchPisaTimestamp(i), value, i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt new file mode 100644 index 0000000..0fc402d --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template TimestampColumnCompareLongDoubleColumn.txt, which covers comparison + * expressions between a timestamp column and a long/double column. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + inputColVector2 = () batch.cols[colNum2]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + int n = batch.size; + [] vector2 = inputColVector2.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputVector[0] = inputColVector1.(0) vector2[0] ? 1 : 0; + } else if (inputColVector1.isRepeating) { + value1 = inputColVector1.(0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value1 vector2[i] ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = value1 vector2[i] ? 1 : 0; + } + } + } else if (inputColVector2.isRepeating) { + value2 = vector2[0]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.(i) vector2[0] ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.(i) vector2[0] ? 1 : 0; + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.(i) vector2[i] ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.(i) vector2[i] ? 1 : 0; + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null.
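In the timestamp-versus-long/double comparisons above, the timestamp side is projected into the scalar's domain row by row before the primitive compare; the per-row getter is a template parameter, so its exact name is not visible here. A sketch of the two projections, assuming nanosecond-based timestamp storage (an assumption; only the conversion helpers are visible in this patch):

    class TimestampProjectionSketch {
      // Projection used against long columns: whole seconds since the epoch.
      static long asLongSeconds(long epochNanos) {
        return epochNanos / 1_000_000_000L;
      }

      // Projection used against double columns: seconds with fractional nanoseconds.
      static double asDoubleSeconds(long epochNanos) {
        return epochNanos / 1_000_000_000.0;
      }
    }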
+ */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt new file mode 100644 index 0000000..43321644 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template TimestampColumnCompareScalar.txt, which covers comparison + * expressions between a Timestamp column and a long/double scalar. The boolean output is stored + * in a separate boolean column. 
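Throughout these templates, batch.selectedInUse decides whether the loop walks all n rows or only the logical row ids in batch.selected, so each loop body is emitted twice. The pattern in isolation:

    class SelectedRowsSketch {
      // sel[] holds the row ids that survived earlier filters in the operator pipeline.
      static long sumSketch(long[] vector, int[] sel, int n, boolean selectedInUse) {
        long sum = 0;
        if (selectedInUse) {
          for (int j = 0; j != n; j++) {
            sum += vector[sel[j]]; // touch only the selected rows
          }
        } else {
          for (int i = 0; i != n; i++) {
            sum += vector[i]; // dense batch: every row is live
          }
        }
        return sum;
      }
    }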
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private value; + private int outputColumn; + + public (int colNum, value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector.(0) value ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + outputVector[0] = inputColVector.(0) value ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + outNulls[i] = false; + } else { + //comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareScalar.txt deleted file mode 100644 index da33281..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareScalar.txt +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; - -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - - -/** - * Generated from template TimestampColumnCompareScalar.txt, which covers comparison - * expressions between a timestamp column and a long or double scalar. The boolean output - * is stored in a separate boolean column. - * Note: For timestamp and long or double we implicitly interpret the long as the number - * of seconds or double as seconds and fraction since the epoch. - */ -public class extends { - - public (int colNum, value, int outputColumn) { - super(colNum, TimestampUtils.(value), outputColumn); - } - - public () { - super(); - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("timestamp"), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt new file mode 100644 index 0000000..fb82d5e --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + + +/** + * Generated from template TimestampColumnCompareTimestampColumn.txt, which covers comparison + * expressions between a datetime/interval column and a scalar of the same type. The boolean output + * is stored in a separate boolean column. 
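One thing the TimestampColumnCompareScalar.txt template deleted above documented is worth keeping in mind for its replacements: a long scalar is implicitly whole seconds since the epoch, and a double is seconds with a fractional part. In nanosecond terms, that is exactly the pair of helpers this patch also removes from TimestampUtils (visible near the end of this diff):

    final class SecondsSketch {
      // Mirrors the removed TimestampUtils.secondsToNanoseconds /
      // doubleToNanoseconds helpers; 1 second = 1_000_000_000 ns.
      static long secondsToNanoseconds(long seconds) {
        return seconds * 1_000_000_000L;
      }

      static long doubleToNanoseconds(double d) {
        return (long) (d * 1_000_000_000d);   // e.g. 1.5 s -> 1_500_000_000 ns
      }
    }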
+ */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum1, int colNum2, int outputColumn) { + super(colNum1, colNum2, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumnBase.txt new file mode 100644 index 0000000..302be41 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumnBase.txt @@ -0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template TimestampColumnCompareTimestampColumnBase.txt, which covers comparison + * expressions between timestamp columns.
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputVector[0] = inputColVector1.compareTo(0, inputColVector2.asScratchPisaTimestamp(0)) 0 ? 1 : 0; + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(0, inputColVector2.asScratchPisaTimestamp(i)) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(0, inputColVector2.asScratchPisaTimestamp(i)) 0 ? 1 : 0; + } + } + } else if (inputColVector2.isRepeating) { + PisaTimestamp value2 = inputColVector2.asScratchPisaTimestamp(0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, value2) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value2) 0 ? 1 : 0; + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, inputColVector2.asScratchPisaTimestamp(i)) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, inputColVector2.asScratchPisaTimestamp(i)) 0 ? 1 : 0; + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
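The isRepeating computation above is worth calling out: the output can be marked repeating not only when both inputs repeat, but also when either input is a repeating NULL, because a NULL on one side nulls the entire batch. As a standalone predicate:

    final class RepeatingSketch {
      // True when one output entry can stand for the whole batch: both sides
      // repeat, or either side is a repeating NULL (NULL wins regardless of
      // the other operand).
      static boolean outputIsRepeating(
          boolean rep1, boolean noNulls1, boolean nullAt0of1,
          boolean rep2, boolean noNulls2, boolean nullAt0of2) {
        return (rep1 && rep2)
            || (rep1 && !noNulls1 && nullAt0of1)
            || (rep2 && !noNulls2 && nullAt0of2);
      }
    }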
+ */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt index 46534b4..58c3352 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt @@ -15,23 +15,27 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** - * Generated from template TimestampColumnCompareTimestampScalar.txt, which covers comparison - * expressions between a timestamp column and a timestamp scalar. The boolean output + * Generated from template TimestampColumnCompareTimestampScalar.txt, which covers comparison + * expressions between a datetime/interval column and a scalar of the same type. The boolean output * is stored in a separate boolean column. */ public class extends { - public (int colNum, long value, int outputColumn) { - super(colNum, value, outputColumn); + private static final long serialVersionUID = 1L; + + public (int colNum, Timestamp value, int outputColumn) { + super(colNum, new PisaTimestamp(value), outputColumn); } public () { diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalarBase.txt new file mode 100644 index 0000000..ce940a4 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalarBase.txt @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template TimestampColumnCompareTimestampScalar.txt, which covers binary comparison + * expressions between a column and a scalar. 
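The Base/subclass split in this hunk keeps all batch iteration in the abstract base; each generated concrete class only converts its scalar once in the constructor and declares a descriptor. A hypothetical expansion for the "<" operator (class names invented for illustration; real names are produced by GenVectorCode):

    import java.sql.Timestamp;
    import org.apache.hadoop.hive.common.type.PisaTimestamp;

    public class TimestampColLessTimestampScalar extends TimestampColLessTimestampScalarBase {

      private static final long serialVersionUID = 1L;

      public TimestampColLessTimestampScalar(int colNum, Timestamp value, int outputColumn) {
        // the java.sql.Timestamp scalar is converted to PisaTimestamp once,
        // not once per row
        super(colNum, new PisaTimestamp(value), outputColumn);
      }

      public TimestampColLessTimestampScalar() {
        super();
      }
    }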
The boolean output is stored in a + * separate boolean column. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + + public (int colNum, PisaTimestamp value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector1.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector1.noNulls; + if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } + } else { + if (inputColVector1.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + outNulls[i] = false; + } else { + //comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt new file mode 100644 index 0000000..8f89bd4 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
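All of these comparisons bottom out in TimestampColumnVector.compareTo against a PisaTimestamp value. With the timestamp held in a two-part form, the comparison reduces to comparing the coarse component first and the nanosecond remainder second; the field split below is an assumption for illustration, since PisaTimestamp's internal layout is a detail of this patch:

    final class TwoPartCompareSketch {
      // Assumed layout: a coarse component plus nanoseconds within it.
      static int compareTwoPart(long coarse1, long nanos1, long coarse2, long nanos2) {
        if (coarse1 != coarse2) {
          return coarse1 < coarse2 ? -1 : 1;
        }
        return Long.compare(nanos1, nanos2);
      }
    }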
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hive.common.util.DateUtils; + +/** + * Generated from template TimestampScalarArithmeticDateColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public ( value, int colNum, int outputColumn) { + super(, colNum, outputColumn); + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumnBase.txt new file mode 100644 index 0000000..94be4f6 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumnBase.txt @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
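The descriptor each concrete class returns is what the vectorizer matches at plan time to pick a generated class for an expression tree: argument types plus whether each argument is a column or a constant. The fragment below mirrors the getDescriptor() bodies in this diff, filled in with one concrete type pairing where the template leaves parameters (timestamp/date is an example expansion, not the only one):

    VectorExpressionDescriptor.Descriptor d = new VectorExpressionDescriptor.Builder()
        .setMode(VectorExpressionDescriptor.Mode.PROJECTION)     // vs. FILTER
        .setNumArguments(2)
        .setArgumentTypes(
            VectorExpressionDescriptor.ArgumentType.getType("timestamp"),
            VectorExpressionDescriptor.ArgumentType.getType("date"))
        .setInputExpressionTypes(
            VectorExpressionDescriptor.InputExpressionType.SCALAR,
            VectorExpressionDescriptor.InputExpressionType.COLUMN)
        .build();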
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template TimestampScalarArithmeticDateColumnBase.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (PisaTimestamp value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type date. + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector2.noNulls; + outputColVector.isRepeating = inputColVector2.isRepeating; + int n = batch.size; + + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector2.isRepeating) { + outputColVector.( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector2.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt new file mode 100644 index 0000000..e9b9e67 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampScalarArithmeticIntervalYearMonthColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. 
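Two conversions carry the date and interval_year_month arithmetic in the classes around this hunk: epoch days widen to milliseconds before becoming a timestamp, and month offsets go through calendar arithmetic because months have no fixed length. A plain-Java sketch of both (Hive's DateWritable.daysToMillis and DateTimeMath are also time-zone aware; this sketch assumes UTC):

    import java.util.Calendar;
    import java.util.concurrent.TimeUnit;

    final class DateMathSketch {
      static long daysToMillisUtc(long epochDays) {
        return TimeUnit.DAYS.toMillis(epochDays);   // 86,400,000 ms per day
      }

      static long addMonthsToMillis(long millis, int months) {
        Calendar cal = Calendar.getInstance();
        cal.setTimeInMillis(millis);
        cal.add(Calendar.MONTH, months);   // clamps, e.g. Jan 31 + 1 month -> Feb 28/29
        return cal.getTimeInMillis();
      }
    }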
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (PisaTimestamp value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type Interval_Year_Month (months). + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; + int n = batch.size; + + long[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputColVector.set(0, + dtm.addMonthsToPisaTimestamp(value, (int) vector[0], + scratchPisaTimestamp)); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector[i], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector[i], + scratchPisaTimestamp)); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector[i], + scratchPisaTimestamp)); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector[i], + scratchPisaTimestamp)); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt new file mode 100644 index 0000000..6725908 --- 
/dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hive.common.util.DateUtils; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampScalarArithmeticTimestampColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public ( value, int colNum, int outputColumn) { + super(, colNum, outputColumn); + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumnBase.txt new file mode 100644 index 0000000..0ff9226 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumnBase.txt @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampScalarArithmeticTimestampColumnBase.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (PisaTimestamp value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type timestamp/interval_day_time. + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type timestamp/interval_day_time. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector2.noNulls; + outputColVector.isRepeating = inputColVector2.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector2.isRepeating) { + outputColVector.( + value, inputColVector2.asScratchPisaTimestamp(0), 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector2.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, inputColVector2.asScratchPisaTimestamp(i), i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, inputColVector2.asScratchPisaTimestamp(i), i); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, inputColVector2.asScratchPisaTimestamp(i), i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, inputColVector2.asScratchPisaTimestamp(i), i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareLongDoubleColumn.txt new file mode 100644 index 0000000..9e855e8 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareLongDoubleColumn.txt @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template ColumnCompareScalar.txt, which covers binary comparison + * expressions between a column and a scalar. The boolean output is stored in a + * separate boolean column. 
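Note the scratch objects threaded through all of these loops (scratchPisaTimestamp, asScratchPisaTimestamp): one mutable value owned by the expression is overwritten row by row instead of allocating a fresh object per row. The shape of the pattern, with a stand-in class (MutableNanos is not a Hive type):

    // One instance lives as a field of the expression; each row overwrites it.
    final class MutableNanos {
      long nanos;

      MutableNanos update(long nanos) {
        this.nanos = nanos;
        return this;   // returned for chaining, like asScratchPisaTimestamp
      }
    }

The usual scratch-object caveat applies: a caller must finish consuming the returned value before the next row overwrites it, which is why the templates never hold onto it across iterations.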
+ */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (Timestamp value, int colNum, int outputColumn) { + super(new PisaTimestamp(value).(), colNum, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt index 9468a66..df9f3c9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt @@ -15,30 +15,28 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; import java.sql.Timestamp; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + /** - * Generated from template TimestampScalarCompareTimestampColumn.txt, which covers comparison - * expressions between a timestamp column and a timestamp scalar. The boolean output + * Generated from template TimestampColumnCompareTimestampScalar.txt, which covers comparison + * expressions between a datetime/interval column and a scalar of the same type. The boolean output * is stored in a separate boolean column. */ public class extends { - public (long value, int colNum, int outputColumn) { - super(value, colNum, outputColumn); + private static final long serialVersionUID = 1L; + + public (Timestamp value, int colNum, int outputColumn) { + super(new PisaTimestamp(value), colNum, outputColumn); } public () { diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumnBase.txt new file mode 100644 index 0000000..bd345e7 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumnBase.txt @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template ScalarCompareTimestamp.txt, which covers comparison + * expressions between a long/double scalar and a column. The boolean output is stored in a + * separate boolean column. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + + public (PisaTimestamp value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector2.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector2.noNulls; + if (inputColVector2.noNulls) { + if (inputColVector2.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } + } + } else { + if (inputColVector2.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + outNulls[i] = false; + } else { + //comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 
1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt index 57a0e5d..a9a3b6d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt new file mode 100644 index 0000000..3cdf405 --- /dev/null +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +/** +* . Vectorized implementation for MIN/MAX aggregates. +*/ +@Description(name = "", + value = "") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** + * class for storing the current aggregate value. 
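The aggregate below works off a tiny per-group buffer: the first non-null value seeds it, and later values replace it only when the MIN/MAX comparison says so. The iterate* methods then specialize every combination of nulls and row selection so the inner loops stay branch-free. Both ideas in stand-in form (plain longs, with "<" playing the template's comparison; a MAX variant flips it):

    final class MinMaxSketch {
      static final class MinBuffer {
        boolean isNull = true;
        long value;

        void checkValue(long candidate) {
          if (isNull) {
            isNull = false;
            value = candidate;          // first non-null value seeds the buffer
          } else if (candidate < value) {
            value = candidate;
          }
        }
      }

      // Naive single loop: pays the selection and null branches on every row.
      static void iterateNaive(MinBuffer agg, long[] v, boolean[] isNull,
          boolean noNulls, int[] sel, boolean selectedInUse, int n) {
        for (int j = 0; j < n; j++) {
          int i = selectedInUse ? sel[j] : j;
          if (noNulls || !isNull[i]) {
            agg.checkValue(v[i]);
          }
        }
      }

      // One of the specialized variants generated instead: noNulls with a
      // selection vector, and a branch-free body.
      static void iterateNoNullsSelection(MinBuffer agg, long[] v, int[] sel, int n) {
        for (int j = 0; j < n; j++) {
          agg.checkValue(v[sel[j]]);
        }
      }
    }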
+ */ + static private final class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private final PisaTimestamp value; + + /** + * Value is explicitly (re)initialized in reset() + */ + transient private boolean isNull = true; + + public Aggregation() { + value = new PisaTimestamp(); + } + + public void checkValue(TimestampColumnVector colVector, int index) { + if (isNull) { + isNull = false; + colVector.pisaTimestampUpdate(this.value, index); + } else if (colVector.compareTo(this.value, index) 0) { + colVector.pisaTimestampUpdate(this.value, index); + } + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset () { + isNull = true; + this.value.reset(); + } + } + + private VectorExpression inputExpression; + private transient VectorExpressionWriter resultWriter; + + public (VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public () { + super(); + } + + @Override + public void init(AggregationDesc desc) throws HiveException { + resultWriter = VectorExpressionWriterFactory.genVectorExpressionWritable( + desc.getParameters().get(0)); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregrateIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize); + } + } + } else { + if (inputColVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize, inputColVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize, inputColVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + // Repeating use index 0. + myagg.checkValue(inputColVector, 0); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColVector, selection[i]); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColVector, i); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[selection[i]]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + // Repeating use index 0. + myagg.checkValue(inputColVector, 0); + } + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + // Repeating use index 0. 
+ myagg.checkValue(inputColVector, 0); + } + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + j); + myagg.checkValue(inputColVector, i); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColVector, i); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls && + (myagg.isNull || (inputColVector.compareTo(myagg.value, 0) 0))) { + myagg.isNull = false; + inputColVector.pisaTimestampUpdate(myagg.value, 0); + } + return; + } + + if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, + batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, + batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + if (myagg.isNull) { + myagg.isNull = false; + inputColVector.pisaTimestampUpdate(myagg.value, i); + } + else if (inputColVector.compareTo(myagg.value, i) 0) { + inputColVector.pisaTimestampUpdate(myagg.value, i); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + inputColVector.pisaTimestampUpdate(myagg.value, selected[0]); + myagg.isNull = false; + } + + for (int i=0; i< batchSize; ++i) { + int sel = selected[i]; + if (inputColVector.compareTo(myagg.value, sel) 0) { + inputColVector.pisaTimestampUpdate(myagg.value, sel); + } + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + if (myagg.isNull) { + myagg.isNull = false; + inputColVector.pisaTimestampUpdate(myagg.value, i); + } + else if (inputColVector.compareTo(myagg.value, i) 0) { + inputColVector.pisaTimestampUpdate(myagg.value, i); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize) { + if (myagg.isNull) { + inputColVector.pisaTimestampUpdate(myagg.value, 0); + myagg.isNull = false; + } + + for (int i=0; i < batchSize; ++i) { + if (inputColVector.compareTo(myagg.value, i) 0) { + inputColVector.pisaTimestampUpdate(myagg.value, i); + } + } + } + + @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new Aggregation(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + Aggregation myAgg = (Aggregation) agg; + myAgg.reset(); + } + + @Override + public Object evaluateOutput( + AggregationBuffer agg) throws HiveException { + Aggregation myagg = (Aggregation) agg; + if (myagg.isNull) { + return null; + } + else { + return resultWriter.writeValue(myagg.value); + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { + return resultWriter.getObjectInspector(); + } + + @Override + public int getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object() + + model.primitive2(), + model.memoryAlign()); + } + + public VectorExpression getInputExpression() { + return inputExpression; + } + + public void setInputExpression(VectorExpression inputExpression) { + this.inputExpression = inputExpression; + } +} + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java index 95dbf8d..5de055c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java @@ -18,50 +18,24 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.sql.Timestamp; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; public final class TimestampUtils { - /** - * Store the given timestamp in nanoseconds into the timestamp object. - * @param timeInNanoSec Given timestamp in nanoseconds - * @param t The timestamp object - */ - public static void assignTimeInNanoSec(long timeInNanoSec, Timestamp t) { - /* - * java.sql.Timestamp consists of a long variable to store milliseconds and an integer variable for nanoseconds. - * The long variable is used to store only the full seconds converted to millis. For example for 1234 milliseconds, - * 1000 is stored in the long variable, and 234000000 (234 converted to nanoseconds) is stored as nanoseconds. - * The negative timestamps are also supported, but nanoseconds must be positive therefore millisecond part is - * reduced by one second. - */ - long integralSecInMillis = (timeInNanoSec / 1000000000) * 1000; // Full seconds converted to millis. - long nanos = timeInNanoSec % 1000000000; // The nanoseconds. - if (nanos < 0) { - nanos = 1000000000 + nanos; // The positive nano-part that will be added to milliseconds. - integralSecInMillis = ((timeInNanoSec / 1000000000) - 1) * 1000; // Reduce by one second. 
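Reviewer note: the file above is a code-generation template, so the bare <OperatorSymbol> token is intentional; GenVectorCode substitutes it per expansion, "<" for the Min variant and ">" for the Max variant. A minimal sketch of the expanded min-side check, under that assumption:

    // VectorUDAFMinTimestamp after template expansion: keep the smaller element.
    if (inputColVector.compareTo(myagg.value, i) < 0) {
      inputColVector.pisaTimestampUpdate(myagg.value, i);
    }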
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java
index 95dbf8d..5de055c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java
@@ -18,50 +18,24 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
-import java.sql.Timestamp;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 
 public final class TimestampUtils {
 
-  /**
-   * Store the given timestamp in nanoseconds into the timestamp object.
-   * @param timeInNanoSec Given timestamp in nanoseconds
-   * @param t The timestamp object
-   */
-  public static void assignTimeInNanoSec(long timeInNanoSec, Timestamp t) {
-    /*
-     * java.sql.Timestamp consists of a long variable to store milliseconds and an integer variable for nanoseconds.
-     * The long variable is used to store only the full seconds converted to millis. For example for 1234 milliseconds,
-     * 1000 is stored in the long variable, and 234000000 (234 converted to nanoseconds) is stored as nanoseconds.
-     * The negative timestamps are also supported, but nanoseconds must be positive therefore millisecond part is
-     * reduced by one second.
-     */
-    long integralSecInMillis = (timeInNanoSec / 1000000000) * 1000; // Full seconds converted to millis.
-    long nanos = timeInNanoSec % 1000000000; // The nanoseconds.
-    if (nanos < 0) {
-      nanos = 1000000000 + nanos; // The positive nano-part that will be added to milliseconds.
-      integralSecInMillis = ((timeInNanoSec / 1000000000) - 1) * 1000; // Reduce by one second.
-    }
-    t.setTime(integralSecInMillis);
-    t.setNanos((int) nanos);
-  }
-
-  public static long getTimeNanoSec(Timestamp t) {
-    long time = t.getTime();
-    int nanos = t.getNanos();
-    return (time * 1000000) + (nanos % 1000000);
-  }
+  static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1);
+  static final long NANOSECONDS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1);
 
-  public static long secondsToNanoseconds(long seconds) {
-    return seconds * 1000000000;
-  }
-
-  public static long doubleToNanoseconds(double d) {
-    return (long) (d * 1000000000);
+  public static long daysToNanoseconds(long daysSinceEpoch) {
+    return DateWritable.daysToMillis((int) daysSinceEpoch) * NANOSECONDS_PER_MILLISECOND;
   }
 
-  public static long daysToNanoseconds(long daysSinceEpoch) {
-    return DateWritable.daysToMillis((int) daysSinceEpoch) * 1000000;
+  public static TimestampWritable timestampColumnVectorWritable(
+      TimestampColumnVector timestampColVector, int elementNum,
+      TimestampWritable timestampWritable) {
+    timestampWritable.set(timestampColVector.asScratchTimestamp(elementNum));
+    return timestampWritable;
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java
index 92b4a07..794b4d9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
-import java.sql.Timestamp;
 import java.util.List;
 
 import org.slf4j.Logger;
@@ -228,7 +227,26 @@ void assign(int batchIndex, Object object) {
     }
   }
 
-  private class TimestampAssigner extends AbstractLongAssigner {
+  private abstract class AbstractTimestampAssigner extends Assigner {
+
+    protected TimestampColumnVector colVector;
+
+    AbstractTimestampAssigner(int columnIndex) {
+      super(columnIndex);
+    }
+
+    @Override
+    void setColumnVector(VectorizedRowBatch batch) {
+      colVector = (TimestampColumnVector) batch.cols[columnIndex];
+    }
+
+    @Override
+    void forgetColumnVector() {
+      colVector = null;
+    }
+  }
+
+  private class TimestampAssigner extends AbstractTimestampAssigner {
 
     TimestampAssigner(int columnIndex) {
       super(columnIndex);
@@ -239,9 +257,7 @@ void assign(int batchIndex, Object object) {
       if (object == null) {
         VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
       } else {
-        TimestampWritable tw = (TimestampWritable) object;
-        Timestamp t = tw.getTimestamp();
-        vector[batchIndex] = TimestampUtils.getTimeNanoSec(t);
+        colVector.set(batchIndex, ((TimestampWritable) object).getTimestamp());
       }
     }
   }
@@ -264,7 +280,7 @@ void assign(int batchIndex, Object object) {
     }
   }
 
-  private class IntervalDayTimeAssigner extends AbstractLongAssigner {
+  private class IntervalDayTimeAssigner extends AbstractTimestampAssigner {
 
     IntervalDayTimeAssigner(int columnIndex) {
       super(columnIndex);
@@ -277,7 +293,7 @@ void assign(int batchIndex, Object object) {
       } else {
         HiveIntervalDayTimeWritable idtw = (HiveIntervalDayTimeWritable) object;
         HiveIntervalDayTime idt = idtw.getHiveIntervalDayTime();
-        vector[batchIndex] = DateUtils.getIntervalDayTimeTotalNanos(idt);
+        colVector.set(batchIndex, idt.pisaTimestampUpdate(colVector.useScratchPisaTimestamp()));
      }
    }
  }
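The rewritten daysToNanoseconds converts epoch days to epoch milliseconds via DateWritable.daysToMillis and then to nanoseconds, with the unit constants derived through TimeUnit instead of literal magic numbers. A rough sanity check, assuming a JVM whose daysToMillis normalization yields exact UTC day boundaries (the method applies timezone adjustment, so this is illustrative only):

    // One epoch day = 86,400 seconds = 8.64e13 ns under the stated assumption.
    long oneDay = TimestampUtils.daysToNanoseconds(1L);
    assert oneDay == 86_400L * 1_000_000_000L;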
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
index befe2fc..463c8a6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java
@@ -165,6 +165,17 @@ protected void assignDecimal(HiveDecimalWritable hdw, int index) {
     }
   }
 
+  private static abstract class VectorTimestampColumnAssign
+      extends VectorColumnAssignVectorBase<TimestampColumnVector> {
+
+    protected void assignTimestamp(Timestamp value, int index) {
+      outCol.set(index, value);
+    }
+    protected void assignTimestamp(TimestampWritable tw, int index) {
+      outCol.set(index, tw.getTimestamp());
+    }
+  }
+
   public static VectorColumnAssign[] buildAssigners(VectorizedRowBatch outputBatch)
       throws HiveException {
@@ -313,19 +324,17 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
         }.init(outputBatch, (LongColumnVector) destCol);
         break;
       case TIMESTAMP:
-        outVCA = new VectorLongColumnAssign() {
+        outVCA = new VectorTimestampColumnAssign() {
           @Override
           public void assignObjectValue(Object val, int destIndex) throws HiveException {
             if (val == null) {
               assignNull(destIndex);
             }
             else {
-              TimestampWritable bw = (TimestampWritable) val;
-              Timestamp t = bw.getTimestamp();
-              assignLong(TimestampUtils.getTimeNanoSec(t), destIndex);
+              assignTimestamp((TimestampWritable) val, destIndex);
             }
           }
-        }.init(outputBatch, (LongColumnVector) destCol);
+        }.init(outputBatch, (TimestampColumnVector) destCol);
         break;
       case DATE:
         outVCA = new VectorLongColumnAssign() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
index 6673509..0949145 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
@@ -22,8 +22,6 @@
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -57,6 +55,11 @@
   protected int[] decimalIndices;
 
   /**
+   * indices of TIMESTAMP primitive keys.
+   */
+  protected int[] timestampIndices;
+
+  /**
    * Helper class for looking up a key value based on key index.
    */
   public class KeyLookupHelper {
@@ -64,11 +67,13 @@ public int doubleIndex;
     public int stringIndex;
     public int decimalIndex;
+    public int timestampIndex;
 
     private static final int INDEX_UNUSED = -1;
 
     private void resetIndices() {
-      this.longIndex = this.doubleIndex = this.stringIndex = this.decimalIndex = INDEX_UNUSED;
+      this.longIndex = this.doubleIndex = this.stringIndex = this.decimalIndex =
+          timestampIndex = INDEX_UNUSED;
     }
     public void setLong(int index) {
       resetIndices();
@@ -89,6 +94,11 @@ public void setDecimal(int index) {
       resetIndices();
       this.decimalIndex = index;
     }
+
+    public void setTimestamp(int index) {
+      resetIndices();
+      this.timestampIndex = index;
+    }
   }
 
@@ -103,6 +113,7 @@ public void setDecimal(int index) {
   protected int doubleIndicesIndex;
   protected int stringIndicesIndex;
   protected int decimalIndicesIndex;
+  protected int timestampIndicesIndex;
 
   protected VectorColumnSetInfo(int keyCount) {
     this.keyCount = keyCount;
@@ -117,6 +128,8 @@
     stringIndicesIndex = 0;
     decimalIndices = new int[this.keyCount];
     decimalIndicesIndex = 0;
+    timestampIndices = new int[this.keyCount];
+    timestampIndicesIndex = 0;
     indexLookup = new KeyLookupHelper[this.keyCount];
   }
 
@@ -153,6 +166,12 @@ protected void addKey(String outputType) throws HiveException {
       ++decimalIndicesIndex;
       break;
 
+    case TIMESTAMP:
+      timestampIndices[timestampIndicesIndex] = addIndex;
+      indexLookup[addIndex].setTimestamp(timestampIndicesIndex);
+      ++timestampIndicesIndex;
+      break;
+
     default:
       throw new HiveException("Unexpected column vector type " + columnVectorType);
     }
@@ -165,5 +184,6 @@ protected void finishAdding() {
     doubleIndices = Arrays.copyOf(doubleIndices, doubleIndicesIndex);
     stringIndices = Arrays.copyOf(stringIndices, stringIndicesIndex);
     decimalIndices = Arrays.copyOf(decimalIndices, decimalIndicesIndex);
+    timestampIndices = Arrays.copyOf(timestampIndices, timestampIndicesIndex);
   }
 }
\ No newline at end of file
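Each grouping key claims exactly one slot in one of the typed index arrays, and KeyLookupHelper records which. A small illustration of the invariant, hypothetical usage relying only on the methods shown above:

    KeyLookupHelper klh = new KeyLookupHelper();
    klh.setTimestamp(2);  // resetIndices() runs first, so afterwards:
    // klh.timestampIndex == 2 and every other index is INDEX_UNUSED (-1).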
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
index c56903e..97542df 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
@@ -22,8 +22,6 @@
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -189,6 +187,32 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBa
     }
   }
 
+  private class TimestampCopyRow extends CopyRow {
+
+    TimestampCopyRow(int inColumnIndex, int outColumnIndex) {
+      super(inColumnIndex, outColumnIndex);
+    }
+
+    @Override
+    void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) {
+      TimestampColumnVector inColVector = (TimestampColumnVector) inBatch.cols[inColumnIndex];
+      TimestampColumnVector outColVector = (TimestampColumnVector) outBatch.cols[outColumnIndex];
+
+      if (inColVector.isRepeating) {
+        if (inColVector.noNulls || !inColVector.isNull[0]) {
+          outColVector.setElement(outBatchIndex, 0, inColVector);
+        } else {
+          VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex);
+        }
+      } else {
+        if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) {
+          outColVector.setElement(outBatchIndex, inBatchIndex, inColVector);
+        } else {
+          VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex);
+        }
+      }
+    }
+  }
 
   private CopyRow[] subRowToBatchCopiersByValue;
   private CopyRow[] subRowToBatchCopiersByReference;
@@ -212,6 +236,10 @@ public void init(VectorColumnMapping columnMapping) throws HiveException {
         copyRowByValue = new LongCopyRow(inputColumn, outputColumn);
         break;
 
+      case TIMESTAMP:
+        copyRowByValue = new TimestampCopyRow(inputColumn, outputColumn);
+        break;
+
       case DOUBLE:
         copyRowByValue = new DoubleCopyRow(inputColumn, outputColumn);
         break;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index 4d86db6..427eb8a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -20,7 +20,6 @@
 import java.io.EOFException;
 import java.io.IOException;
-import java.sql.Timestamp;
 import java.util.List;
 
 import org.slf4j.Logger;
@@ -204,7 +203,14 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
     }
   }
 
-  private class TimestampReader extends AbstractLongReader {
+  private abstract class AbstractTimestampReader extends Reader {
+
+    AbstractTimestampReader(int columnIndex) {
+      super(columnIndex);
+    }
+  }
+
+  private class TimestampReader extends AbstractTimestampReader {
 
     DeserializeRead.ReadTimestampResults readTimestampResults;
 
@@ -215,16 +221,16 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
     @Override
     void apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
-      LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
+      TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex];
 
       if (deserializeRead.readCheckNull()) {
         VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
       } else {
         deserializeRead.readTimestamp(readTimestampResults);
-        Timestamp t = readTimestampResults.getTimestamp();
-        colVector.vector[batchIndex] = TimestampUtils.getTimeNanoSec(t);
+        colVector.set(batchIndex, readTimestampResults.getTimestamp());
       }
     }
+  }
 
   private class IntervalYearMonthReader extends AbstractLongReader {
@@ -250,7 +256,7 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
     }
   }
 
-  private class IntervalDayTimeReader extends AbstractLongReader {
+  private class IntervalDayTimeReader extends AbstractTimestampReader {
 
     DeserializeRead.ReadIntervalDayTimeResults readIntervalDayTimeResults;
 
@@ -261,14 +267,14 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
     @Override
     void apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
-      LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
+      TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex];
 
       if (deserializeRead.readCheckNull()) {
         VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
       } else {
         deserializeRead.readIntervalDayTime(readIntervalDayTimeResults);
-        HiveIntervalDayTime hidt = readIntervalDayTimeResults.getHiveIntervalDayTime();
-        colVector.vector[batchIndex] = DateUtils.getIntervalDayTimeTotalNanos(hidt);
+        HiveIntervalDayTime idt = readIntervalDayTimeResults.getHiveIntervalDayTime();
+        colVector.set(batchIndex, idt.pisaTimestampUpdate(colVector.useScratchPisaTimestamp()));
       }
     }
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index e221362..0b9ad55 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -43,7 +43,7 @@
   // LongColumnVector -->
   //   INT_FAMILY
   //   DATE
-  //   TIMESTAMP
+  //   INTERVAL_FAMILY
   //
   // DoubleColumnVector -->
   //   FLOAT_FAMILY
@@ -56,6 +56,9 @@
   //   CHAR
   //   VARCHAR
   //
+  // TimestampColumnVector -->
+  //   TIMESTAMP
+  //
   public enum ArgumentType {
     NONE (0x000),
     INT_FAMILY (0x001),
@@ -71,9 +74,9 @@
     INTERVAL_DAY_TIME (0x200),
     DATETIME_FAMILY (DATE.value | TIMESTAMP.value),
     INTERVAL_FAMILY (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value),
-    INT_TIMESTAMP_FAMILY (INT_FAMILY.value | TIMESTAMP.value),
-    INT_INTERVAL_FAMILY (INT_FAMILY.value | INTERVAL_FAMILY.value),
-    INT_DATETIME_INTERVAL_FAMILY (INT_FAMILY.value | DATETIME_FAMILY.value | INTERVAL_FAMILY.value),
+    INT_INTERVAL_YEAR_MONTH (INT_FAMILY.value | INTERVAL_YEAR_MONTH.value),
+    INT_DATE_INTERVAL_YEAR_MONTH (INT_FAMILY.value | DATE.value | INTERVAL_YEAR_MONTH.value),
+    TIMESTAMP_INTERVAL_DAY_TIME (TIMESTAMP.value | INTERVAL_DAY_TIME.value),
     STRING_DATETIME_FAMILY (STRING_FAMILY.value | DATETIME_FAMILY.value),
     ALL_FAMILY (0xFFF);
@@ -146,10 +149,12 @@ public boolean isSameTypeOrFamily(ArgumentType other) {
   public static String getVectorColumnSimpleName(ArgumentType argType) {
     if (argType == INT_FAMILY ||
         argType == DATE ||
-        argType == TIMESTAMP ||
-        argType == INTERVAL_YEAR_MONTH ||
-        argType == INTERVAL_DAY_TIME) {
+        argType == INTERVAL_YEAR_MONTH
+        ) {
       return "Long";
+    } else if (argType == TIMESTAMP ||
+        argType == INTERVAL_DAY_TIME) {
+      return "Timestamp";
     } else if (argType == FLOAT_FAMILY) {
       return "Double";
     } else if (argType == DECIMAL) {
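The ArgumentType constants are bitmasks, so a composed family matches exactly the types whose bits it ORs together, and isSameTypeOrFamily reduces to a nonzero AND. A sketch of the consequence for the new families (field access written as inside the enum; illustrative only):

    // TIMESTAMP_INTERVAL_DAY_TIME accepts both TimestampColumnVector-backed types...
    assert (TIMESTAMP_INTERVAL_DAY_TIME.value & TIMESTAMP.value) != 0;
    assert (TIMESTAMP_INTERVAL_DAY_TIME.value & INTERVAL_DAY_TIME.value) != 0;
    // ...but not DATE, which stays in a LongColumnVector as epoch days.
    assert (TIMESTAMP_INTERVAL_DAY_TIME.value & DATE.value) == 0;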
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
index 4100bc5..622f4a3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
@@ -22,6 +22,7 @@
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.List;
+
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.lang.StringUtils;
 import org.slf4j.Logger;
@@ -31,6 +32,7 @@
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -255,10 +257,29 @@ Object extract(int batchIndex) {
     }
   }
 
-  private class TimestampExtractor extends AbstractLongExtractor {
+  private abstract class AbstractTimestampExtractor extends Extractor {
+
+    protected TimestampColumnVector colVector;
+
+    AbstractTimestampExtractor(int columnIndex) {
+      super(columnIndex);
+    }
+
+    @Override
+    void setColumnVector(VectorizedRowBatch batch) {
+      colVector = (TimestampColumnVector) batch.cols[columnIndex];
+    }
+
+    @Override
+    void forgetColumnVector() {
+      colVector = null;
+    }
+  }
+
+  private class TimestampExtractor extends AbstractTimestampExtractor {
+
+    protected Timestamp timestamp;
 
-    private Timestamp timestamp;
-
     TimestampExtractor(int columnIndex) {
       super(columnIndex);
       object = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.create(new Timestamp(0));
@@ -269,8 +290,7 @@ Object extract(int batchIndex) {
     Object extract(int batchIndex) {
       int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
       if (colVector.noNulls || !colVector.isNull[adjustedIndex]) {
-        long value = vector[adjustedIndex];
-        TimestampUtils.assignTimeInNanoSec(value, timestamp);
+        colVector.timestampUpdate(timestamp, adjustedIndex);
         PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.set(object, timestamp);
         return object;
       } else {
@@ -282,7 +302,7 @@ Object extract(int batchIndex) {
   private class IntervalYearMonthExtractor extends AbstractLongExtractor {
 
     private HiveIntervalYearMonth hiveIntervalYearMonth;
-    
+
     IntervalYearMonthExtractor(int columnIndex) {
       super(columnIndex);
       object = PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.create(new HiveIntervalYearMonth(0));
@@ -303,10 +323,10 @@ Object extract(int batchIndex) {
     }
   }
 
-  private class IntervalDayTimeExtractor extends AbstractLongExtractor {
+  private class IntervalDayTimeExtractor extends AbstractTimestampExtractor {
 
     private HiveIntervalDayTime hiveIntervalDayTime;
-    
+
     IntervalDayTimeExtractor(int columnIndex) {
       super(columnIndex);
       object = PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.create(new HiveIntervalDayTime(0, 0));
@@ -317,8 +337,7 @@ Object extract(int batchIndex) {
     Object extract(int batchIndex) {
       int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
      if (colVector.noNulls || !colVector.isNull[adjustedIndex]) {
-        long value = vector[adjustedIndex];
-        DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, value);
+        hiveIntervalDayTime.set(colVector.asScratchPisaTimestamp(adjustedIndex));
         PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.set(object, hiveIntervalDayTime);
         return object;
       } else {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
index fabac38..9f0ac11 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
@@ -19,8 +19,6 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.io.IOException;
-import java.util.Arrays;
-
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.io.DataOutputBuffer;
@@ -121,5 +119,17 @@ public void copyGroupKey(VectorizedRowBatch inputBatch, VectorizedRowBatch outpu
         outputColumnVector.isNull[outputBatch.size] = true;
       }
     }
+    for(int i=0;i<timestampIndices.length; ++i) {
+      int keyIndex = timestampIndices[i];
+      int inputColumn = keyExpressions[keyIndex].getOutputColumn();
+      TimestampColumnVector inputColumnVector = (TimestampColumnVector) inputBatch.cols[inputColumn];
+      TimestampColumnVector outputColumnVector = (TimestampColumnVector) outputBatch.cols[inputColumn];
+      if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) {
+        outputColumnVector.setElement(outputBatch.size, 0, inputColumnVector);
+      } else {
+        outputColumnVector.noNulls = false;
+        outputColumnVector.isNull[outputBatch.size] = true;
+      }
+    }
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
@@ -44,11 +45,14 @@
   private static final int[] EMPTY_INT_ARRAY = new int[0];
+  private static final PisaTimestamp[] EMPTY_TIMESTAMP_ARRAY = new PisaTimestamp[0];
 
   private long[] longValues;
   private double[] doubleValues;
   private HiveDecimalWritable[] decimalValues;
+  private PisaTimestamp[] timestampValues;
 
-  public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount,
-      int byteValuesCount, int decimalValuesCount) {
+  public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount,
+      int byteValuesCount, int decimalValuesCount, int timestampValuesCount) {
     longValues = longValuesCount > 0 ? new long[longValuesCount] : EMPTY_LONG_ARRAY;
     doubleValues = doubleValuesCount > 0 ? new double[doubleValuesCount] : EMPTY_DOUBLE_ARRAY;
     decimalValues = decimalValuesCount > 0 ?
         new HiveDecimalWritable[decimalValuesCount] : EMPTY_DECIMAL_ARRAY;
+    timestampValues = timestampValuesCount > 0 ?
+        new PisaTimestamp[timestampValuesCount] : EMPTY_TIMESTAMP_ARRAY;
     for(int i = 0; i < decimalValuesCount; ++i) {
       decimalValues[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
     }
@@ -72,7 +78,11 @@ public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount,
       byteStarts = EMPTY_INT_ARRAY;
       byteLengths = EMPTY_INT_ARRAY;
     }
-    isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount + decimalValuesCount];
+    for(int i = 0; i < timestampValuesCount; ++i) {
+      timestampValues[i] = new PisaTimestamp();
+    }
+    isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount +
+        decimalValuesCount + timestampValuesCount];
     hashcode = 0;
   }
@@ -94,6 +104,10 @@ public void setHashKey() {
       hashcode ^= decimalValues[i].getHiveDecimal().hashCode();
     }
 
+    for (int i = 0; i < timestampValues.length; i++) {
+      hashcode ^= timestampValues[i].hashCode();
+    }
+
     // This code, with branches and all, is not executed if there are no string keys
     for (int i = 0; i < byteValues.length; ++i) {
       /*
@@ -131,6 +145,7 @@ public boolean equals(Object that) {
         Arrays.equals(longValues, keyThat.longValues) &&
         Arrays.equals(doubleValues, keyThat.doubleValues) &&
         Arrays.equals(decimalValues, keyThat.decimalValues) &&
+        Arrays.equals(timestampValues, keyThat.timestampValues) &&
         Arrays.equals(isNull, keyThat.isNull) &&
         byteValues.length == keyThat.byteValues.length &&
         (0 == byteValues.length || bytesEquals(keyThat));
@@ -196,6 +211,16 @@ public void duplicateTo(VectorHashKeyWrapper clone) {
       clone.byteStarts = EMPTY_INT_ARRAY;
       clone.byteLengths = EMPTY_INT_ARRAY;
     }
+    if (timestampValues.length > 0) {
+      clone.timestampValues = new PisaTimestamp[timestampValues.length];
+      for(int i = 0; i < timestampValues.length; ++i) {
+        clone.timestampValues[i] = new PisaTimestamp();
+        clone.timestampValues[i].update(timestampValues[i]);
+      }
+    } else {
+      clone.timestampValues = EMPTY_TIMESTAMP_ARRAY;
+    }
+
     clone.hashcode = hashcode;
     assert clone.equals(this);
   }
@@ -256,14 +281,32 @@ public void assignNullDecimal(int index) {
     isNull[longValues.length + doubleValues.length + byteValues.length + index] = true;
   }
 
+  public void assignTimestamp(int index, PisaTimestamp value) {
+    timestampValues[index].update(value);
+    isNull[longValues.length + doubleValues.length + byteValues.length +
+        decimalValues.length + index] = false;
+  }
+
+  public void assignTimestamp(int index, TimestampColumnVector colVector, int elementNum) {
+    colVector.pisaTimestampUpdate(timestampValues[index], elementNum);
+    isNull[longValues.length + doubleValues.length + byteValues.length +
+        decimalValues.length + index] = false;
+  }
+
+  public void assignNullTimestamp(int index) {
+    isNull[longValues.length + doubleValues.length + byteValues.length +
+        decimalValues.length + index] = true;
+  }
+
   @Override
   public String toString() {
-    return String.format("%d[%s] %d[%s] %d[%s] %d[%s]",
+    return String.format("%d[%s] %d[%s] %d[%s] %d[%s] %d[%s]",
         longValues.length, Arrays.toString(longValues),
         doubleValues.length, Arrays.toString(doubleValues),
         byteValues.length, Arrays.toString(byteValues),
-        decimalValues.length, Arrays.toString(decimalValues));
+        decimalValues.length, Arrays.toString(decimalValues),
+        timestampValues.length, Arrays.toString(timestampValues));
   }
 
   public boolean getIsLongNull(int i) {
@@ -315,5 +358,15 @@ public boolean getIsDecimalNull(int i) {
   public HiveDecimalWritable getDecimal(int i) {
     return decimalValues[i];
   }
+
+  public boolean getIsTimestampNull(int i) {
+    return isNull[longValues.length + doubleValues.length + byteValues.length +
+        decimalValues.length + i];
+  }
+
+  public PisaTimestamp getTimestamp(int i) {
+    return timestampValues[i];
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
index 6333222..1c34124 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
@@ -18,13 +18,11 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
-import java.util.Arrays;
-
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
-import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 
 /**
  * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a
@@ -157,27 +155,49 @@ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException {
       }
     }
+    for(int i=0;i<timestampIndices.length; ++i) {
+      int keyIndex = timestampIndices[i];
+      int columnIndex = keyExpressions[keyIndex].getOutputColumn();
+      TimestampColumnVector columnVector = (TimestampColumnVector) batch.cols[columnIndex];
+      if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+        assignTimestampNoNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+      } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+        assignTimestampNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
+      } else if (columnVector.noNulls && columnVector.isRepeating) {
+        assignTimestampNoNullsRepeating(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) {
+        assignTimestampNullsNoRepeatingNoSelection(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && columnVector.isRepeating) {
+        assignTimestampNullsRepeating(i, batch.size, columnVector);
+      } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) {
+        assignTimestampNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected);
+      } else {
+        throw new HiveException("Unimplemented timestamp null/repeat/selected combination");
+      }
+    }
@@ -273,11 +293,16 @@ public Object getWritableKeyValue(VectorHashKeyWrapper kw, int i,
+    } else if (klh.timestampIndex >= 0) {
+      return kw.getIsTimestampNull(klh.timestampIndex)? null :
+          keyOutputWriter.writeValue(
+              kw.getTimestamp(klh.timestampIndex));
     } else {
       throw new HiveException(String.format(
-          "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d %d",
-          i, klh.longIndex, klh.doubleIndex, klh.stringIndex, klh.decimalIndex));
+          "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d %d %d",
+          i, klh.longIndex, klh.doubleIndex, klh.stringIndex, klh.decimalIndex,
+          klh.timestampIndex));
     }
   }
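VectorHashKeyWrapper keeps one flat isNull array covering all typed blocks in declaration order, which is why every timestamp accessor above offsets by the lengths of the preceding blocks. The index arithmetic, spelled out:

    // Null flag for timestamp key i, with layout
    // [longs][doubles][bytes][decimals][timestamps]:
    int nullIndex = longValues.length + doubleValues.length
        + byteValues.length + decimalValues.length + i;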
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
index c98c260..dea38e8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
@@ -22,6 +22,8 @@
 import java.sql.Timestamp;
 import java.util.List;
 
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -254,7 +256,7 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
     }
   }
 
-  private class TimestampWriter extends AbstractLongWriter {
+  private class TimestampWriter extends Writer {
 
     Timestamp scratchTimestamp;
 
@@ -265,11 +267,11 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
     @Override
     boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
-      LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
+      TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex];
 
       if (colVector.isRepeating) {
         if (colVector.noNulls || !colVector.isNull[0]) {
-          TimestampUtils.assignTimeInNanoSec(colVector.vector[0], scratchTimestamp);
+          colVector.timestampUpdate(scratchTimestamp, 0);
           serializeWrite.writeTimestamp(scratchTimestamp);
           return true;
         } else {
@@ -278,7 +280,7 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
         }
       } else {
         if (colVector.noNulls || !colVector.isNull[batchIndex]) {
-          TimestampUtils.assignTimeInNanoSec(colVector.vector[batchIndex], scratchTimestamp);
+          colVector.timestampUpdate(scratchTimestamp, batchIndex);
           serializeWrite.writeTimestamp(scratchTimestamp);
           return true;
         } else {
@@ -319,19 +321,23 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
     }
   }
 
-  private class IntervalDayTimeWriter extends AbstractLongWriter {
+  private class IntervalDayTimeWriter extends Writer {
+
+    private HiveIntervalDayTime hiveIntervalDayTime;
 
     IntervalDayTimeWriter(int columnIndex) {
       super(columnIndex);
+      hiveIntervalDayTime = new HiveIntervalDayTime();
     }
 
     @Override
     boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
-      LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
+      TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex];
 
       if (colVector.isRepeating) {
         if (colVector.noNulls || !colVector.isNull[0]) {
-          serializeWrite.writeHiveIntervalDayTime(colVector.vector[0]);
+          hiveIntervalDayTime.set(colVector.asScratchPisaTimestamp(0));
+          serializeWrite.writeHiveIntervalDayTime(hiveIntervalDayTime);
           return true;
         } else {
           serializeWrite.writeNull();
@@ -339,7 +345,8 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
         }
       } else {
         if (colVector.noNulls || !colVector.isNull[batchIndex]) {
-          serializeWrite.writeHiveIntervalDayTime(colVector.vector[batchIndex]);
+          hiveIntervalDayTime.set(colVector.asScratchPisaTimestamp(batchIndex));
+          serializeWrite.writeHiveIntervalDayTime(hiveIntervalDayTime);
           return true;
         } else {
           serializeWrite.writeNull();
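TimestampWriter and IntervalDayTimeWriter follow the same guard used throughout this patch: entry 0 is authoritative when the vector is repeating, and noNulls short-circuits the isNull array. The recurring idiom, isolated for reference:

    int adjustedIndex = colVector.isRepeating ? 0 : batchIndex;
    boolean present = colVector.noNulls || !colVector.isNull[adjustedIndex];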
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 7e95244..dd59bf2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -50,22 +50,30 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgTimestamp;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountMerge;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFStdPopTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFStdSampTimestamp;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFVarPopTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFVarSampTimestamp;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDouble;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxTimestamp;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDouble;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinTimestamp;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDouble;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopLong;
@@ -929,20 +937,16 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI
       case DATE:
         return new ConstantVectorExpression(outCol, DateWritable.dateToDays((Date) constantValue));
       case TIMESTAMP:
-        return new ConstantVectorExpression(outCol, TimestampUtils.getTimeNanoSec((Timestamp) constantValue));
+        return new ConstantVectorExpression(outCol, (Timestamp) constantValue);
       case INTERVAL_YEAR_MONTH:
         return new ConstantVectorExpression(outCol,
             ((HiveIntervalYearMonth) constantValue).getTotalMonths());
       case INTERVAL_DAY_TIME:
-        return new ConstantVectorExpression(outCol,
-            DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) constantValue));
+        return new ConstantVectorExpression(outCol, (HiveIntervalDayTime) constantValue);
       case FLOAT_FAMILY:
         return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue());
       case DECIMAL:
-        VectorExpression ve = new ConstantVectorExpression(outCol, (HiveDecimal) constantValue);
-        // Set type name with decimal precision, scale, etc.
-        ve.setOutputType(typeName);
-        return ve;
+        return new ConstantVectorExpression(outCol, (HiveDecimal) constantValue, typeName);
       case STRING:
         return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes());
       case CHAR:
@@ -1240,8 +1244,8 @@ private VectorExpression getCastToTimestamp(GenericUDFTimestamp udf,
     VectorExpression ve = getVectorExpressionForUdf(udf, udf.getClass(), childExpr, mode, returnType);
 
     // Replace with the milliseconds conversion
-    if (!udf.isIntToTimestampInSeconds() && ve instanceof CastLongToTimestampViaLongToLong) {
-      ve = createVectorExpression(CastMillisecondsLongToTimestampViaLongToLong.class,
+    if (!udf.isIntToTimestampInSeconds() && ve instanceof CastLongToTimestamp) {
+      ve = createVectorExpression(CastMillisecondsLongToTimestamp.class,
           childExpr, Mode.PROJECTION, returnType);
     }
@@ -1526,13 +1530,13 @@ private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, Mode mode
       expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType);
       ((ILongInExpr) expr).setInListValues(inVals);
     } else if (isTimestampFamily(colType)) {
-      cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class);
-      long[] inVals = new long[childrenForInList.size()];
+      cl = (mode == Mode.FILTER ? FilterTimestampColumnInList.class : TimestampColumnInList.class);
+      Timestamp[] inVals = new Timestamp[childrenForInList.size()];
       for (int i = 0; i != inVals.length; i++) {
         inVals[i] = getTimestampScalar(childrenForInList.get(i));
       }
       expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType);
-      ((ILongInExpr) expr).setInListValues(inVals);
+      ((ITimestampInExpr) expr).setInListValues(inVals);
     } else if (isStringFamily(colType)) {
       cl = (mode == Mode.FILTER ? FilterStringColumnInList.class : StringColumnInList.class);
      byte[][] inVals = new byte[childrenForInList.size()][];
@@ -1834,7 +1838,7 @@ private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr
     } else if (isTimestampFamily(inputType)) {
-      return createVectorExpression(CastTimestampToDoubleViaLongToDouble.class, childExpr,
+      return createVectorExpression(CastTimestampToDouble.class, childExpr,
           Mode.PROJECTION, returnType);
     }
@@ -2023,19 +2027,9 @@ private VectorExpression getBetweenFilterExpression(List<ExprNodeDesc> childExpr, Mode mode
       cl = FilterCharColumnBetween.class;
     } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
       cl = FilterCharColumnNotBetween.class;
-    } else if (colType.equals("timestamp")) {
-
-      // Get timestamp boundary values as longs instead of the expected strings
-      long left = getTimestampScalar(childExpr.get(2));
-      long right = getTimestampScalar(childExpr.get(3));
-      childrenAfterNot = new ArrayList<ExprNodeDesc>();
-      childrenAfterNot.add(colExpr);
-      childrenAfterNot.add(new ExprNodeConstantDesc(left));
-      childrenAfterNot.add(new ExprNodeConstantDesc(right));
-      if (notKeywordPresent) {
-        cl = FilterLongColumnNotBetween.class;
-      } else {
-        cl = FilterLongColumnBetween.class;
-      }
+    } else if (colType.equals("timestamp") && !notKeywordPresent) {
+      cl = FilterTimestampColumnBetween.class;
+    } else if (colType.equals("timestamp") && notKeywordPresent) {
+      cl = FilterTimestampColumnNotBetween.class;
     } else if (isDecimalFamily(colType) && !notKeywordPresent) {
       cl = FilterDecimalColumnBetween.class;
     } else if (isDecimalFamily(colType) && notKeywordPresent) {
@@ -2056,6 +2050,7 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr)
 
     // Make vectorized operator
     String normalizedName = getNormalizedName(resultTypeName);
+
     VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, normalizedName, argDescs);
 
     // Set child expressions
@@ -2173,21 +2168,17 @@ private Object getVectorTypeScalarValue(ExprNodeConstantDesc constDesc) throws H
     VectorExpression.Type type = VectorExpression.Type.getValue(t);
     Object scalarValue = getScalarValue(constDesc);
     switch (type) {
-      case TIMESTAMP:
-        return TimestampUtils.getTimeNanoSec((Timestamp) scalarValue);
       case DATE:
        return DateWritable.dateToDays((Date) scalarValue);
      case INTERVAL_YEAR_MONTH:
        return ((HiveIntervalYearMonth) scalarValue).getTotalMonths();
-      case INTERVAL_DAY_TIME:
-        return DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) scalarValue);
       default:
         return scalarValue;
     }
   }
 
-  // Get a timestamp as a long in number of nanos, from a string constant or cast
-  private long getTimestampScalar(ExprNodeDesc expr) throws HiveException {
+  // Get a timestamp from a string constant or cast
+  private Timestamp getTimestampScalar(ExprNodeDesc expr) throws HiveException {
     if (expr instanceof ExprNodeGenericFuncDesc &&
         ((ExprNodeGenericFuncDesc) expr).getGenericUDF() instanceof GenericUDFTimestamp) {
       return evaluateCastToTimestamp(expr);
@@ -2215,7 +2206,7 @@ private long getTimestampScalar(ExprNodeDesc expr) throws HiveException {
         + "Expecting string.");
   }
 
-  private long evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException {
+  private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException {
     ExprNodeGenericFuncDesc expr2 = (ExprNodeGenericFuncDesc) expr;
     ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(expr2);
     ObjectInspector output = evaluator.initialize(null);
@@ -2226,7 +2217,7 @@ private long evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException {
       throw new HiveException("Udf: failed to convert to timestamp");
     }
     Timestamp ts = (Timestamp) java;
-    return TimestampUtils.getTimeNanoSec(ts);
+    return ts;
   }
 
   private Constructor<?> getConstructor(Class<?> cl) throws HiveException {
@@ -2315,7 +2306,7 @@ public static String mapTypeNameSynonyms(String typeName) {
     }
   }
 
-  public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) throws HiveException {
+  public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) {
     switch (typeInfo.getCategory()) {
       case STRUCT:
         return Type.STRUCT;
@@ -2336,11 +2327,13 @@ public static String mapTypeNameSynonyms(String typeName) {
       case INT:
       case LONG:
      case DATE:
-      case TIMESTAMP:
       case INTERVAL_YEAR_MONTH:
-      case INTERVAL_DAY_TIME:
         return ColumnVector.Type.LONG;
 
+      case INTERVAL_DAY_TIME:
+      case TIMESTAMP:
+        return ColumnVector.Type.TIMESTAMP;
+
       case FLOAT:
       case DOUBLE:
         return ColumnVector.Type.DOUBLE;
@@ -2369,47 +2362,58 @@ public static String mapTypeNameSynonyms(String typeName) {
   // TODO: And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used.. Right now they are conservatively
   // marked map-side (HASH).
   static ArrayList<AggregateDefinition> aggregatesDefinition = new ArrayList<AggregateDefinition>() {{
-    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY, null, VectorUDAFMinLong.class));
+    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_DATE_INTERVAL_YEAR_MONTH, null, VectorUDAFMinLong.class));
     add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class));
     add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class));
     add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class));
-    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY, null, VectorUDAFMaxLong.class));
+    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.TIMESTAMP_INTERVAL_DAY_TIME, null, VectorUDAFMinTimestamp.class));
+    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_DATE_INTERVAL_YEAR_MONTH, null, VectorUDAFMaxLong.class));
     add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class));
     add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class));
     add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class));
+    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.TIMESTAMP_INTERVAL_DAY_TIME, null, VectorUDAFMaxTimestamp.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class));
-    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_DATE_INTERVAL_YEAR_MONTH, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
     add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.TIMESTAMP_INTERVAL_DAY_TIME, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
     add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class));
     add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class));
     add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class));
-    add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class));
+    add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class));
     add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class));
     add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class));
+    add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFAvgTimestamp.class));
-    add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
-    add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
+    add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
+    add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
     add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
     add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
     add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
     add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
-    add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class));
+    add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFVarPopTimestamp.class));
+    add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFVarPopTimestamp.class));
+    add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class));
     add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class));
     add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class));
+    add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFVarSampTimestamp.class));
-    add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
-    add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
-    add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+    add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+    add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+    add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
     add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
     add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
     add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
     add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
     add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
     add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
-    add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class));
+    add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFStdPopTimestamp.class));
+    add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFStdPopTimestamp.class));
+    add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFStdPopTimestamp.class));
+    add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class));
     add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class));
     add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class));
+    add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFStdSampTimestamp.class));
   }};
 
   public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduceMergePartial)
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
index 9b90f37..a68d0cc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
@@ -141,11 +141,12 @@ public static ColumnVector createColumnVector(TypeInfo typeInfo) {
     case SHORT:
     case INT:
     case LONG:
-    case TIMESTAMP:
     case DATE:
     case INTERVAL_YEAR_MONTH:
-    case INTERVAL_DAY_TIME:
       return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+    case INTERVAL_DAY_TIME:
+    case TIMESTAMP:
+      return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
     case FLOAT:
     case DOUBLE:
       return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
@@ -393,13 +394,12 @@ private static void setVector(Object row,
       }
       break;
     case TIMESTAMP: {
-      LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+      TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[offset + colIndex];
       if (writableCol != null) {
-        Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
-        lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
+        lcv.set(rowIndex, ((TimestampWritable) writableCol).getTimestamp());
         lcv.isNull[rowIndex] = false;
       } else {
-        lcv.vector[rowIndex] = 1;
+        lcv.setNullValue(rowIndex);
         setNullColIsNullValue(lcv, rowIndex);
       }
     }
@@ -583,6 +583,8 @@ static ColumnVector cloneColumnVector(ColumnVector source
       return new DecimalColumnVector(decColVector.vector.length,
           decColVector.precision,
           decColVector.scale);
+    } else if (source instanceof TimestampColumnVector) {
+      return new TimestampColumnVector(((TimestampColumnVector) source).getLength());
     } else if (source instanceof ListColumnVector) {
       ListColumnVector src = (ListColumnVector) source;
       ColumnVector child = cloneColumnVector(src.child);
@@ -682,6 +684,10 @@ public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, Strin
       }
     } else if (colVector instanceof DecimalColumnVector) {
       sb.append(((DecimalColumnVector) colVector).vector[index].toString());
+    } else if (colVector instanceof TimestampColumnVector) {
+      Timestamp timestamp = new Timestamp(0);
+      ((TimestampColumnVector) colVector).timestampUpdate(timestamp, index);
+      sb.append(timestamp.toString());
     } else {
       sb.append("Unknown");
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 0ec91b8..7e79e1e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -370,18 +370,18 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch, Object[] partition
       } else {
         lcv.fill(DateWritable.dateToDays((Date) value));
         lcv.isNull[0] = false;
-      } 
+      }
     }
     break;
-    
+
     case TIMESTAMP: {
-      LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+      TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[colIndex];
       if (value == null) {
         lcv.noNulls = false;
         lcv.isNull[0] = true;
         lcv.isRepeating = true;
-      } else { 
-        lcv.fill(TimestampUtils.getTimeNanoSec((Timestamp) value));
+      } else {
+        lcv.fill((Timestamp) value);
         lcv.isNull[0] = false;
       }
     }
@@ -400,14 +400,14 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch, Object[] partition
     }
 
     case INTERVAL_DAY_TIME: {
-      LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+      TimestampColumnVector tcv = (TimestampColumnVector) batch.cols[colIndex];
       if (value == null) {
-        lcv.noNulls = false;
-        lcv.isNull[0] = true;
-        lcv.isRepeating = true;
+        tcv.noNulls = false;
+        tcv.isNull[0] = true;
+        tcv.isRepeating = true;
       } else {
-        lcv.fill(DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) value));
-        lcv.isNull[0] = false;
+        tcv.fill(((HiveIntervalDayTime) value).pisaTimestampUpdate(tcv.useScratchPisaTimestamp()));
+        tcv.isNull[0] = false;
       }
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
index a52cf19..2b0068d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
@@ -18,20 +18,23 @@
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.sql.Timestamp;
+
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 
 /**
  * Type cast decimal to timestamp. The decimal value is interpreted
  * as NNNN.DDDDDDDDD where NNNN is a number of seconds and DDDDDDDDD
  * is a number of nano-seconds.
  */
-public class CastDecimalToTimestamp extends FuncDecimalToLong {
+public class CastDecimalToTimestamp extends FuncDecimalToTimestamp {
   private static final long serialVersionUID = 1L;
 
-  private static transient HiveDecimal tenE9 = HiveDecimal.create(1000000000);
-
   public CastDecimalToTimestamp(int inputColumn, int outputColumn) {
     super(inputColumn, outputColumn);
   }
@@ -40,13 +43,8 @@ public CastDecimalToTimestamp() {
   }
 
   @Override
-  protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) {
-    HiveDecimal result = inV.vector[i].getHiveDecimal().multiply(tenE9);
-    if (result == null) {
-      outV.noNulls = false;
-      outV.isNull[i] = true;
-    } else {
-      outV.vector[i] = result.longValue();
-    }
+  protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i) {
+    Timestamp timestamp = TimestampWritable.decimalToTimestamp(inV.vector[i].getHiveDecimal());
+    outV.set(i, timestamp);
   }
 }
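CastDecimalToTimestamp now delegates the NNNN.DDDDDDDDD interpretation to TimestampWritable.decimalToTimestamp instead of hand-rolling a multiply by 10^9. A worked example of that interpretation (the values are arithmetic facts, not output from this patch's tests):

    // 1.5 means 1 second plus 500,000,000 ns past the epoch:
    Timestamp t = TimestampWritable.decimalToTimestamp(HiveDecimal.create("1.5"));
    // t.getTime() == 1500 (milliseconds), t.getNanos() == 500000000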
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; + +public class CastDoubleToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastDoubleToTimestamp(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastDoubleToTimestamp() { + super(); + } + + private void setSecondsWithFractionalNanoseconds(TimestampColumnVector timestampColVector, + double[] vector, int elementNum) { + timestampColVector.setTimestampSecondsWithFractionalNanoseconds(elementNum, vector[elementNum]); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + double[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + setSecondsWithFractionalNanoseconds(outputColVector, vector, 0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
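+      // (setTimestampSecondsWithFractionalNanoseconds() interprets the double as
+      // seconds with a fractional-nanosecond part.)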
+ outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + setSecondsWithFractionalNanoseconds(outputColVector, vector, i); + } + } else { + for(int i = 0; i != n; i++) { + setSecondsWithFractionalNanoseconds(outputColVector, vector, i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + setSecondsWithFractionalNanoseconds(outputColVector, vector, i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + setSecondsWithFractionalNanoseconds(outputColVector, vector, i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("double")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java index 32cefea..ceefd61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java @@ -63,62 +63,6 @@ public void evaluate(VectorizedRowBatch batch) { } switch (inputTypes[0]) { - case TIMESTAMP: - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - date.setTime(inV.vector[0] / 1000000); - outV.vector[0] = DateWritable.dateToDays(date); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - outV.isRepeating = false; - } else { - for(int i = 0; i != n; i++) { - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - outV.isRepeating = false; - } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. 
- outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - date.setTime(inV.vector[0] / 1000000); - outV.vector[0] = DateWritable.dateToDays(date); - } - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - } - outV.isRepeating = false; - } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - } - outV.isRepeating = false; - } - } - break; - case DATE: inV.copySelected(batch.selectedInUse, batch.selected, batch.size, outV); break; @@ -155,7 +99,7 @@ public String getOutputType() { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DATETIME_FAMILY) + VectorExpressionDescriptor.ArgumentType.DATE) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java new file mode 100644 index 0000000..d344d4d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastLongToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastLongToTimestamp(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastLongToTimestamp() { + super(); + } + + private void setSeconds(TimestampColumnVector timestampColVector, long[] vector, int elementNum) { + timestampColVector.setTimestampSeconds(elementNum, vector[elementNum]); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + setSeconds(outputColVector, vector, 0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + setSeconds(outputColVector, vector, i); + } + } else { + for(int i = 0; i != n; i++) { + setSeconds(outputColVector, vector, i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + setSeconds(outputColVector, vector, i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + setSeconds(outputColVector, vector, i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java new file mode 100644 index 0000000..a0c947f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastMillisecondsLongToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastMillisecondsLongToTimestamp(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastMillisecondsLongToTimestamp() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputColVector.setTimestampMilliseconds(0, vector[0]); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
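+      // (setTimestampMilliseconds() scales the epoch-millisecond input into the
+      // vector's internal representation.)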
+ outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setTimestampMilliseconds(i, vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setTimestampMilliseconds(i, vector[i]); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setTimestampMilliseconds(i, vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setTimestampMilliseconds(i, vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java index 518d5d5..a3ddf9f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde.serdeConstants; @@ -55,7 +56,7 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumn]; if (n == 0) { @@ -112,13 +113,13 @@ public void evaluate(VectorizedRowBatch batch) { } } - private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) { + private void evaluate(TimestampColumnVector outV, BytesColumnVector inV, int i) { try { HiveIntervalDayTime interval = HiveIntervalDayTime.valueOf( new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8")); - outV.vector[i] = DateUtils.getIntervalDayTimeTotalNanos(interval); + outV.setEpochSecondsAndSignedNanos(i, interval.getTotalSeconds(), interval.getNanos()); } catch (Exception e) { - outV.vector[i] = 1; + outV.setNullValue(i); outV.isNull[i] = true; outV.noNulls = false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java new file mode 100644 index 0000000..55b84b1 --- /dev/null +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastTimestampToBoolean extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastTimestampToBoolean(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastTimestampToBoolean() { + super(); + } + + private int toBool(TimestampColumnVector timestampColVector, int index) { + return (timestampColVector.getEpochDay(index) != 0 || + timestampColVector.getNanoOfDay(index) != 0) ? 1 : 0; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = toBool(inputColVector, 0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
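+      // (toBool() yields 1 unless both the epoch day and the nano-of-day are zero.)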
+      outputIsNull[0] = inputIsNull[0];
+      outputColVector.isRepeating = true;
+    } else if (inputColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = toBool(inputColVector, i);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = toBool(inputColVector, i);
+        }
+      }
+      outputColVector.isRepeating = false;
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = toBool(inputColVector, i);
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = toBool(inputColVector, i);
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+      outputColVector.isRepeating = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  public int getColNum() {
+    return colNum;
+  }
+
+  public void setColNum(int colNum) {
+    this.colNum = colNum;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(1)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("timestamp"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDate.java
new file mode 100644
index 0000000..8ba69f3
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDate.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+
+/**
+ * To be used to cast timestamp to date.
+ */
+public class CastTimestampToDate extends FuncTimestampToLong {
+
+  private static final long serialVersionUID = 1L;
+
+  public CastTimestampToDate() {
+    super();
+    this.outputType = "date";
+  }
+
+  public CastTimestampToDate(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+    this.outputType = "date";
+  }
+
+  @Override
+  protected void func(LongColumnVector outV, TimestampColumnVector inV, int i) {
+
+    long day = inV.getEpochDay(i);
+    outV.vector[i] = day;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java
index 0aedddc..aec104e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java
@@ -20,12 +20,12 @@
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

 /**
  * To be used to cast timestamp to decimal.
  */
-public class CastTimestampToDecimal extends FuncLongToDecimal {
+public class CastTimestampToDecimal extends FuncTimestampToDecimal {

   private static final long serialVersionUID = 1L;

@@ -38,12 +38,12 @@ public CastTimestampToDecimal(int inputColumn, int outputColumn) {
   }

   @Override
-  protected void func(DecimalColumnVector outV, LongColumnVector inV, int i) {
+  protected void func(DecimalColumnVector outV, TimestampColumnVector inV, int i) {

-    // The resulting decimal value is 10e-9 * the input long value (i.e. seconds).
-    //
-    HiveDecimal result = HiveDecimal.create(inV.vector[i]);
-    result = result.scaleByPowerOfTen(-9);
+    // The BigDecimal class recommends not converting directly from double to BigDecimal,
+    // so we convert like the non-vectorized case and go through a string...
+    Double timestampDouble = inV.getTimestampSecondsWithFractionalNanos(i);
+    HiveDecimal result = HiveDecimal.create(timestampDouble.toString());
     outV.set(i, result);
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
new file mode 100644
index 0000000..f8737f9
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastTimestampToDouble extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastTimestampToDouble(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastTimestampToDouble() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + double[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector.getTimestampSecondsWithFractionalNanos(0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getTimestampSecondsWithFractionalNanos(i); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getTimestampSecondsWithFractionalNanos(i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getTimestampSecondsWithFractionalNanos(i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getTimestampSecondsWithFractionalNanos(i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "double"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java new file mode 100644 index 0000000..4f53f5c --- /dev/null +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastTimestampToLong extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastTimestampToLong(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastTimestampToLong() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector.getEpochSeconds(0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
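+      // (getEpochSeconds() returns whole epoch seconds; any sub-second part is dropped.)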
+ outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getEpochSeconds(i); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getEpochSeconds(i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getEpochSeconds(i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getEpochSeconds(i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index 8d75cf3..24ee9bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -18,10 +18,19 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.sql.Timestamp; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hive.common.util.DateUtils; /** * Constant is represented as a vector with repeating values. 
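The hunks below rework ConstantVectorExpression to derive its output ColumnVector.Type from the type name, and to carry timestamp and interval_day_time constants as a PisaTimestamp emitted through a repeating TimestampColumnVector. A minimal usage sketch, assuming illustrative batch wiring and a column index that are not part of this patch:

    // Hypothetical: project a constant timestamp into column 1 of a two-column batch.
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    batch.cols[1] = new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    batch.size = 5;
    ConstantVectorExpression expr =
        new ConstantVectorExpression(1, Timestamp.valueOf("2001-01-01 01:02:03"));
    expr.evaluate(batch);
    // batch.cols[1] now has isRepeating == true, so entry 0 stands for every row.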
@@ -30,21 +39,15 @@ private static final long serialVersionUID = 1L; - private static enum Type { - LONG, - DOUBLE, - BYTES, - DECIMAL - } - private int outputColumn; protected long longValue = 0; private double doubleValue = 0; private byte[] bytesValue = null; private HiveDecimal decimalValue = null; + private PisaTimestamp timestampValue = null; private boolean isNullValue = false; - private Type type; + private ColumnVector.Type type; private int bytesValueLength = 0; public ConstantVectorExpression() { @@ -82,11 +85,22 @@ public ConstantVectorExpression(int outputColumn, HiveVarchar value) { setBytesValue(value.getValue().getBytes()); } - public ConstantVectorExpression(int outputColumn, HiveDecimal value) { - this(outputColumn, "decimal"); + // Include type name for precision/scale. + public ConstantVectorExpression(int outputColumn, HiveDecimal value, String typeName) { + this(outputColumn, typeName); setDecimalValue(value); } + public ConstantVectorExpression(int outputColumn, Timestamp value) { + this(outputColumn, "timestamp"); + setTimestampValue(value); + } + + public ConstantVectorExpression(int outputColumn, HiveIntervalDayTime value) { + this(outputColumn, "timestamp"); + setIntervalDayTimeValue(value); + } + /* * Support for null constant object */ @@ -140,6 +154,17 @@ private void evaluateDecimal(VectorizedRowBatch vrg) { } } + private void evaluateTimestamp(VectorizedRowBatch vrg) { + TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumn]; + dcv.isRepeating = true; + dcv.noNulls = !isNullValue; + if (!isNullValue) { + dcv.set(0, timestampValue); + } else { + dcv.isNull[0] = true; + } + } + @Override public void evaluate(VectorizedRowBatch vrg) { switch (type) { @@ -155,6 +180,9 @@ public void evaluate(VectorizedRowBatch vrg) { case DECIMAL: evaluateDecimal(vrg); break; + case TIMESTAMP: + evaluateTimestamp(vrg); + break; } } @@ -192,39 +220,37 @@ public void setDecimalValue(HiveDecimal decimalValue) { this.decimalValue = decimalValue; } - public String getTypeString() { - return getOutputType(); + public HiveDecimal getDecimalValue() { + return decimalValue; } - public void setTypeString(String typeString) { - this.outputType = typeString; - if (VectorizationContext.isStringFamily(typeString)) { - this.type = Type.BYTES; - } else if (VectorizationContext.isFloatFamily(typeString)) { - this.type = Type.DOUBLE; - } else if (VectorizationContext.isDecimalFamily(typeString)){ - this.type = Type.DECIMAL; - } else { - // everything else that does not belong to string, double, decimal is treated as long. 
- this.type = Type.LONG; - } + public void setTimestampValue(Timestamp timestampValue) { + this.timestampValue = new PisaTimestamp(timestampValue); } - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; + public void setIntervalDayTimeValue(HiveIntervalDayTime intervalDayTimeValue) { + this.timestampValue = intervalDayTimeValue.pisaTimestampUpdate(new PisaTimestamp()); + } + + + public PisaTimestamp getTimestampValue() { + return timestampValue; } - public Type getType() { - return type; + public String getTypeString() { + return getOutputType(); } - public void setType(Type type) { - this.type = type; + private void setTypeString(String typeString) { + this.outputType = typeString; + + String typeName = VectorizationContext.mapTypeNameSynonyms(outputType); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + this.type = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); } - @Override - public void setOutputType(String type) { - setTypeString(type); + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java new file mode 100644 index 0000000..8d2a186 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java @@ -0,0 +1,187 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +// A type date (LongColumnVector storing epoch days) minus a type date produces a +// type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). 
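In scalar form, the per-row work of the class below amounts to the following sketch (the row index, day values, and output vector are illustrative; PisaTimestamp, TimestampColumnVector, and DateWritable.daysToMillis() are the same APIs the class uses):

    // Each date (epoch days) becomes epoch milliseconds, then a PisaTimestamp;
    // the difference lands in the output TimestampColumnVector at row i.
    TimestampColumnVector out = new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
    int i = 0;
    long leftEpochDays = 7400L;
    long rightEpochDays = 7300L;  // 100 days earlier
    PisaTimestamp left = new PisaTimestamp()
        .updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) leftEpochDays));
    PisaTimestamp right = new PisaTimestamp()
        .updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) rightEpochDays));
    out.subtract(left, right, i);  // interval_day_time of +100 days at row i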
+public class DateColSubtractDateColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp1; + private PisaTimestamp scratchPisaTimestamp2; + private DateTimeMath dtm = new DateTimeMath(); + + public DateColSubtractDateColumn(int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp1 = new PisaTimestamp(); + scratchPisaTimestamp2 = new PisaTimestamp(); + } + + public DateColSubtractDateColumn() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; + + // Input #2 is type date (epochDays). + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + + // Output is type interval_day_time. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. 
+ */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])); + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp1, + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])); + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp1, + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])); + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2, + i); + } + } else { + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])); + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2, + i); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp1.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java new file mode 100644 index 0000000..3ea9331 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +// A type date (LongColumnVector storing epoch days) minus a type date produces a +// type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). 
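+// Note: the scalar date operand is converted to a PisaTimestamp once in the constructor,
+// so only the column operand needs per-row conversion in evaluate().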
+public class DateColSubtractDateScalar extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private PisaTimestamp value;
+  private int outputColumn;
+  private PisaTimestamp scratchPisaTimestamp;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public DateColSubtractDateScalar(int colNum, long value, int outputColumn) {
+    this.colNum = colNum;
+    this.value = new PisaTimestamp().updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) value));
+    this.outputColumn = outputColumn;
+    scratchPisaTimestamp = new PisaTimestamp();
+  }
+
+  public DateColSubtractDateScalar() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #1 is type date (epochDays).
+    LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum];
+
+    // Output is type interval_day_time, carried in a TimestampColumnVector.
+    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn];
+
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector1.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = inputColVector1.noNulls;
+    outputColVector.isRepeating = inputColVector1.isRepeating;
+    int n = batch.size;
+    long[] vector1 = inputColVector1.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (inputColVector1.isRepeating) {
+      outputColVector.subtract(
+          scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[0])),
+          value,
+          0);
+      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
+      outputIsNull[0] = inputIsNull[0];
+    } else if (inputColVector1.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.subtract(
+              scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])),
+              value,
+              i);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.subtract(
+              scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])),
+              value,
+              i);
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.subtract(
+              scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])),
+              value,
+              i);
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.subtract(
+              scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector1[i])),
+              value,
+              i);
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+    }
+
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "timestamp";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("date"),
+            VectorExpressionDescriptor.ArgumentType.getType("date"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
new file mode 100644
index 0000000..a8cabb8
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+
+// A type date (LongColumnVector storing epoch days) minus a type date produces a
+// type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs).
+public class DateScalarSubtractDateColumn extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private PisaTimestamp value;
+  private int outputColumn;
+  private PisaTimestamp scratchPisaTimestamp;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public DateScalarSubtractDateColumn(long value, int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.value = new PisaTimestamp().updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) value));
+    this.outputColumn = outputColumn;
+    scratchPisaTimestamp = new PisaTimestamp();
+  }
+
+  public DateScalarSubtractDateColumn() {
+  }
+
+  @Override
+  /**
+   * Method to evaluate scalar-column operation in vectorized fashion.
+   *
+   * @param batch a package of rows with each column stored in a vector
+   */
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #2 is type date (epochDays).
+    LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum];
+
+    // Output is type Timestamp.
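+    // (The interval_day_time difference is represented in the TimestampColumnVector.)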
+ TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector2.noNulls; + outputColVector.isRepeating = inputColVector2.isRepeating; + int n = batch.size; + + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector2.isRepeating) { + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector2.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromTimestampMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java new file mode 100644 index 0000000..42e4984 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.sql.Timestamp;
+import java.util.HashSet;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Evaluate IN filter on a batch for a vector of timestamps.
+ */
+public class FilterTimestampColumnInList extends VectorExpression implements ITimestampInExpr {
+  private static final long serialVersionUID = 1L;
+  private int inputCol;
+  private Timestamp[] inListValues;
+
+  // The set object containing the IN list.
+  private transient HashSet<PisaTimestamp> inSet;
+
+  public FilterTimestampColumnInList() {
+    super();
+    inSet = null;
+  }
+
+  /**
+   * After construction you must call setInListValues() to add the values to the IN set.
+   */
+  public FilterTimestampColumnInList(int colNum) {
+    this.inputCol = colNum;
+    inSet = null;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    if (inSet == null) {
+      inSet = new HashSet<PisaTimestamp>(inListValues.length);
+      for (Timestamp val : inListValues) {
+        inSet.add(new PisaTimestamp(val));
+      }
+    }
+
+    TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol];
+    int[] sel = batch.selected;
+    boolean[] nullPos = inputColVector.isNull;
+    int n = batch.size;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    PisaTimestamp scratchTimestamp = new PisaTimestamp();
+
+    if (inputColVector.noNulls) {
+      if (inputColVector.isRepeating) {
+
+        // All must be selected otherwise size would be zero
+        // Repeating property will not change.
+
+        inputColVector.pisaTimestampUpdate(scratchTimestamp, 0);
+        if (!(inSet.contains(scratchTimestamp))) {
+          //Entire batch is filtered out.
+          batch.size = 0;
+        }
+      } else if (batch.selectedInUse) {
+        int newSize = 0;
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          inputColVector.pisaTimestampUpdate(scratchTimestamp, i);
+          if (inSet.contains(scratchTimestamp)) {
+            sel[newSize++] = i;
+          }
+        }
+        batch.size = newSize;
+      } else {
+        int newSize = 0;
+        for(int i = 0; i != n; i++) {
+          inputColVector.pisaTimestampUpdate(scratchTimestamp, i);
+          if (inSet.contains(scratchTimestamp)) {
+            sel[newSize++] = i;
+          }
+        }
+        if (newSize < n) {
+          batch.size = newSize;
+          batch.selectedInUse = true;
+        }
+      }
+    } else {
+      if (inputColVector.isRepeating) {
+
+        //All must be selected otherwise size would be zero
+        //Repeating property will not change.
+        if (!nullPos[0]) {
+          inputColVector.pisaTimestampUpdate(scratchTimestamp, 0);
+          if (!inSet.contains(scratchTimestamp)) {
+
+            //Entire batch is filtered out.
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + public void setInListValues(Timestamp[] a) { + this.inListValues = a; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java new file mode 100644 index 0000000..561c152 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary decimal functions and expressions returning timestamps that + * operate directly on the input and set the output. 
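Reviewer note: the filter works by compacting batch.selected in place and shrinking batch.size; inSet is built lazily on the first evaluate() call because inListValues is injected after construction, and the set is transient so the expression serializes cleanly. A usage sketch, assuming column 0 of the batch is a TimestampColumnVector (the literal values are illustrative):

  import java.sql.Timestamp;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

  // Bind the IN list, then apply the filter: rows that miss the set are
  // dropped by rewriting batch.selected and batch.size in place.
  FilterTimestampColumnInList filter = new FilterTimestampColumnInList(0);
  filter.setInListValues(new Timestamp[] {
      Timestamp.valueOf("2001-01-01 00:00:00"),
      Timestamp.valueOf("2002-02-02 00:00:00")});
  filter.evaluate(batch);  // batch is a populated VectorizedRowBatch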
+ */ +public abstract class FuncDecimalToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncDecimalToTimestamp(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public FuncDecimalToTimestamp() { + super(); + } + + abstract protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. + outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java new file mode 100644 index 0000000..774551c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
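Reviewer note: concrete casts only supply the per-element kernel; all null/selected/repeating bookkeeping lives in the superclass above. A hypothetical subclass (not a class from this patch) that interprets the decimal as epoch seconds; the millisecond rounding via double is a simplification of whatever precision the real cast preserves:

  import org.apache.hadoop.hive.common.type.PisaTimestamp;
  import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
  import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

  public class CastDecimalToTimestampSketch extends FuncDecimalToTimestamp {
    private static final long serialVersionUID = 1L;
    private final PisaTimestamp scratch = new PisaTimestamp();

    public CastDecimalToTimestampSketch(int inputColumn, int outputColumn) {
      super(inputColumn, outputColumn);
    }

    @Override
    protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i) {
      // Decimal is taken as seconds since the epoch; only element i is touched,
      // exactly as the superclass contract requires.
      double seconds = inV.vector[i].getHiveDecimal().doubleValue();
      outV.set(i, scratch.updateFromTimestampMilliseconds((long) (seconds * 1000d)));
    }
  }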
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary timestamp functions and expressions returning decimals that + * operate directly on the input and set the output. + */ +public abstract class FuncTimestampToDecimal extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncTimestampToDecimal(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + this.outputType = "decimal"; + } + + public FuncTimestampToDecimal() { + super(); + this.outputType = "decimal"; + } + + abstract protected void func(DecimalColumnVector outV, TimestampColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inV = (TimestampColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. 
+ outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.TIMESTAMP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java new file mode 100644 index 0000000..b84d9be --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary timestamp functions and expressions returning long that + * operate directly on the input and set the output.
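Reviewer note: FuncDecimalToTimestamp, FuncTimestampToDecimal and FuncTimestampToLong are the same guard-before-compute skeleton with the vector types permuted; a subclass differs only in func() and, for matching purposes, in its descriptor. The descriptor is the key VectorizationContext matches against; built standalone, the pattern used by these classes reads:

  import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;

  // Matching key for a projection taking one TIMESTAMP argument supplied as
  // a column expression (the shape used by FuncTimestampToDecimal above).
  VectorExpressionDescriptor.Descriptor d = new VectorExpressionDescriptor.Builder()
      .setMode(VectorExpressionDescriptor.Mode.PROJECTION)
      .setNumArguments(1)
      .setArgumentTypes(VectorExpressionDescriptor.ArgumentType.TIMESTAMP)
      .setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN)
      .build();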
+ */ +public abstract class FuncTimestampToLong extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncTimestampToLong(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + this.outputType = "long"; + } + + public FuncTimestampToLong() { + super(); + this.outputType = "long"; + } + + abstract protected void func(LongColumnVector outV, TimestampColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inV = (TimestampColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. + outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.TIMESTAMP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ITimestampInExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ITimestampInExpr.java new file mode 100644 index 0000000..f6cc971 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ITimestampInExpr.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; + +public interface ITimestampInExpr { + void setInListValues(Timestamp[] inVals); +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java new file mode 100644 index 0000000..a6f8057 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second and third are interval_day_time columns or interval_day_time expression results.
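Reviewer note: ITimestampInExpr exists so that the code binding IN-list literals can configure any timestamp IN expression, filter or projection variant, through one interface instead of per-class casts:

  import java.sql.Timestamp;

  // The binder only needs the interface; the concrete expression class
  // (here the filter variant from above) is interchangeable.
  ITimestampInExpr in = new FilterTimestampColumnInList(0);
  in.setInListValues(new Timestamp[] { Timestamp.valueOf("2000-01-01 00:00:00") });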
+ */ +public class IfExprIntervalDayTimeColumnColumn extends IfExprTimestampColumnColumnBase { + + private static final long serialVersionUID = 1L; + + public IfExprIntervalDayTimeColumnColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { + super(arg1Column, arg2Column, arg3Column, outputColumn); + } + + public IfExprIntervalDayTimeColumnColumn() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java new file mode 100644 index 0000000..4beb50a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a column or non-constant expression result. + * The third is a constant value. 
+ */ +public class IfExprIntervalDayTimeColumnScalar extends IfExprTimestampColumnScalarBase { + + private static final long serialVersionUID = 1L; + + public IfExprIntervalDayTimeColumnScalar(int arg1Column, int arg2Column, HiveIntervalDayTime arg3Scalar, + int outputColumn) { + super(arg1Column, arg2Column, arg3Scalar.pisaTimestampUpdate(new PisaTimestamp()), outputColumn); + } + + public IfExprIntervalDayTimeColumnScalar() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java new file mode 100644 index 0000000..5463c7c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a column or non-constant expression result.
+ */ +public class IfExprIntervalDayTimeScalarColumn extends IfExprTimestampScalarColumnBase { + + private static final long serialVersionUID = 1L; + + public IfExprIntervalDayTimeScalarColumn(int arg1Column, HiveIntervalDayTime arg2Scalar, int arg3Column, + int outputColumn) { + super(arg1Column, arg2Scalar.pisaTimestampUpdate(new PisaTimestamp()), arg3Column, outputColumn); + } + + public IfExprIntervalDayTimeScalarColumn() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java new file mode 100644 index 0000000..af2e0c0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a constant value. 
+ */ +public class IfExprIntervalDayTimeScalarScalar extends IfExprTimestampScalarScalarBase { + + private static final long serialVersionUID = 1L; + + public IfExprIntervalDayTimeScalarScalar(int arg1Column, HiveIntervalDayTime arg2Scalar, HiveIntervalDayTime arg3Scalar, + int outputColumn) { + super(arg1Column, arg2Scalar.pisaTimestampUpdate(new PisaTimestamp()), arg3Scalar.pisaTimestampUpdate(new PisaTimestamp()), outputColumn); + } + + public IfExprIntervalDayTimeScalarScalar() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time"), + VectorExpressionDescriptor.ArgumentType.getType("interval_day_time")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java index a2bb3d0..06ba8f8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java @@ -173,8 +173,8 @@ public void setOutputColumn(int outputColumn) { .setNumArguments(3) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.getType("long"), - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"), - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family")) + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumn.java new file mode 100644 index 0000000..a1e489b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumn.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
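Reviewer note: all four IfExprIntervalDayTime* classes are thin shells over the timestamp IfExpr bases. That works because interval_day_time and timestamp share TimestampColumnVector storage, so only scalar constants need converting, once, in the constructor. The conversion idiom used above, isolated (the constructor arguments are illustrative):

  import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
  import org.apache.hadoop.hive.common.type.PisaTimestamp;

  // 1 day, 2 hours, 3 minutes, 4 seconds, 5 nanos, converted once into the
  // PisaTimestamp representation the timestamp-based superclass expects.
  HiveIntervalDayTime interval = new HiveIntervalDayTime(1, 2, 3, 4, 5);
  PisaTimestamp asPisa = interval.pisaTimestampUpdate(new PisaTimestamp());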
+ */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second and third are timestamp columns or timestamp expression results. + */ +public class IfExprTimestampColumnColumn extends IfExprTimestampColumnColumnBase { + + private static final long serialVersionUID = 1L; + + public IfExprTimestampColumnColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { + super(arg1Column, arg2Column, arg3Column, outputColumn); + } + + public IfExprTimestampColumnColumn() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java new file mode 100644 index 0000000..d3dd67d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second and third are timestamp columns or timestamp expression results.
+ */ +public abstract class IfExprTimestampColumnColumnBase extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column, arg3Column; + private int outputColumn; + + public IfExprTimestampColumnColumnBase(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public IfExprTimestampColumnColumnBase() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + TimestampColumnVector arg2ColVector = (TimestampColumnVector) batch.cols[arg2Column]; + TimestampColumnVector arg3ColVector = (TimestampColumnVector) batch.cols[arg3Column]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + /* All the code paths below propagate nulls even if neither arg2 nor arg3 + * have nulls. This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. + */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // extend any repeating values and noNulls indicator in the inputs + arg2ColVector.flatten(batch.selectedInUse, sel, n); + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchPisaTimestamp(i) : arg3ColVector.asScratchPisaTimestamp(i)); + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchPisaTimestamp(i) : arg3ColVector.asScratchPisaTimestamp(i)); + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchPisaTimestamp(i) : arg3ColVector.asScratchPisaTimestamp(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchPisaTimestamp(i) : arg3ColVector.asScratchPisaTimestamp(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } + + // restore repeating and no nulls indicators + arg2ColVector.unFlatten(); + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalar.java new file mode 100644 index 0000000..0660038 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalar.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a column or non-constant expression result. + * The third is a constant value.
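Reviewer note: two things in the base class above are worth calling out. First, a repeating boolean column short-circuits to one bulk copySelected() of the chosen branch instead of n per-row selections. Second, the per-row test !arg1ColVector.isNull[i] && vector1[i] == 1 encodes Hive's IF null semantics: a NULL condition selects the ELSE branch. The scalar model of that test:

  // Branch selection used in all the loops above: NULL condition -> ELSE.
  static boolean chooseThen(boolean condIsNull, long condValue) {
    return !condIsNull && condValue == 1;
  }
  // chooseThen(false, 1) -> true  (THEN)
  // chooseThen(false, 0) -> false (ELSE)
  // chooseThen(true,  x) -> false (ELSE, regardless of x)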
+ */ +public class IfExprTimestampColumnScalar extends IfExprTimestampColumnScalarBase { + + private static final long serialVersionUID = 1L; + + public IfExprTimestampColumnScalar(int arg1Column, int arg2Column, Timestamp arg3Scalar, + int outputColumn) { + super(arg1Column, arg2Column, new PisaTimestamp(arg3Scalar), outputColumn); + } + + public IfExprTimestampColumnScalar() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java new file mode 100644 index 0000000..8aaad3f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a column or non-constant expression result. + * The third is a constant value.
+ */ +public abstract class IfExprTimestampColumnScalarBase extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column; + private PisaTimestamp arg3Scalar; + private int outputColumn; + + public IfExprTimestampColumnScalarBase(int arg1Column, int arg2Column, PisaTimestamp arg3Scalar, + int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Scalar = arg3Scalar; + this.outputColumn = outputColumn; + } + + public IfExprTimestampColumnScalarBase() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + TimestampColumnVector arg2ColVector = (TimestampColumnVector) batch.cols[arg2Column]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + outputColVector.fill(arg3Scalar); + } + return; + } + + // Extend any repeating values and noNulls indicator in the inputs to + // reduce the number of code paths needed below. + arg2ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchPisaTimestamp(i) : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchPisaTimestamp(i) : arg3Scalar); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchPisaTimestamp(i) : arg3Scalar); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.asScratchPisaTimestamp(i) : arg3Scalar); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); + } + } + } + + // restore repeating and no nulls indicators + arg2ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumn.java new file mode 100644 index 0000000..7f618cb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumn.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
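Reviewer note: in the column/scalar variant the scalar branch can never produce a null, which is why the base above sets outputColVector.noNulls = arg2ColVector.noNulls and why the null loops end in ... : false. The resulting per-row null rule, stated as a predicate:

  // Result is null only when the THEN column is both chosen and null;
  // the ELSE scalar (arg3Scalar) is a non-null constant.
  static boolean resultIsNull(boolean condIsNull, long cond, boolean thenIsNull) {
    return !condIsNull && cond == 1 && thenIsNull;
  }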
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a column or non-constant expression result. + */ +public class IfExprTimestampScalarColumn extends IfExprTimestampScalarColumnBase { + + private static final long serialVersionUID = 1L; + + public IfExprTimestampScalarColumn(int arg1Column, Timestamp arg2Scalar, int arg3Column, + int outputColumn) { + super(arg1Column, new PisaTimestamp(arg2Scalar), arg3Column, outputColumn); + } + + public IfExprTimestampScalarColumn() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java new file mode 100644 index 0000000..84d7655 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a column or non-constant expression result. + */ +public abstract class IfExprTimestampScalarColumnBase extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg3Column; + private PisaTimestamp arg2Scalar; + private int outputColumn; + + public IfExprTimestampScalarColumnBase(int arg1Column, PisaTimestamp arg2Scalar, int arg3Column, + int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public IfExprTimestampScalarColumnBase() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + TimestampColumnVector arg3ColVector = (TimestampColumnVector) batch.cols[arg3Column]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // Extend any repeating values and noNulls indicator in the inputs to + // reduce the number of code paths needed below. + // This could be optimized in the future by having separate paths + // for when arg3ColVector is repeating or has no nulls. + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchPisaTimestamp(i)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchPisaTimestamp(i)); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3ColVector.asScratchPisaTimestamp(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + false : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3ColVector.asScratchPisaTimestamp(i)); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ false : arg3ColVector.isNull[i]); + } + } + } + + // restore repeating and no nulls indicators + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalar.java new file mode 100644 index 0000000..5286ea3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalar.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +import java.sql.Timestamp; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a constant value. + */ +public class IfExprTimestampScalarScalar extends IfExprTimestampScalarScalarBase { + + private static final long serialVersionUID = 1L; + + public IfExprTimestampScalarScalar(int arg1Column, Timestamp arg2Scalar, Timestamp arg3Scalar, + int outputColumn) { + super(arg1Column, new PisaTimestamp(arg2Scalar), new PisaTimestamp(arg3Scalar), outputColumn); + } + + public IfExprTimestampScalarScalar() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("int_family"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java new file mode 100644 index 0000000..1aeabfc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
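Reviewer note: with the scalar/scalar variant now in view, the four timestamp IfExpr combinations are complete (column/column, column/scalar, scalar/column, scalar/scalar), and each pre-converts its Timestamp constants to PisaTimestamp once at construction so evaluate() does no per-row conversion. A construction sketch for the scalar/scalar case (column indices and literals illustrative):

  import java.sql.Timestamp;

  IfExprTimestampScalarScalar ifExpr = new IfExprTimestampScalarScalar(
      0,                                         // boolean condition column
      Timestamp.valueOf("1999-12-31 23:59:59"),  // THEN constant
      Timestamp.valueOf("2000-01-01 00:00:00"),  // ELSE constant
      1);                                        // output column index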
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import java.util.Arrays; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a constant value. + */ +public abstract class IfExprTimestampScalarScalarBase extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column; + private PisaTimestamp arg2Scalar; + private PisaTimestamp arg3Scalar; + private int outputColumn; + + public IfExprTimestampScalarScalarBase(int arg1Column, PisaTimestamp arg2Scalar, PisaTimestamp arg3Scalar, + int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Scalar = arg3Scalar; + this.outputColumn = outputColumn; + } + + public IfExprTimestampScalarScalarBase() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = false; // the output is never null (both branches are non-null scalars), but we set the isNull entries individually below + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + outputColVector.fill(arg3Scalar); + } + } else if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + outputIsNull[i] = false; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ arg2Scalar : arg3Scalar); + } + Arrays.fill(outputIsNull, 0, n, false); + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + public int getArg1Column() { + return arg1Column; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java index 9b9f15e..a77d41a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongColumn.java @@ -160,8 +160,8 @@ public void setOutputColumn(int outputColumn) { VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"), - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family")) + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java index 94c5bed..6ee5daf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongColumn.java @@ -160,8 +160,8 @@ public void setOutputColumn(int outputColumn) { VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"), - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family")) + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java index dda941e..053ced9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongColumn.java @@ -160,8 +160,8 @@ public void setOutputColumn(int outputColumn) { VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"), - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family")) + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java index aacdfe6..25d52b3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongColumn.java @@ -160,8 +160,8 @@ public void setOutputColumn(int outputColumn) { VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"), - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family")) + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java index b041ab6..e6e54e9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongColumn.java @@ -160,8 +160,8 @@ public void setOutputColumn(int outputColumn) { VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"), - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family")) + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java index 179e493..865fdb9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongColumn.java @@ -160,8 +160,8 @@ public void setOutputColumn(int outputColumn) { VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family"), - VectorExpressionDescriptor.ArgumentType.getType("int_datetime_interval_family")) + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("int_date_interval_year_month")) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java index 2eb48fb..3c6824d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; /** * Utility functions to handle null propagation. @@ -81,6 +82,31 @@ public static void setNullDataEntriesBytes( } } + /** + * Set the data value for all NULL entries to the designated NULL_VALUE. + */ + public static void setNullDataEntriesTimestamp( + TimestampColumnVector v, boolean selectedInUse, int[] sel, int n) { + if (v.noNulls) { + return; + } else if (v.isRepeating && v.isNull[0]) { + v.setNullValue(0); + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if(v.isNull[i]) { + v.setNullValue(i); + } + } + } else { + for (int i = 0; i != n; i++) { + if(v.isNull[i]) { + v.setNullValue(i); + } + } + } + } + // for use by Column-Scalar and Scalar-Column arithmetic for null propagation public static void setNullOutputEntriesColScalar( ColumnVector v, boolean selectedInUse, int[] sel, int n) { @@ -89,8 +115,11 @@ public static void setNullOutputEntriesColScalar( // No need to set null data entries because the input NaN values // will automatically propagate to the output. return; + } else if (v instanceof LongColumnVector) { + setNullDataEntriesLong((LongColumnVector) v, selectedInUse, sel, n); + } else if (v instanceof TimestampColumnVector){ + setNullDataEntriesTimestamp((TimestampColumnVector) v, selectedInUse, sel, n); } - setNullDataEntriesLong((LongColumnVector) v, selectedInUse, sel, n); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java new file mode 100644 index 0000000..2d7d0c2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; +import java.util.HashSet; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Output a boolean value indicating if a column is IN a list of constants. + */ +public class TimestampColumnInList extends VectorExpression implements ITimestampInExpr { + private static final long serialVersionUID = 1L; + private int inputCol; + private Timestamp[] inListValues; + private int outputColumn; + + private transient PisaTimestamp scratchTimestamp; + + + // The set object containing the IN list. 
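Stepping back to the NullUtil change above: setNullDataEntriesTimestamp exists for the same reason as its long, double, and bytes siblings. Once an output slot is marked null its data is undefined, so a canonical placeholder is written to keep any downstream reads deterministic. A simplified standalone analogue of the pattern (toy double vector; TimestampColumnVector.setNullValue plays the role NaN plays here):

    import java.util.Arrays;

    // Toy model of NullUtil-style null normalization: give every NULL slot a
    // deterministic placeholder so later passes never read undefined data.
    final class ToyNullUtil {
      static void setNullEntries(double[] vector, boolean[] isNull, boolean noNulls,
          boolean selectedInUse, int[] sel, int n) {
        if (noNulls) {
          return; // nothing to normalize
        }
        if (selectedInUse) {
          for (int j = 0; j != n; j++) {
            int i = sel[j];
            if (isNull[i]) {
              vector[i] = Double.NaN; // placeholder, like setNullValue(i)
            }
          }
        } else {
          for (int i = 0; i != n; i++) {
            if (isNull[i]) {
              vector[i] = Double.NaN;
            }
          }
        }
      }

      public static void main(String[] args) {
        double[] v = {1.5, 0.0, 2.5};
        boolean[] isNull = {false, true, false};
        setNullEntries(v, isNull, false, false, null, 3);
        System.out.println(Arrays.toString(v)); // [1.5, NaN, 2.5]
      }
    }

The toy omits the isRepeating shortcut the real method takes first: a repeating null vector only needs slot 0 normalized.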
+ private transient HashSet<PisaTimestamp> inSet; + + public TimestampColumnInList() { + super(); + inSet = null; + } + + /** + * After construction you must call setInListValues() to add the values to the IN set. + */ + public TimestampColumnInList(int colNum, int outputColumn) { + this.inputCol = colNum; + this.outputColumn = outputColumn; + inSet = null; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + if (inSet == null) { + inSet = new HashSet<PisaTimestamp>(inListValues.length); + for (Timestamp val : inListValues) { + inSet.add(new PisaTimestamp(val)); + } + scratchTimestamp = new PisaTimestamp(); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + inputColVector.pisaTimestampUpdate(scratchTimestamp, 0); + outputVector[0] = inSet.contains(scratchTimestamp) ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + outputVector[i] = inSet.contains(scratchTimestamp) ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + outputVector[i] = inSet.contains(scratchTimestamp) ? 1 : 0; + } + } + } else { + if (inputColVector.isRepeating) { + + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, 0); + outputVector[0] = inSet.contains(scratchTimestamp) ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNulls[i] = nullPos[i]; + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + outputVector[i] = inSet.contains(scratchTimestamp) ? 1 : 0; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + outputVector[i] = inSet.contains(scratchTimestamp) ? 1 : 0; + } + } + } + } + } + + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor.
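The evaluate() above is notable for its allocation discipline: the IN set is built lazily on the first batch, and every probe afterwards reuses the single scratchTimestamp via pisaTimestampUpdate, so the hot loop allocates nothing. That is only safe because the mutable scratch object is used for contains() probes and never stored in the set, and because the HashSet usage presumes PisaTimestamp implements value-based equals and hashCode. A self-contained sketch of the same idiom (the Key type and its two fields are illustrative stand-ins for PisaTimestamp):

    import java.util.HashSet;

    // Toy model of the scratch-key lookup used by TimestampColumnInList:
    // build the set once, then reuse one mutable key object for every probe.
    final class ToyInList {
      static final class Key {
        long seconds;
        int nanos;
        Key() {}
        Key(long s, int n) { seconds = s; nanos = n; }
        void update(long s, int n) { seconds = s; nanos = n; }
        @Override public boolean equals(Object o) {
          return o instanceof Key && ((Key) o).seconds == seconds
              && ((Key) o).nanos == nanos;
        }
        @Override public int hashCode() {
          return Long.hashCode(seconds) * 31 + nanos;
        }
      }

      public static void main(String[] args) {
        HashSet<Key> inSet = new HashSet<>();
        inSet.add(new Key(100, 0));
        inSet.add(new Key(200, 500));

        Key scratch = new Key(); // one probe object, reused per row
        scratch.update(200, 500);
        System.out.println(inSet.contains(scratch)); // true
        scratch.update(300, 0);
        System.out.println(inSet.contains(scratch)); // false
      }
    }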
+ return null; + } + + public void setInListValues(Timestamp[] a) { + this.inListValues = a; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java new file mode 100644 index 0000000..052d57c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java @@ -0,0 +1,147 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary timestamp functions returning strings that operate directly on the + * input and set the output. + */ +abstract public class TimestampToStringUnaryUDF extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public TimestampToStringUnaryUDF(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public TimestampToStringUnaryUDF() { + super(); + } + + abstract protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; + outV.initBuffer(); + + if (n == 0) { + //Nothing to do + return; + } + + if (inputColVector.noNulls) { + outV.noNulls = true; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + func(outV, inputColVector, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inputColVector, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inputColVector, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value.
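That closing comment states the invariant the whole branch below is built on: the data behind a null slot is undefined (see the NullUtil discussion above), so func must never be invoked on it. Boiled down to a toy unary transform, the contract looks like this (plain arrays, illustrative names):

    import java.util.Arrays;

    // Toy version of the null-handling contract in TimestampToStringUnaryUDF:
    // copy the null mask through, and only transform the defined slots.
    final class ToyUnaryToString {
      static void evaluate(long[] in, boolean[] isNull, boolean noNulls, int n,
          String[] out, boolean[] outIsNull) {
        if (noNulls) {
          for (int i = 0; i != n; i++) {
            out[i] = Long.toString(in[i]);
          }
        } else {
          System.arraycopy(isNull, 0, outIsNull, 0, n);
          for (int i = 0; i != n; i++) {
            if (!isNull[i]) {
              out[i] = Long.toString(in[i]); // only defined slots are touched
            }
          }
        }
      }

      public static void main(String[] args) {
        long[] in = {7, 0, 42};
        boolean[] isNull = {false, true, false};
        String[] out = new String[3];
        boolean[] outIsNull = new boolean[3];
        evaluate(in, isNull, false, 3, out, outIsNull);
        System.out.println(Arrays.toString(out)); // [7, null, 42]
      }
    }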
+ outV.noNulls = false; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inputColVector.isNull[0]; + if (!inputColVector.isNull[0]) { + func(outV, inputColVector, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inputColVector.isNull[i]; + if (!inputColVector.isNull[i]) { + func(outV, inputColVector, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inputColVector.isNull[i]) { + func(outV, inputColVector, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.TIMESTAMP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index c0e4cf0..8fca8a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -144,6 +144,8 @@ public String toString() { b.append(this.getClass().getSimpleName()); b.append("["); b.append(this.getOutputColumn()); + b.append(":"); + b.append(this.getOutputType()); b.append("]"); if (childExpressions != null) { b.append("("); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java index d91b880..326bfb9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java @@ -18,12 +18,15 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.sql.Timestamp; + import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.io.Writable; /** * Interface used to create Writable objects from vector expression primitives. 
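In the hunk below, the writer interface picks up three timestamp-flavored writeValue overloads, and VectorExpressionWriterFactory (the following file) gives each one a base implementation that throws. Only the timestamp-aware specializations override them, so a mismatched call fails fast instead of silently corrupting data. A minimal standalone model of that fail-fast overload pattern (toy classes; IllegalStateException stands in for HiveException):

    import java.sql.Timestamp;

    // Base writer: every type-specific overload throws by default.
    abstract class ToyWriterBase {
      Object writeValue(long value) {
        throw new IllegalStateException("Internal error: should not reach here");
      }
      Object writeValue(Timestamp value) {
        throw new IllegalStateException("Internal error: should not reach here");
      }
    }

    // Timestamp specialization: overrides only the overload it supports.
    final class ToyTimestampWriter extends ToyWriterBase {
      @Override
      Object writeValue(Timestamp value) {
        return value.toString(); // a real writer would populate a Writable
      }

      public static void main(String[] args) {
        ToyWriterBase writer = new ToyTimestampWriter();
        System.out.println(writer.writeValue(new Timestamp(0L)));
        try {
          writer.writeValue(5L); // wrong overload for this specialization
        } catch (IllegalStateException expected) {
          System.out.println("fails fast: " + expected.getMessage());
        }
      }
    }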
@@ -37,6 +40,9 @@ Object writeValue(byte[] value, int start, int length) throws HiveException; Object writeValue(HiveDecimalWritable value) throws HiveException; Object writeValue(HiveDecimal value) throws HiveException; + Object writeValue(TimestampWritable value) throws HiveException; + Object writeValue(Timestamp value) throws HiveException; + Object writeValue(PisaTimestamp value) throws HiveException; Object setValue(Object row, ColumnVector column, int columnRow) throws HiveException; Object initValue(Object ost) throws HiveException; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index bbf8862..1fc932d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -162,6 +163,44 @@ public Object setValue(Object field, HiveDecimalWritable value) throws HiveExcep public Object setValue(Object field, HiveDecimal value) throws HiveException { throw new HiveException("Internal error: should not reach here"); } + + /** + * The base implementation must be overridden by the Timestamp specialization + */ + @Override + public Object writeValue(Timestamp value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } + + /** + * The base implementation must be overridden by the Timestamp specialization + */ + @Override + public Object writeValue(TimestampWritable value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } + + /** + * The base implementation must be overridden by the Timestamp specialization + */ + public Object setValue(Object field, TimestampWritable value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } + + /** + * The base implementation must be overridden by the PisaTimestamp specialization + */ + @Override + public Object writeValue(PisaTimestamp value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } + + /** + * The base implementation must be overridden by the Timestamp specialization + */ + public Object setValue(Object field, Timestamp value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } } /** @@ -366,6 +405,66 @@ public Object setValue(Object field, ColumnVector column, int row) throws HiveEx } } + /** + * Specialized writer for TimestampColumnVector. Will throw cast exception + * if the wrong vector column is used. 
+ */ + private static abstract class VectorExpressionWriterTimestamp extends VectorExpressionWriterBase { + @Override + public Object writeValue(ColumnVector column, int row) throws HiveException { + TimestampColumnVector dcv = (TimestampColumnVector) column; + TimestampWritable timestampWritable = (TimestampWritable) dcv.getScratchWritable(); + if (timestampWritable == null) { + timestampWritable = new TimestampWritable(); + dcv.setScratchWritable(timestampWritable); + } + if (dcv.noNulls && !dcv.isRepeating) { + return writeValue(TimestampUtils.timestampColumnVectorWritable(dcv, row, timestampWritable)); + } else if (dcv.noNulls && dcv.isRepeating) { + return writeValue(TimestampUtils.timestampColumnVectorWritable(dcv, 0, timestampWritable)); + } else if (!dcv.noNulls && !dcv.isRepeating && !dcv.isNull[row]) { + return writeValue(TimestampUtils.timestampColumnVectorWritable(dcv, row, timestampWritable)); + } else if (!dcv.noNulls && dcv.isRepeating && !dcv.isNull[0]) { + return writeValue(TimestampUtils.timestampColumnVectorWritable(dcv, 0, timestampWritable)); + } else if (!dcv.noNulls && dcv.isRepeating && dcv.isNull[0]) { + return null; + } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) { + return null; + } + throw new HiveException( + String.format( + "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", + row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0])); + } + + @Override + public Object setValue(Object field, ColumnVector column, int row) throws HiveException { + TimestampColumnVector dcv = (TimestampColumnVector) column; + TimestampWritable timestampWritable = (TimestampWritable) dcv.getScratchWritable(); + if (timestampWritable == null) { + timestampWritable = new TimestampWritable(); + dcv.setScratchWritable(timestampWritable); + } + if (dcv.noNulls && !dcv.isRepeating) { + return setValue(field, TimestampUtils.timestampColumnVectorWritable(dcv, row, timestampWritable)); + } else if (dcv.noNulls && dcv.isRepeating) { + return setValue(field, TimestampUtils.timestampColumnVectorWritable(dcv, 0, timestampWritable)); + } else if (!dcv.noNulls && !dcv.isRepeating && !dcv.isNull[row]) { + return setValue(field, TimestampUtils.timestampColumnVectorWritable(dcv, row, timestampWritable)); + } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) { + return null; + } else if (!dcv.noNulls && dcv.isRepeating && !dcv.isNull[0]) { + return setValue(field, TimestampUtils.timestampColumnVectorWritable(dcv, 0, timestampWritable)); + } else if (!dcv.noNulls && dcv.isRepeating && dcv.isNull[0]) { + return null; + } + throw new HiveException( + String.format( + "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", + row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0])); + } + } + /** * Compiles the appropriate vector expression writer based on an expression info (ExprNodeDesc) */ @@ -514,6 +613,22 @@ public Object setValue(Object field, HiveDecimal value) { } @Override + public Object setValue(Object field, TimestampWritable value) { + if (null == field) { + field = initValue(null); + } + return ((SettableTimestampObjectInspector) this.objectInspector).set(field, value); + } + + @Override + public Object setValue(Object field, Timestamp value) { + if (null == field) { + field = initValue(null); + } + return ((SettableTimestampObjectInspector) this.objectInspector).set(field, value); + } + + @Override public Object initValue(Object ignored) { return 
((SettableHiveDecimalObjectInspector) this.objectInspector).create( HiveDecimal.ZERO); @@ -560,41 +675,53 @@ public Object initValue(Object ignored) { } private static VectorExpressionWriter genVectorExpressionWritableTimestamp( - SettableTimestampObjectInspector fieldObjInspector) throws HiveException { - return new VectorExpressionWriterLong() { + SettableTimestampObjectInspector fieldObjInspector) throws HiveException { + + return new VectorExpressionWriterTimestamp() { private Object obj; - private Timestamp ts; - public VectorExpressionWriter init(SettableTimestampObjectInspector objInspector) - throws HiveException { + public VectorExpressionWriter init(SettableTimestampObjectInspector objInspector) throws HiveException { super.init(objInspector); - ts = new Timestamp(0); obj = initValue(null); return this; } @Override - public Object writeValue(long value) { - TimestampUtils.assignTimeInNanoSec(value, ts); - ((SettableTimestampObjectInspector) this.objectInspector).set(obj, ts); - return obj; + public Object writeValue(TimestampWritable value) throws HiveException { + return ((SettableTimestampObjectInspector) this.objectInspector).set(obj, value); } @Override - public Object setValue(Object field, long value) { + public Object writeValue(Timestamp value) throws HiveException { + return ((SettableTimestampObjectInspector) this.objectInspector).set(obj, value); + } + + @Override + public Object writeValue(PisaTimestamp value) throws HiveException { + return ((SettableTimestampObjectInspector) this.objectInspector).set(obj, value.asScratchTimestamp()); + } + + @Override + public Object setValue(Object field, TimestampWritable value) { if (null == field) { field = initValue(null); } - TimestampUtils.assignTimeInNanoSec(value, ts); - ((SettableTimestampObjectInspector) this.objectInspector).set(field, ts); - return field; + return ((SettableTimestampObjectInspector) this.objectInspector).set(field, value); + } + + @Override + public Object setValue(Object field, Timestamp value) { + if (null == field) { + field = initValue(null); + } + return ((SettableTimestampObjectInspector) this.objectInspector).set(field, value); } @Override public Object initValue(Object ignored) { return ((SettableTimestampObjectInspector) this.objectInspector).create(new Timestamp(0)); } - }.init(fieldObjInspector); + }.init(fieldObjInspector); } private static VectorExpressionWriter genVectorExpressionWritableIntervalYearMonth( @@ -638,33 +765,55 @@ public Object initValue(Object ignored) { private static VectorExpressionWriter genVectorExpressionWritableIntervalDayTime( SettableHiveIntervalDayTimeObjectInspector fieldObjInspector) throws HiveException { - return new VectorExpressionWriterLong() { + + return new VectorExpressionWriterTimestamp() { private Object obj; private HiveIntervalDayTime interval; + private PisaTimestamp pisaTimestamp; public VectorExpressionWriter init(SettableHiveIntervalDayTimeObjectInspector objInspector) throws HiveException { super.init(objInspector); interval = new HiveIntervalDayTime(); obj = initValue(null); + pisaTimestamp = new PisaTimestamp(); return this; } @Override - public Object writeValue(long value) { - DateUtils.setIntervalDayTimeTotalNanos(interval, value); - ((SettableHiveIntervalDayTimeObjectInspector) this.objectInspector).set(obj, interval); - return obj; + public Object writeValue(TimestampWritable value) throws HiveException { + interval.set(pisaTimestamp.updateFromTimestamp(value.getTimestamp())); + return ((SettableHiveIntervalDayTimeObjectInspector) 
this.objectInspector).set(obj, interval); } @Override - public Object setValue(Object field, long value) { + public Object writeValue(Timestamp value) throws HiveException { + interval.set(pisaTimestamp.updateFromTimestamp(value)); + return ((SettableHiveIntervalDayTimeObjectInspector) this.objectInspector).set(obj, interval); + } + + @Override + public Object writeValue(PisaTimestamp value) throws HiveException { + interval.set(value); + return ((SettableHiveIntervalDayTimeObjectInspector) this.objectInspector).set(obj, interval); + } + + @Override + public Object setValue(Object field, TimestampWritable value) { if (null == field) { field = initValue(null); } - DateUtils.setIntervalDayTimeTotalNanos(interval, value); - ((SettableHiveIntervalDayTimeObjectInspector) this.objectInspector).set(field, interval); - return field; + interval.set(pisaTimestamp.updateFromTimestamp(value.getTimestamp())); + return ((SettableHiveIntervalDayTimeObjectInspector) this.objectInspector).set(field, interval); + } + + @Override + public Object setValue(Object field, Timestamp value) { + if (null == field) { + field = initValue(null); + } + interval.set(pisaTimestamp.updateFromTimestamp(value)); + return ((SettableHiveIntervalDayTimeObjectInspector) this.objectInspector).set(field, interval); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java index c4a70c0..9f5c793 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -164,8 +165,8 @@ public void evaluate(VectorizedRowBatch batch) { } protected byte[] evaluateTimestamp(ColumnVector columnVector, int index, long numDays) { - LongColumnVector lcv = (LongColumnVector) columnVector; - calendar.setTimeInMillis(lcv.vector[index] / 1000000); + TimestampColumnVector tcv = (TimestampColumnVector) columnVector; + calendar.setTimeInMillis(tcv.getTimestampMilliseconds(index)); if (isPositive) { calendar.add(Calendar.DATE, (int) numDays); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index f540994..6390ecd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ 
-208,8 +209,8 @@ public void evaluate(VectorizedRowBatch batch) { } protected byte[] evaluateTimestamp(ColumnVector columnVector, int index) { - LongColumnVector lcv = (LongColumnVector) columnVector; - calendar.setTimeInMillis(lcv.vector[index] / 1000000); + TimestampColumnVector tcv = (TimestampColumnVector) columnVector; + calendar.setTimeInMillis(tcv.getTimestampMilliseconds(index)); if (isPositive) { calendar.add(Calendar.DATE, numDays); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 2b473ec..2d0a28a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -26,6 +26,7 @@ import org.apache.hadoop.io.Text; import java.io.UnsupportedEncodingException; +import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Arrays; @@ -38,6 +39,7 @@ private int colNum; private int outputColumn; private long longValue = 0; + private Timestamp timestampValue = null; private byte[] stringValue = null; protected boolean isPositive = true; private transient final Calendar calendar = Calendar.getInstance(); @@ -56,6 +58,8 @@ public VectorUDFDateAddScalarCol(Object object, int colNum, int outputColumn) { if (object instanceof Long) { this.longValue = (Long) object; + } else if (object instanceof Timestamp) { + this.timestampValue = (Timestamp) object; } else if (object instanceof byte []) { this.stringValue = (byte[]) object; } @@ -81,7 +85,7 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - baseDate.setTime(longValue / 1000000); + baseDate.setTime(timestampValue.getTime()); break; case STRING: diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java index 93a54ae..b22c31f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -165,29 +166,8 @@ private LongColumnVector toDateArray(VectorizedRowBatch batch, Type colType, switch (colType) { case TIMESTAMP: - LongColumnVector lcv = (LongColumnVector) inputColVector; - lcv.copySelected(batch.selectedInUse, batch.selected, batch.size, dateVector); - if (dateVector.isRepeating) { - date.setTime(dateVector.vector[0] / 1000000); - dateVector.vector[0] = DateWritable.dateToDays(date); - } else { - if (batch.selectedInUse) { - for (int j = 0; j != size; j++) { - int i = batch.selected[j]; - if (!dateVector.isNull[i]) { - date.setTime(dateVector.vector[i] / 1000000); - dateVector.vector[i] = DateWritable.dateToDays(date); - } - } - } else { - for (int i = 0; i != size; i++) { - if (!dateVector.isNull[i]) { - date.setTime(dateVector.vector[i] / 
1000000); - dateVector.vector[i] = DateWritable.dateToDays(date); - } - } - } - } + TimestampColumnVector tcv = (TimestampColumnVector) inputColVector; + copySelected(tcv, batch.selectedInUse, batch.selected, batch.size, dateVector); return dateVector; case STRING: @@ -280,6 +260,73 @@ private void setDays(BytesColumnVector input, LongColumnVector output, int i) { } } + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public void copySelected( + TimestampColumnVector input, boolean selectedInUse, int[] sel, int size, LongColumnVector output) { + + // Output has nulls if and only if input has nulls. + output.noNulls = input.noNulls; + output.isRepeating = false; + + // Handle repeating case + if (input.isRepeating) { + output.isNull[0] = input.isNull[0]; + output.isRepeating = true; + + if (!input.isNull[0]) { + date.setTime(input.getTimestampMilliseconds(0)); + output.vector[0] = DateWritable.dateToDays(date); + } + return; + } + + // Handle normal case + + // Copy data values over + if (input.noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + date.setTime(input.getTimestampMilliseconds(i)); + output.vector[i] = DateWritable.dateToDays(date); + } + } else { + for (int i = 0; i < size; i++) { + date.setTime(input.getTimestampMilliseconds(i)); + output.vector[i] = DateWritable.dateToDays(date); + } + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = input.isNull[i]; + } + } + else { + System.arraycopy(input.isNull, 0, output.isNull, 0, size); + } + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + if (!input.isNull[i]) { + date.setTime(input.getTimestampMilliseconds(i)); + output.vector[i] = DateWritable.dateToDays(date); + } + } + } else { + for (int i = 0; i < size; i++) { + if (!input.isNull[i]) { + date.setTime(input.getTimestampMilliseconds(i)); + output.vector[i] = DateWritable.dateToDays(date); + } + } + } + } + } + @Override public int getOutputColumn() { return this.outputColumn; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java index 0dfe8d5..ab71b47 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java @@ -22,12 +22,14 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.io.Text; import java.sql.Date; +import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -37,6 +39,7 @@ private int colNum; private int outputColumn; private long longValue; + private Timestamp timestampValue; private byte[] stringValue; private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient final Text text = new Text(); @@ -50,6 +53,8 @@ public VectorUDFDateDiffColScalar(int colNum, Object object, int 
outputColumn) { if (object instanceof Long) { this.longValue = (Long) object; + } else if (object instanceof Timestamp) { + this.timestampValue = (Timestamp) object; } else if (object instanceof byte []) { this.stringValue = (byte []) object; } @@ -87,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - date.setTime(longValue / 1000000); + date.setTime(timestampValue.getTime()); baseDate = DateWritable.dateToDays(date); break; @@ -232,8 +237,8 @@ public void evaluate(VectorizedRowBatch batch) { } protected int evaluateTimestamp(ColumnVector columnVector, int index) { - LongColumnVector lcv = (LongColumnVector) columnVector; - date.setTime(lcv.vector[index] / 1000000); + TimestampColumnVector tcv = (TimestampColumnVector) columnVector; + date.setTime(tcv.getTimestampMilliseconds(index)); return DateWritable.dateToDays(date) - baseDate; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java index 3ea82aa..dea5444 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java @@ -21,12 +21,14 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.io.Text; import java.sql.Date; +import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -36,6 +38,7 @@ private int colNum; private int outputColumn; private long longValue; + private Timestamp timestampValue = null; private byte[] stringValue; private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient final Text text = new Text(); @@ -49,6 +52,8 @@ public VectorUDFDateDiffScalarCol(Object object, int colNum, int outputColumn) { if (object instanceof Long) { this.longValue = (Long) object; + } else if (object instanceof Timestamp) { + this.timestampValue = (Timestamp) object; } else if (object instanceof byte []) { this.stringValue = (byte[]) object; } @@ -86,7 +91,7 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - date.setTime(longValue / 1000000); + date.setTime(timestampValue.getTime()); baseDate = DateWritable.dateToDays(date); break; @@ -231,8 +236,8 @@ public void evaluate(VectorizedRowBatch batch) { } protected int evaluateTimestamp(ColumnVector columnVector, int index) { - LongColumnVector lcv = (LongColumnVector) columnVector; - date.setTime(lcv.vector[index] / 1000000); + TimestampColumnVector tcv = (TimestampColumnVector) columnVector; + date.setTime(tcv.getTimestampMilliseconds(index)); return baseDate - DateWritable.dateToDays(date); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java index 9883fe6..a58bfb5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java @@ -47,9 +47,6 @@ protected void func(BytesColumnVector outV, long[] vector, int i) { date.setTime(DateWritable.daysToMillis((int) vector[i])); break; - case TIMESTAMP: - date.setTime(vector[i] / 1000000); - break; default: throw new Error("Unsupported input type " + inputTypes[0].name()); } @@ -68,7 +65,7 @@ protected void func(BytesColumnVector outV, long[] vector, int i) { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DATETIME_FAMILY) + VectorExpressionDescriptor.ArgumentType.DATE) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateTimestamp.java new file mode 100644 index 0000000..c29e22e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateTimestamp.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
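With the TIMESTAMP case deleted above, VectorUDFDateLong now handles only true DATE columns, and the new VectorUDFDateTimestamp below covers TIMESTAMP columns via getTimestampMilliseconds. Its per-row work reduces to formatting epoch milliseconds as yyyy-MM-dd bytes; roughly (standalone sketch, illustrative class name):

    import java.nio.charset.StandardCharsets;
    import java.text.SimpleDateFormat;
    import java.util.Date;

    // Toy version of the per-row timestamp -> "yyyy-MM-dd" projection.
    final class ToyDateString {
      private static final SimpleDateFormat FORMATTER = new SimpleDateFormat("yyyy-MM-dd");

      static byte[] format(long epochMillis) {
        Date date = new Date(epochMillis); // a real impl reuses one Date object
        return FORMATTER.format(date).getBytes(StandardCharsets.UTF_8);
      }

      public static void main(String[] args) {
        // prints 1970-01-01 (or 1969-12-31 in timezones west of UTC)
        System.out.println(new String(format(0L), StandardCharsets.UTF_8));
      }
    }

Two details worth noting in the real class: UTF-8 is a charset the JVM guarantees, so the UnsupportedEncodingException catch is effectively dead code (using StandardCharsets.UTF_8, as above, removes the checked exception entirely), and keeping the SimpleDateFormat per-instance and transient sidesteps its well-known thread-unsafety.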
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +import java.io.UnsupportedEncodingException; +import java.sql.Date; +import java.text.SimpleDateFormat; + +public class VectorUDFDateTimestamp extends TimestampToStringUnaryUDF { + private static final long serialVersionUID = 1L; + + private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private transient Date date = new Date(0); + + public VectorUDFDateTimestamp() { + super(); + } + + public VectorUDFDateTimestamp(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void func(BytesColumnVector outV, TimestampColumnVector inV, int i) { + switch (inputTypes[0]) { + case TIMESTAMP: + date.setTime(inV.getTimestampMilliseconds(i)); + break; + + default: + throw new Error("Unsupported input type " + inputTypes[0].name()); + } + try { + byte[] bytes = formatter.format(date).getBytes("UTF-8"); + outV.setRef(i, bytes, 0, bytes.length); + } catch (UnsupportedEncodingException e) { + outV.vector[i] = null; + outV.isNull[i] = true; + } + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.TIMESTAMP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthDate.java new file mode 100644 index 0000000..8addb20 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthDate.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Expression to get day of month. 
+ * Extends {@link VectorUDFTimestampFieldDate} + */ +public final class VectorUDFDayOfMonthDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + public VectorUDFDayOfMonthDate(int colNum, int outputColumn) { + super(Calendar.DAY_OF_MONTH, colNum, outputColumn); + } + + public VectorUDFDayOfMonthDate() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java deleted file mode 100644 index bbd734c..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthLong.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.util.Calendar; - -/** - * Expression to get day of month. - * Extends {@link VectorUDFTimestampFieldLong} - */ -public final class VectorUDFDayOfMonthLong extends VectorUDFTimestampFieldLong { - - private static final long serialVersionUID = 1L; - - public VectorUDFDayOfMonthLong(int colNum, int outputColumn) { - super(Calendar.DAY_OF_MONTH, colNum, outputColumn); - } - - public VectorUDFDayOfMonthLong() { - super(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthTimestamp.java new file mode 100644 index 0000000..4df48ee --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDayOfMonthTimestamp.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Expression to get day of month. 
+ * Extends {@link VectorUDFTimestampFieldTimestamp} + */ +public final class VectorUDFDayOfMonthTimestamp extends VectorUDFTimestampFieldTimestamp { + + private static final long serialVersionUID = 1L; + + public VectorUDFDayOfMonthTimestamp(int colNum, int outputColumn) { + super(Calendar.DAY_OF_MONTH, colNum, outputColumn); + } + + public VectorUDFDayOfMonthTimestamp() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourDate.java new file mode 100644 index 0000000..0e33e25 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourDate.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Returns hour of day. + * Extends {@link VectorUDFTimestampFieldDate} + */ +public final class VectorUDFHourDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + public VectorUDFHourDate(int colNum, int outputColumn) { + super(Calendar.HOUR_OF_DAY, colNum, outputColumn); + } + + public VectorUDFHourDate() { + super(); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourLong.java deleted file mode 100644 index 1d4d572..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourLong.java +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.util.Calendar; - -/** - * Returns hour of day. 
- * Extends {@link VectorUDFTimestampFieldLong} - */ -public final class VectorUDFHourLong extends VectorUDFTimestampFieldLong { - - private static final long serialVersionUID = 1L; - - public VectorUDFHourLong(int colNum, int outputColumn) { - super(Calendar.HOUR_OF_DAY, colNum, outputColumn); - } - - public VectorUDFHourLong() { - super(); - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourTimestamp.java new file mode 100644 index 0000000..93961bc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFHourTimestamp.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Returns hour of day. + * Extends {@link VectorUDFTimestampFieldTimestamp} + */ +public final class VectorUDFHourTimestamp extends VectorUDFTimestampFieldTimestamp { + + private static final long serialVersionUID = 1L; + + public VectorUDFHourTimestamp(int colNum, int outputColumn) { + super(Calendar.HOUR_OF_DAY, colNum, outputColumn); + } + + public VectorUDFHourTimestamp() { + super(); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteDate.java new file mode 100644 index 0000000..98182ae --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteDate.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Returns minute value. 
+ * Extends {@link VectorUDFTimestampFieldDate} + */ +public final class VectorUDFMinuteDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + public VectorUDFMinuteDate(int colNum, int outputColumn) { + super(Calendar.MINUTE, colNum, outputColumn); + } + + public VectorUDFMinuteDate() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteLong.java deleted file mode 100644 index 4e3eede..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteLong.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.util.Calendar; - -/** - * Returns minute value. - * Extends {@link VectorUDFTimestampFieldLong} - */ -public final class VectorUDFMinuteLong extends VectorUDFTimestampFieldLong { - - private static final long serialVersionUID = 1L; - - public VectorUDFMinuteLong(int colNum, int outputColumn) { - super(Calendar.MINUTE, colNum, outputColumn); - } - - public VectorUDFMinuteLong() { - super(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteTimestamp.java new file mode 100644 index 0000000..7e4a262 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMinuteTimestamp.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Returns minute value. 
+ * Extends {@link VectorUDFTimestampFieldTimestamp} + */ +public final class VectorUDFMinuteTimestamp extends VectorUDFTimestampFieldTimestamp { + + private static final long serialVersionUID = 1L; + + public VectorUDFMinuteTimestamp(int colNum, int outputColumn) { + super(Calendar.MINUTE, colNum, outputColumn); + } + + public VectorUDFMinuteTimestamp() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java new file mode 100644 index 0000000..aac8ab7 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthDate.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Returns month value. + * Extends {@link VectorUDFTimestampFieldDate} + */ +public final class VectorUDFMonthDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + public VectorUDFMonthDate(int colNum, int outputColumn) { + super(Calendar.MONTH, colNum, outputColumn); + } + + public VectorUDFMonthDate() { + super(); + } + + @Override + protected long getDateField(long days) { + /* january is 0 */ + return 1 + super.getDateField(days); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java deleted file mode 100644 index 58724a4..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.util.Calendar; - -/** - * Returns month value. 
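Why the getDateField() override in VectorUDFMonthDate above adds 1: java.util.Calendar numbers months from zero (Calendar.JANUARY == 0) while Hive's month() is 1-based. A minimal self-contained sketch, with an arbitrary epoch-day value:

import java.util.Calendar;
import java.util.TimeZone;
import java.util.concurrent.TimeUnit;

public class MonthFieldSketch {
    public static void main(String[] args) {
        long epochDays = 16816L; // arbitrary example day
        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        cal.setTimeInMillis(TimeUnit.DAYS.toMillis(epochDays));
        int raw = cal.get(Calendar.MONTH);  // 0..11, January == 0
        int hiveMonth = 1 + raw;            // 1..12, what month() must return
        System.out.println(raw + " -> " + hiveMonth);
    }
}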
- * Extends {@link VectorUDFTimestampFieldLong} - */ -public final class VectorUDFMonthLong extends VectorUDFTimestampFieldLong { - - private static final long serialVersionUID = 1L; - - public VectorUDFMonthLong(int colNum, int outputColumn) { - super(Calendar.MONTH, colNum, outputColumn); - } - - public VectorUDFMonthLong() { - super(); - } - - @Override - protected long getTimestampField(long time) { - /* january is 0 */ - return 1 + super.getTimestampField(time); - } - - @Override - protected long getDateField(long days) { - /* january is 0 */ - return 1 + super.getDateField(days); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthTimestamp.java new file mode 100644 index 0000000..e966636 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthTimestamp.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; + +/** + * Returns month value. + * Extends {@link VectorUDFTimestampFieldTimestamp} + */ +public final class VectorUDFMonthTimestamp extends VectorUDFTimestampFieldTimestamp { + + private static final long serialVersionUID = 1L; + + public VectorUDFMonthTimestamp(int colNum, int outputColumn) { + super(Calendar.MONTH, colNum, outputColumn); + } + + public VectorUDFMonthTimestamp() { + super(); + } + + @Override + protected long getTimestampField(TimestampColumnVector timestampColVector, int elementNum) { + /* january is 0 */ + return 1 + super.getTimestampField(timestampColVector, elementNum); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondDate.java new file mode 100644 index 0000000..fbae390 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondDate.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Expression to get seconds. + * Extends {@link VectorUDFTimestampFieldDate} + */ +public final class VectorUDFSecondDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + public VectorUDFSecondDate(int colNum, int outputColumn) { + super(Calendar.SECOND, colNum, outputColumn); + } + + public VectorUDFSecondDate() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondLong.java deleted file mode 100644 index 41655ec..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondLong.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.util.Calendar; - -/** - * Expression to get seconds. - * Extends {@link VectorUDFTimestampFieldLong} - */ -public final class VectorUDFSecondLong extends VectorUDFTimestampFieldLong { - - private static final long serialVersionUID = 1L; - - public VectorUDFSecondLong(int colNum, int outputColumn) { - super(Calendar.SECOND, colNum, outputColumn); - } - - public VectorUDFSecondLong() { - super(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondTimestamp.java new file mode 100644 index 0000000..97842f0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFSecondTimestamp.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Expression to get seconds. + * Extends {@link VectorUDFTimestampFieldTimestamp} + */ +public final class VectorUDFSecondTimestamp extends VectorUDFTimestampFieldTimestamp { + + private static final long serialVersionUID = 1L; + + public VectorUDFSecondTimestamp(int colNum, int outputColumn) { + super(Calendar.SECOND, colNum, outputColumn); + } + + public VectorUDFSecondTimestamp() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java new file mode 100644 index 0000000..0a3a87a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +import com.google.common.base.Preconditions; + + +/** + * Abstract class to return various fields from a Date.
+ */ +public abstract class VectorUDFTimestampFieldDate extends VectorExpression { + + private static final long serialVersionUID = 1L; + + protected int colNum; + protected int outputColumn; + protected int field; + protected transient final Calendar calendar = Calendar.getInstance(); + + public VectorUDFTimestampFieldDate(int field, int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + this.field = field; + } + + public VectorUDFTimestampFieldDate() { + super(); + } + + protected long getDateField(long days) { + calendar.setTimeInMillis(DateWritable.daysToMillis((int) days)); + return calendar.get(field); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + Preconditions.checkState(inputTypes[0] == VectorExpression.Type.DATE); + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + ColumnVector inputColVec = batch.cols[this.colNum]; + + /* every line below this is identical for evaluateLong & evaluateString */ + final int n = inputColVec.isRepeating ? 1 : batch.size; + int[] sel = batch.selected; + final boolean selectedInUse = (inputColVec.isRepeating == false) && batch.selectedInUse; + + if(batch.size == 0) { + /* n != batch.size when isRepeating */ + return; + } + + /* true for all algebraic UDFs with no state */ + outV.isRepeating = inputColVec.isRepeating; + + LongColumnVector longColVector = (LongColumnVector) inputColVec; + + if (inputColVec.noNulls) { + outV.noNulls = true; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = getDateField(longColVector.vector[i]); + } + } else { + for(int i = 0; i < n; i++) { + outV.vector[i] = getDateField(longColVector.vector[i]); + } + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. 
+ outV.noNulls = false; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputColVec.isNull[i]; + if (!inputColVec.isNull[i]) { + outV.vector[i] = getDateField(longColVector.vector[i]); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputColVec.isNull[i]; + if (!inputColVec.isNull[i]) { + outV.vector[i] = getDateField(longColVector.vector[i]); + } + } + } + } + } + + @Override + public int getOutputColumn() { + return this.outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public int getField() { + return field; + } + + public void setField(int field) { + this.field = field; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DATE) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java deleted file mode 100644 index 3b9fffc..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java +++ /dev/null @@ -1,229 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.sql.Timestamp; -import java.util.Calendar; - -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.serde2.io.DateWritable; - -/** - * Abstract class to return various fields from a Timestamp or Date. 
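The evaluate() implementation above follows the standard vectorized kernel shape: read only element 0 when the input vector repeats, honor the selected[] indirection otherwise, and skip the expensive per-row call for null entries while copying the null mask. A condensed sketch of that control flow; extract() stands in for getDateField(), and all names are illustrative:

public class FieldKernelSketch {

    // stand-in for the real field extraction (getDateField above)
    static long extract(long v) { return v % 12; }

    static void apply(long[] in, boolean[] isNull, boolean noNulls, boolean isRepeating,
                      int[] sel, boolean selectedInUse, int size,
                      long[] out, boolean[] outIsNull) {
        if (size == 0) {
            return;                                   // empty batch, nothing to do
        }
        final int n = isRepeating ? 1 : size;         // repeating: element 0 only
        final boolean useSel = !isRepeating && selectedInUse;
        for (int j = 0; j < n; j++) {
            int i = useSel ? sel[j] : j;              // honor the selection vector
            if (noNulls || !isNull[i]) {
                out[i] = extract(in[i]);              // skip the call for null rows
            }
            if (!noNulls) {
                outIsNull[i] = isNull[i];             // propagate the null mask
            }
        }
    }
}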
- */ -public abstract class VectorUDFTimestampFieldLong extends VectorExpression { - - private static final long serialVersionUID = 1L; - - protected int colNum; - protected int outputColumn; - protected int field; - protected transient final Calendar calendar = Calendar.getInstance(); - protected transient final Timestamp ts = new Timestamp(0); - - public VectorUDFTimestampFieldLong(int field, int colNum, int outputColumn) { - this(); - this.colNum = colNum; - this.outputColumn = outputColumn; - this.field = field; - } - - public VectorUDFTimestampFieldLong() { - super(); - } - - protected final Timestamp getTimestamp(long nanos) { - /* - * new Timestamp() stores the millisecond precision values in the nanos field. - * If you wanted to store 200ms it will result in nanos being set to 200*1000*1000. - * When you call setNanos(0), because there are no sub-ms times, it will set it to 0, - * ending up with a Timestamp which refers to 0ms by accident. - * CAVEAT: never use a sub-second value in new Timestamp() args, just use setNanos to set it. - */ - long ms = (nanos / (1000 * 1000 * 1000)) * 1000; - /* the milliseconds should be kept in nanos */ - long ns = nanos % (1000*1000*1000); - if (ns < 0) { - /* - * Due to the way java.sql.Timestamp stores sub-second values, it throws an exception - * if nano seconds are negative. The timestamp implementation handles this by using - * negative milliseconds and adjusting the nano seconds up by the same to be positive. - * Read Timestamp.java:setTime() implementation for this code. - */ - ms -= 1000; - ns += 1000*1000*1000; - } - ts.setTime(ms); - ts.setNanos((int) ns); - return ts; - } - - protected long getTimestampField(long time) { - calendar.setTime(getTimestamp(time)); - return calendar.get(field); - } - - protected long getDateField(long days) { - calendar.setTimeInMillis(DateWritable.daysToMillis((int) days)); - return calendar.get(field); - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; - LongColumnVector inputCol = (LongColumnVector)batch.cols[this.colNum]; - /* every line below this is identical for evaluateLong & evaluateString */ - final int n = inputCol.isRepeating ? 1 : batch.size; - int[] sel = batch.selected; - final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; - - if(batch.size == 0) { - /* n != batch.size when isRepeating */ - return; - } - - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; - - switch (inputTypes[0]) { - case TIMESTAMP: - if (inputCol.noNulls) { - outV.noNulls = true; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.vector[i] = getTimestampField(inputCol.vector[i]); - } - } else { - for(int i = 0; i < n; i++) { - outV.vector[i] = getTimestampField(inputCol.vector[i]); - } - } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. 
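A worked example of the caveat documented in getTimestamp() above: java.sql.Timestamp keeps sub-second precision in its nanos field and setNanos() rejects negative values, so a pre-epoch instant has to be split into a smaller millisecond value plus a positive nano remainder:

import java.sql.Timestamp;

public class NanosToTimestampDemo {
    public static void main(String[] args) {
        long nanos = -1_500_000_000L;              // 1.5s before the epoch
        long ms = (nanos / 1_000_000_000L) * 1000; // -1000 (truncated division)
        long ns = nanos % 1_000_000_000L;          // -500_000_000
        if (ns < 0) {
            ms -= 1000;                            // -2000
            ns += 1_000_000_000L;                  // +500_000_000
        }
        Timestamp ts = new Timestamp(ms);
        ts.setNanos((int) ns);
        System.out.println(ts.getTime());          // -1500 ms, i.e. -1.5s, as expected
    }
}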
- outV.noNulls = false; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getTimestampField(inputCol.vector[i]); - } - } - } else { - for(int i = 0; i < n; i++) { - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getTimestampField(inputCol.vector[i]); - } - } - } - } - break; - - case DATE: - if (inputCol.noNulls) { - outV.noNulls = true; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.vector[i] = getDateField(inputCol.vector[i]); - } - } else { - for(int i = 0; i < n; i++) { - outV.vector[i] = getDateField(inputCol.vector[i]); - } - } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. - outV.noNulls = false; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getDateField(inputCol.vector[i]); - } - } - } else { - for(int i = 0; i < n; i++) { - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getDateField(inputCol.vector[i]); - } - } - } - } - break; - default: - throw new Error("Unsupported input type " + inputTypes[0].name()); - } - } - - @Override - public int getOutputColumn() { - return this.outputColumn; - } - - @Override - public String getOutputType() { - return "long"; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public int getField() { - return field; - } - - public void setField(int field) { - this.field = field; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DATETIME_FAMILY) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN); - return b.build(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java new file mode 100644 index 0000000..5fca678 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +import com.google.common.base.Preconditions; + +/** + * Abstract class to return various fields from a Timestamp. + */ +public abstract class VectorUDFTimestampFieldTimestamp extends VectorExpression { + + private static final long serialVersionUID = 1L; + + protected int colNum; + protected int outputColumn; + protected int field; + protected transient final Calendar calendar = Calendar.getInstance(); + + public VectorUDFTimestampFieldTimestamp(int field, int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + this.field = field; + } + + public VectorUDFTimestampFieldTimestamp() { + super(); + } + + protected long getTimestampField(TimestampColumnVector timestampColVector, int elementNum) { + calendar.setTime(timestampColVector.asScratchTimestamp(elementNum)); + return calendar.get(field); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + Preconditions.checkState(inputTypes[0] == VectorExpression.Type.TIMESTAMP); + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + ColumnVector inputColVec = batch.cols[this.colNum]; + + /* every line below this is identical for evaluateLong & evaluateString */ + final int n = inputColVec.isRepeating ? 1 : batch.size; + int[] sel = batch.selected; + final boolean selectedInUse = (inputColVec.isRepeating == false) && batch.selectedInUse; + + if(batch.size == 0) { + /* n != batch.size when isRepeating */ + return; + } + + /* true for all algebraic UDFs with no state */ + outV.isRepeating = inputColVec.isRepeating; + + TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec; + + if (inputColVec.noNulls) { + outV.noNulls = true; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = getTimestampField(timestampColVector, i); + } + } else { + for(int i = 0; i < n; i++) { + outV.vector[i] = getTimestampField(timestampColVector, i); + } + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. 
+ outV.noNulls = false; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputColVec.isNull[i]; + if (!inputColVec.isNull[i]) { + outV.vector[i] = getTimestampField(timestampColVector, i); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputColVec.isNull[i]; + if (!inputColVec.isNull[i]) { + outV.vector[i] = getTimestampField(timestampColVector, i); + } + } + } + } + } + + @Override + public int getOutputColumn() { + return this.outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public int getField() { + return field; + } + + public void setField(int field) { + this.field = field; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.TIMESTAMP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java new file mode 100644 index 0000000..b7c4ff4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampDate.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Return Unix Timestamp. 
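The Timestamp variant's getTimestampField() above reads rows through TimestampColumnVector.asScratchTimestamp(), which returns one reusable mutable Timestamp per vector instead of allocating per row. A rough sketch of that pattern under assumed internals (a millis array plus a nano-of-second array; the layout and names are illustrative, not necessarily Hive's exact code):

import java.sql.Timestamp;

public class ScratchTimestampSketch {

    // assumed layout: epoch millis plus nano-of-second per element
    private final long[] time;
    private final int[] nanos;
    private final Timestamp scratch = new Timestamp(0); // reused, callers must not keep it

    public ScratchTimestampSketch(long[] time, int[] nanos) {
        this.time = time;
        this.nanos = nanos;
    }

    // mirrors the idea of asScratchTimestamp(int): mutate the single
    // scratch object instead of allocating a Timestamp per row
    public Timestamp asScratchTimestamp(int elementNum) {
        scratch.setTime(time[elementNum]);
        scratch.setNanos(nanos[elementNum]);
        return scratch;
    }
}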
+ * Extends {@link VectorUDFTimestampFieldDate} + */ +public final class VectorUDFUnixTimeStampDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + @Override + protected long getDateField(long days) { + long ms = DateWritable.daysToMillis((int) days); + return ms / 1000; + } + + public VectorUDFUnixTimeStampDate(int colNum, int outputColumn) { + /* not a real field */ + super(-1, colNum, outputColumn); + } + + public VectorUDFUnixTimeStampDate() { + super(); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java deleted file mode 100644 index 6df68f0..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.serde2.io.DateWritable; - -/** - * Return Unix Timestamp. - * Extends {@link VectorUDFTimestampFieldLong} - */ -public final class VectorUDFUnixTimeStampLong extends VectorUDFTimestampFieldLong { - - private static final long serialVersionUID = 1L; - - @Override - protected long getTimestampField(long time) { - long ms = (time / (1000*1000*1000)) * 1000; - long remainder = time % (1000*1000*1000); - /* negative timestamps need to be adjusted */ - if(remainder < 0) { - ms -= 1000; - } - return ms / 1000; - } - - @Override - protected long getDateField(long days) { - long ms = DateWritable.daysToMillis((int) days); - return ms / 1000; - } - - public VectorUDFUnixTimeStampLong(int colNum, int outputColumn) { - /* not a real field */ - super(-1, colNum, outputColumn); - } - - public VectorUDFUnixTimeStampLong() { - super(); - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampTimestamp.java new file mode 100644 index 0000000..e4a31ca --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampTimestamp.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
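A worked example of the sign handling in the deleted getTimestampField() above: Java's integer division truncates toward zero, but unix_timestamp() needs floor semantics for pre-epoch values, which is what the negative-remainder branch restores:

public class UnixTimestampFloorDemo {
    public static void main(String[] args) {
        long nanos = -500_000_000L;                  // 0.5s before the epoch
        long ms = (nanos / 1_000_000_000L) * 1000;   // 0 (truncation toward zero!)
        long remainder = nanos % 1_000_000_000L;     // -500_000_000
        if (remainder < 0) {
            ms -= 1000;                              // -1000
        }
        System.out.println(ms / 1000);               // -1, i.e. floor(-0.5)
        System.out.println(Math.floorDiv(nanos, 1_000_000_000L)); // same result: -1
    }
}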
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Return Unix Timestamp. + * Extends {@link VectorUDFTimestampFieldTimestamp} + */ +public final class VectorUDFUnixTimeStampTimestamp extends VectorUDFTimestampFieldTimestamp { + + private static final long serialVersionUID = 1L; + + @Override + protected long getTimestampField(TimestampColumnVector timestampColVector, int elementNum) { + return timestampColVector.getTimestampSeconds(elementNum); + } + + public VectorUDFUnixTimeStampTimestamp(int colNum, int outputColumn) { + /* not a real field */ + super(-1, colNum, outputColumn); + } + + public VectorUDFUnixTimeStampTimestamp() { + super(); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearDate.java new file mode 100644 index 0000000..8e8f125 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearDate.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Expression to get week of year. 
+ * Extends {@link VectorUDFTimestampFieldDate} + */ +public final class VectorUDFWeekOfYearDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + public VectorUDFWeekOfYearDate(int colNum, int outputColumn) { + super(Calendar.WEEK_OF_YEAR, colNum, outputColumn); + initCalendar(); + } + + public VectorUDFWeekOfYearDate() { + super(); + initCalendar(); + } + + private void initCalendar() { + /* code copied over from UDFWeekOfYear implementation */ + calendar.setFirstDayOfWeek(Calendar.MONDAY); + calendar.setMinimalDaysInFirstWeek(4); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearLong.java deleted file mode 100644 index 1ebadda..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearLong.java +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.util.Calendar; - -/** - * Expression to get week of year. - * Extends {@link VectorUDFTimestampFieldLong} - */ -public final class VectorUDFWeekOfYearLong extends VectorUDFTimestampFieldLong { - - private static final long serialVersionUID = 1L; - - public VectorUDFWeekOfYearLong(int colNum, int outputColumn) { - super(Calendar.WEEK_OF_YEAR, colNum, outputColumn); - initCalendar(); - } - - public VectorUDFWeekOfYearLong() { - super(); - initCalendar(); - } - - private void initCalendar() { - /* code copied over from UDFWeekOfYear implementation */ - calendar.setFirstDayOfWeek(Calendar.MONDAY); - calendar.setMinimalDaysInFirstWeek(4); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearTimestamp.java new file mode 100644 index 0000000..4b9c26b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFWeekOfYearTimestamp.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
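The initCalendar() settings copied from UDFWeekOfYear yield ISO-8601 week numbering: weeks start on Monday, and the first week of a year must contain at least four of its days. A small sketch of the visible effect (January 1, 2016 falls on a Friday, so it still belongs to week 53 of 2015):

import java.util.Calendar;

public class WeekOfYearSketch {
    public static void main(String[] args) {
        Calendar cal = Calendar.getInstance();
        cal.setFirstDayOfWeek(Calendar.MONDAY);  // ISO-8601: weeks start on Monday
        cal.setMinimalDaysInFirstWeek(4);        // ISO-8601: first week holds >= 4 days
        cal.clear();
        cal.set(2016, Calendar.JANUARY, 1);      // a Friday
        System.out.println(cal.get(Calendar.WEEK_OF_YEAR)); // 53
    }
}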
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Expression to get week of year. + * Extends {@link VectorUDFTimestampFieldTimestamp} + */ +public final class VectorUDFWeekOfYearTimestamp extends VectorUDFTimestampFieldTimestamp { + + private static final long serialVersionUID = 1L; + + public VectorUDFWeekOfYearTimestamp(int colNum, int outputColumn) { + super(Calendar.WEEK_OF_YEAR, colNum, outputColumn); + initCalendar(); + } + + public VectorUDFWeekOfYearTimestamp() { + super(); + initCalendar(); + } + + private void initCalendar() { + /* code copied over from UDFWeekOfYear implementation */ + calendar.setFirstDayOfWeek(Calendar.MONDAY); + calendar.setMinimalDaysInFirstWeek(4); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearDate.java new file mode 100644 index 0000000..a2d098d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearDate.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Expression to get year as a long. + * Extends {@link VectorUDFTimestampFieldDate} + */ +public final class VectorUDFYearDate extends VectorUDFTimestampFieldDate { + + private static final long serialVersionUID = 1L; + + public VectorUDFYearDate(int colNum, int outputColumn) { + super(Calendar.YEAR, colNum, outputColumn); + } + + public VectorUDFYearDate() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java deleted file mode 100644 index 41c9d5b..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import java.util.Arrays; -import java.util.Calendar; - -/** - * Expression to get year as a long. - * Extends {@link VectorUDFTimestampFieldLong} - */ -public final class VectorUDFYearLong extends VectorUDFTimestampFieldLong { - - private static final long serialVersionUID = 1L; - /* year boundaries in nanoseconds */ - private static transient final long[] YEAR_BOUNDARIES; - private static transient final int MIN_YEAR = 1678; - private static transient final int MAX_YEAR = 2300; - - static { - YEAR_BOUNDARIES = new long[MAX_YEAR-MIN_YEAR]; - Calendar c = Calendar.getInstance(); - c.setTimeInMillis(0); // c.set doesn't reset millis - /* 1901 Jan is not with in range */ - for(int year=MIN_YEAR+1; year <= MAX_YEAR; year++) { - c.set(year, Calendar.JANUARY, 1, 0, 0, 0); - YEAR_BOUNDARIES[year-MIN_YEAR-1] = c.getTimeInMillis()*1000*1000; - } - } - - @Override - protected long getTimestampField(long time) { - /* binarySearch is faster than a loop doing a[i] (no array out of bounds checks) */ - int year = Arrays.binarySearch(YEAR_BOUNDARIES, time); - if(year >= 0) { - /* 0 == 1902 etc */ - return MIN_YEAR + 1 + year; - } else { - /* -1 == 1901, -2 == 1902 */ - return MIN_YEAR - 1 - year; - } - } - - public VectorUDFYearLong(int colNum, int outputColumn) { - super(Calendar.YEAR, colNum, outputColumn); - } - - public VectorUDFYearLong() { - super(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearTimestamp.java new file mode 100644 index 0000000..f418bb3 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearTimestamp.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.util.Calendar; + +/** + * Expression to get year as a long. 
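The deleted VectorUDFYearLong above avoided a Calendar call per row by binary-searching precomputed year-start boundaries and decoding the negative insertion points that Arrays.binarySearch() returns. A toy reconstruction of just the decode step, with made-up boundary values:

import java.util.Arrays;

public class YearLookupDemo {
    static final int MIN_YEAR = 1678;
    // toy boundaries in arbitrary units: start instants of MIN_YEAR+1, MIN_YEAR+2, ...
    static final long[] BOUNDARIES = {100, 200, 300};

    static int yearOf(long t) {
        int idx = Arrays.binarySearch(BOUNDARIES, t);
        if (idx >= 0) {
            return MIN_YEAR + 1 + idx;   // exact hit: first instant of a year
        }
        return MIN_YEAR - 1 - idx;       // decode -(insertionPoint) - 1
    }

    public static void main(String[] args) {
        System.out.println(yearOf(50));   // 1678: before the first boundary
        System.out.println(yearOf(100));  // 1679: exactly on a boundary
        System.out.println(yearOf(150));  // 1679: inside that year
    }
}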
+ * Extends {@link VectorUDFTimestampFieldTimestamp} + */ +public final class VectorUDFYearTimestamp extends VectorUDFTimestampFieldTimestamp { + + private static final long serialVersionUID = 1L; + + public VectorUDFYearTimestamp(int colNum, int outputColumn) { + super(Calendar.YEAR, colNum, outputColumn); + } + + public VectorUDFYearTimestamp() { + super(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java new file mode 100644 index 0000000..5c8db41 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java @@ -0,0 +1,482 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** + * Generated from template VectorUDAFAvg.txt. + */ +@Description(name = "avg", + value = "_FUNC_(expr) - Returns the average value of expr (vectorized, type: timestamp)") +public class VectorUDAFAvgTimestamp extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** class for storing the current aggregate value. 
*/ + static class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private double sum; + transient private long count; + + /** + * Value is explicitly (re)initialized in reset() + */ + transient private boolean isNull = true; + + public void sumValue(double value) { + if (isNull) { + sum = value; + count = 1; + isNull = false; + } else { + sum += value; + count++; + } + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset () { + isNull = true; + sum = 0; + count = 0L; + } + } + + private VectorExpression inputExpression; + transient private Object[] partialResult; + transient private LongWritable resultCount; + transient private DoubleWritable resultSum; + transient private StructObjectInspector soi; + + public VectorUDAFAvgTimestamp(VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public VectorUDAFAvgTimestamp() { + super(); + partialResult = new Object[2]; + resultCount = new LongWritable(); + resultSum = new DoubleWritable(); + partialResult[0] = resultCount; + partialResult[1] = resultSum; + initPartialResultInspector(); + } + + private void initPartialResultInspector() { + List<ObjectInspector> foi = new ArrayList<ObjectInspector>(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + List<String> fname = new ArrayList<String>(); + fname.add("count"); + fname.add("sum"); + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()]; + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batchSize); + } + } + } else { + if (inputColVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize, inputColVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batchSize, inputColVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + double value, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(inputColVector.getTimestampSecondsWithFractionalNanos(selection[i])); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(inputColVector.getTimestampSecondsWithFractionalNanos(i)); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + double value, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[selection[i]]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + double value, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] 
aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + j); + myagg.sumValue(inputColVector.getTimestampSecondsWithFractionalNanos(i)); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(inputColVector.getTimestampSecondsWithFractionalNanos(i)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = + (TimestampColumnVector)batch.cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + myagg.sum += inputColVector.getTimestampSecondsWithFractionalNanos(0)*batchSize; + myagg.count += batchSize; + } + return; + } + + if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getTimestampSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + myagg.sum += value; + myagg.count += 1; + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + + for (int i=0; i< batchSize; ++i) { + double value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count += 1; + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + 
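
// One asymmetry worth noting: in aggregateInput above, the isRepeating
// branch only folds the batch in when noNulls is set, so a repeating vector
// that carries a null map but a non-null value at row 0 contributes nothing.
// The variance templates below guard that case explicitly; a helper with the
// same guard (illustrative, not in the patch):
static boolean repeatingValueIsNonNull(TimestampColumnVector col) {
  // mirrors "inputColVector.noNulls || !inputColVector.isNull[0]"
  return col.noNulls || !col.isNull[0];
}
// Note also how the repeating no-nulls case collapses the whole batch into a
// single multiply: sum += value * batchSize; count += batchSize.
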
VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, + inputColVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getTimestampSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[i]); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateNoSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + 
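
// The update repeated through these loops is the classic one-pass variance
// recurrence. With count and sum already including the new value v,
//   t = count*v - sum
// equals count*(v - newMean) and also (count-1)*(v - oldMean), so
// t*t / (count*(count-1)) is exactly (v - oldMean)*(v - newMean), which is
// Welford's increment to the running sum of squared deviations. A standalone
// sketch of the same accumulator:
static final class Var {
  long count;
  double sum, m2;
  void push(double v) {
    count++;
    sum += v;
    if (count > 1) {
      double t = count * v - sum;  // = (count-1) * (v - previous mean)
      m2 += (t * t) / ((double) count * (count - 1));
    }
  }
}
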
TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + iterateRepeatingNoNulls(myagg, inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateRepeatingNoNulls( + Aggregation myagg, + double value, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + // TODO: conjure a formula w/o iterating + // + + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // We pulled out i=0 so we can remove the count > 1 check in the loop + for (int i=1; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[0]); + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove the count > 1 check in the loop + // + for (int i=1; i< batchSize; ++i) { + value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count += 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = 
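
// The "pulled out i=0" comments above mark a small but deliberate
// optimization: once one row has been folded in, count can never drop back
// to 1 within the batch, so peeling the first iteration lets the remaining
// loop run without the count > 1 test. The peeled row keeps the test because
// the buffer may have been freshly reset. Shape of the transformation, using
// the illustrative Var accumulator from the note above:
static void fold(Var acc, TimestampColumnVector col, int batchSize) {
  acc.push(col.getTimestampSecondsWithFractionalNanos(0));  // guarded push
  for (int i = 1; i < batchSize; i++) {                     // unguarded loop
    double v = col.getTimestampSecondsWithFractionalNanos(i);
    acc.count++;
    acc.sum += v;
    double t = acc.count * v - acc.sum;
    acc.m2 += (t * t) / ((double) acc.count * (acc.count - 1));
  }
}
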
inputColVector.getTimestampSecondsWithFractionalNanos(0); + myagg.sum += value; + myagg.count += 1; + + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove count > 1 check + for (int i=1; i foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, + inputColVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getTimestampSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void 
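
// This template is expanded several times in the patch with an identical
// {count, sum, variance} partial result; only the finishing arithmetic
// differs across the variance family. The excerpt does not show which
// expansion maps to which function, but the conventional finals are:
static double finish(String flavor, long n, double m2) {
  switch (flavor) {
    case "var_pop":    return m2 / n;
    case "var_samp":   return m2 / (n - 1);
    case "stddev_pop": return Math.sqrt(m2 / n);
    default:           return Math.sqrt(m2 / (n - 1));  // stddev_samp
  }
}
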
iterateSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[i]); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateNoSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + iterateRepeatingNoNulls(myagg, inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateRepeatingNoNulls( + Aggregation myagg, + double value, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + // TODO: conjure a formula w/o iterating + // + + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // We pulled out i=0 so we can remove the count > 1 check in the loop + for (int i=1; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[0]); + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove the count > 1 check in 
the loop + // + for (int i=1; i< batchSize; ++i) { + value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count += 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getTimestampSecondsWithFractionalNanos(0); + myagg.sum += value; + myagg.count += 1; + + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove count > 1 check + for (int i=1; i foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, + inputColVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getTimestampSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[i]); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateNoSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / 
((double)myagg.count*(myagg.count-1)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + iterateRepeatingNoNulls(myagg, inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateRepeatingNoNulls( + Aggregation myagg, + double value, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + // TODO: conjure a formula w/o iterating + // + + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // We pulled out i=0 so we can remove the count > 1 check in the loop + for (int i=1; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[0]); + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove the count > 1 check in the loop + // + for (int i=1; i< batchSize; ++i) { + value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count += 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getTimestampSecondsWithFractionalNanos(0); + myagg.sum += value; + myagg.count += 1; + + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove count > 1 check + for (int i=1; i foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + 
List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, + inputColVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getTimestampSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[i]); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + 
myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateNoSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + iterateRepeatingNoNulls(myagg, inputColVector.getTimestampSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateRepeatingNoNulls( + Aggregation myagg, + double value, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + // TODO: conjure a formula w/o iterating + // + + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // We pulled out i=0 so we can remove the count > 1 check in the loop + for (int i=1; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[0]); + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove the count > 1 check in the loop + // + for (int i=1; i< batchSize; ++i) { + value = inputColVector.getTimestampSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count += 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } 
+ } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getTimestampSecondsWithFractionalNanos(0); + myagg.sum += value; + myagg.count += 1; + + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove count > 1 check + for (int i=1; i SELECT _FUNC_('2009-07-30') FROM src LIMIT 1;\n" + " 30") -@VectorizedExpressions({VectorUDFDayOfMonthLong.class, VectorUDFDayOfMonthString.class}) +@VectorizedExpressions({VectorUDFDayOfMonthDate.class, VectorUDFDayOfMonthString.class, VectorUDFDayOfMonthTimestamp.class}) @NDV(maxNdv = 31) public class UDFDayOfMonth extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java index 87e19ec..bccf5a6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHour.java @@ -26,8 +26,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFHourTimestamp; import org.apache.hadoop.hive.ql.udf.generic.NDV; import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -48,7 +49,7 @@ + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 12\n" + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 12") -@VectorizedExpressions({VectorUDFHourLong.class, VectorUDFHourString.class}) +@VectorizedExpressions({VectorUDFHourDate.class, VectorUDFHourString.class, VectorUDFHourTimestamp.class}) @NDV(maxNdv = 24) public class UDFHour extends UDF { private final SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java index 0f55266..2896385 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMinute.java @@ -26,8 +26,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMinuteTimestamp; import org.apache.hadoop.hive.ql.udf.generic.NDV; import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -48,7 +49,7 @@ + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 58\n" + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 58") -@VectorizedExpressions({VectorUDFMinuteLong.class, VectorUDFMinuteString.class}) 
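
// Recurring pattern in the UDF rewiring from here on: each date/time UDF's
// @VectorizedExpressions list replaces the old *Long expression, which
// served both dates-as-epoch-days and timestamps-as-nanos inside a
// LongColumnVector, with a *Date variant plus a new *Timestamp variant over
// TimestampColumnVector. The binding shape, shown with a hypothetical UDF:
@VectorizedExpressions({VectorUDFExampleDate.class,
    VectorUDFExampleString.class, VectorUDFExampleTimestamp.class})
public class UDFExample extends UDF {
  // at plan time the vectorizer picks whichever listed class matches the
  // argument's column vector type
}
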
+@VectorizedExpressions({VectorUDFMinuteDate.class, VectorUDFMinuteString.class, VectorUDFMinuteTimestamp.class}) @NDV(maxNdv = 60) public class UDFMinute extends UDF { private final SimpleDateFormat formatter1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java index efe5ee2..8c2b0e4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java @@ -26,8 +26,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMonthTimestamp; import org.apache.hadoop.hive.ql.udf.generic.NDV; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; @@ -48,7 +49,7 @@ + "4. A year-month interval value" + "Example:\n" + " > SELECT _FUNC_('2009-07-30') FROM src LIMIT 1;\n" + " 7") -@VectorizedExpressions({VectorUDFMonthLong.class, VectorUDFMonthString.class}) +@VectorizedExpressions({VectorUDFMonthDate.class, VectorUDFMonthString.class, VectorUDFMonthTimestamp.class}) @NDV(maxNdv = 31) public class UDFMonth extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java index b724970..f5ce2b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSecond.java @@ -27,8 +27,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFSecondTimestamp; import org.apache.hadoop.hive.ql.udf.generic.NDV; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; @@ -51,7 +52,7 @@ + " > SELECT _FUNC_('2009-07-30 12:58:59') FROM src LIMIT 1;\n" + " 59\n" + " > SELECT _FUNC_('12:58:59') FROM src LIMIT 1;\n" + " 59") -@VectorizedExpressions({VectorUDFSecondLong.class, VectorUDFSecondString.class}) +@VectorizedExpressions({VectorUDFSecondDate.class, VectorUDFSecondString.class, VectorUDFSecondTimestamp.class}) @NDV(maxNdv = 60) public class UDFSecond extends UDF { private final SimpleDateFormat formatter1 = new SimpleDateFormat( diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java index 9786636..17b892c 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java @@ -26,7 +26,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToBooleanViaDoubleToLong; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDateToBooleanViaLongToLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToBooleanViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToBoolean; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -45,7 +45,7 @@ * */ @VectorizedExpressions({CastLongToBooleanViaLongToLong.class, - CastDateToBooleanViaLongToLong.class, CastTimestampToBooleanViaLongToLong.class, + CastDateToBooleanViaLongToLong.class, CastTimestampToBoolean.class, CastDoubleToBooleanViaDoubleToLong.class, CastDecimalToBoolean.class}) public class UDFToBoolean extends UDF { private final BooleanWritable booleanWritable = new BooleanWritable(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java index d274531..159dd0f 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToLongViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToLong; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -40,7 +40,7 @@ * UDFToByte. * */ -@VectorizedExpressions({CastTimestampToLongViaLongToLong.class, CastDoubleToLong.class, +@VectorizedExpressions({CastTimestampToLong.class, CastDoubleToLong.class, CastDecimalToLong.class}) public class UDFToByte extends UDF { private final ByteWritable byteWritable = new ByteWritable(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java index 8084537..5763947 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToDouble; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToDoubleViaLongToDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -38,7 +38,7 @@ * UDFToDouble. 
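
// The cast rewiring in this block follows from the representation change: a
// timestamp column is no longer literally a long column, so the pass-through
// Cast...ViaLongToLong expressions give way to real conversions
// (CastTimestampToBoolean, CastTimestampToLong, CastTimestampToDouble).
// Illustrative kernels under the header's convention of long = seconds and
// double = seconds with fractional nanos (the rounding behavior here is an
// assumption, not taken from the patch):
static long castToLongSeconds(TimestampColumnVector col, int i) {
  return (long) Math.floor(col.getTimestampSecondsWithFractionalNanos(i));
}
static double castToDoubleSeconds(TimestampColumnVector col, int i) {
  return col.getTimestampSecondsWithFractionalNanos(i);
}
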
* */ -@VectorizedExpressions({CastTimestampToDoubleViaLongToDouble.class, CastLongToDouble.class, +@VectorizedExpressions({CastTimestampToDouble.class, CastLongToDouble.class, CastDecimalToDouble.class}) public class UDFToDouble extends UDF { private final DoubleWritable doubleWritable = new DoubleWritable(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java index 129da43..e2183f4 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToDouble; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToDoubleViaLongToDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -39,7 +39,7 @@ * UDFToFloat. * */ -@VectorizedExpressions({CastTimestampToDoubleViaLongToDouble.class, CastLongToDouble.class, +@VectorizedExpressions({CastTimestampToDouble.class, CastLongToDouble.class, CastDecimalToDouble.class}) public class UDFToFloat extends UDF { private final FloatWritable floatWritable = new FloatWritable(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java index b669754..5f5d1fe 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToLongViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToLong; import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -41,7 +41,7 @@ * UDFToInteger. 
* */ -@VectorizedExpressions({CastTimestampToLongViaLongToLong.class, CastDoubleToLong.class, +@VectorizedExpressions({CastTimestampToLong.class, CastDoubleToLong.class, CastDecimalToLong.class}) public class UDFToInteger extends UDF { private final IntWritable intWritable = new IntWritable(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java index 04ff7cf..3eeabea 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToLongViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToLong; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -40,7 +40,7 @@ * UDFToLong. * */ -@VectorizedExpressions({CastTimestampToLongViaLongToLong.class, CastDoubleToLong.class, +@VectorizedExpressions({CastTimestampToLong.class, CastDoubleToLong.class, CastDecimalToLong.class}) public class UDFToLong extends UDF { private final LongWritable longWritable = new LongWritable(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java index 5315552..b9065b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java @@ -23,7 +23,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToLongViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToLong; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -41,7 +41,7 @@ * UDFToShort. 
* */ -@VectorizedExpressions({CastTimestampToLongViaLongToLong.class, CastDoubleToLong.class, +@VectorizedExpressions({CastTimestampToLong.class, CastDoubleToLong.class, CastDecimalToLong.class}) public class UDFToShort extends UDF { ShortWritable shortWritable = new ShortWritable(); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java index 42ee1bf..e03c049 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFWeekOfYear.java @@ -26,8 +26,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFWeekOfYearTimestamp; import org.apache.hadoop.hive.ql.udf.generic.NDV; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -45,7 +46,7 @@ + " > SELECT _FUNC_('2008-02-20') FROM src LIMIT 1;\n" + " 8\n" + " > SELECT _FUNC_('1980-12-31 12:59:59') FROM src LIMIT 1;\n" + " 1") -@VectorizedExpressions({VectorUDFWeekOfYearLong.class, VectorUDFWeekOfYearString.class}) +@VectorizedExpressions({VectorUDFWeekOfYearDate.class, VectorUDFWeekOfYearString.class, VectorUDFWeekOfYearTimestamp.class}) @NDV(maxNdv = 52) public class UDFWeekOfYear extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java index de46104..d7ecd8c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java @@ -26,8 +26,9 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearTimestamp; import org.apache.hadoop.hive.ql.udf.generic.NDV; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; @@ -48,7 +49,7 @@ + "4. 
A year-month interval value" + "Example:\n " + " > SELECT _FUNC_('2009-07-30') FROM src LIMIT 1;\n" + " 2009") -@VectorizedExpressions({VectorUDFYearLong.class, VectorUDFYearString.class}) +@VectorizedExpressions({VectorUDFYearDate.class, VectorUDFYearString.class, VectorUDFYearTimestamp.class}) @NDV(maxNdv = 20) // although technically its unbounded, its unlikely we will ever see ndv > 20 public class UDFYear extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java index 10e648e..8c376a0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDate.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateTimestamp; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -50,7 +51,7 @@ extended = "Example:\n " + " > SELECT _FUNC_('2009-07-30 04:17:52') FROM src LIMIT 1;\n" + " '2009-07-30'") -@VectorizedExpressions({VectorUDFDateString.class, VectorUDFDateLong.class}) +@VectorizedExpressions({VectorUDFDateString.class, VectorUDFDateLong.class, VectorUDFDateTimestamp.class}) public class GenericUDFDate extends GenericUDF { private transient SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private transient TimestampConverter timestampConverter; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index b5e2837..e1fdc41 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -41,6 +41,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeColumnColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeColumnScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeScalarColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprIntervalDayTimeScalarScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar; @@ -48,6 +52,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnColumn; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarCharScalar; @@ -76,6 +84,7 @@ IfExprLongScalarDoubleColumn.class, IfExprDoubleScalarLongColumn.class, IfExprLongScalarLongScalar.class, IfExprDoubleScalarDoubleScalar.class, IfExprLongScalarDoubleScalar.class, IfExprDoubleScalarLongScalar.class, + IfExprStringGroupColumnStringGroupColumn.class, IfExprStringGroupColumnStringScalar.class, IfExprStringGroupColumnCharScalar.class, IfExprStringGroupColumnVarCharScalar.class, @@ -83,7 +92,12 @@ IfExprCharScalarStringGroupColumn.class, IfExprVarCharScalarStringGroupColumn.class, IfExprStringScalarStringScalar.class, IfExprStringScalarCharScalar.class, IfExprStringScalarVarCharScalar.class, - IfExprCharScalarStringScalar.class, IfExprVarCharScalarStringScalar.class + IfExprCharScalarStringScalar.class, IfExprVarCharScalarStringScalar.class, + + IfExprIntervalDayTimeColumnColumn.class, IfExprIntervalDayTimeColumnScalar.class, + IfExprIntervalDayTimeScalarColumn.class, IfExprIntervalDayTimeScalarScalar.class, + IfExprTimestampColumnColumn.class, IfExprTimestampColumnScalar.class, + IfExprTimestampScalarColumn.class, IfExprTimestampScalarScalar.class, }) public class GenericUDFIf extends GenericUDF { private transient ObjectInspector[] argumentOIs; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java index e82627d..b393843 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java @@ -37,32 +37,56 @@ DoubleColEqualLongScalar.class, DoubleColEqualDoubleScalar.class, LongScalarEqualLongColumn.class, LongScalarEqualDoubleColumn.class, DoubleScalarEqualLongColumn.class, DoubleScalarEqualDoubleColumn.class, + StringGroupColEqualStringGroupColumn.class, FilterStringGroupColEqualStringGroupColumn.class, StringGroupColEqualStringScalar.class, StringGroupColEqualVarCharScalar.class, StringGroupColEqualCharScalar.class, StringScalarEqualStringGroupColumn.class, VarCharScalarEqualStringGroupColumn.class, CharScalarEqualStringGroupColumn.class, + FilterStringGroupColEqualStringScalar.class, FilterStringScalarEqualStringGroupColumn.class, FilterStringGroupColEqualVarCharScalar.class, FilterVarCharScalarEqualStringGroupColumn.class, FilterStringGroupColEqualCharScalar.class, FilterCharScalarEqualStringGroupColumn.class, + FilterLongColEqualLongColumn.class, FilterLongColEqualDoubleColumn.class, FilterDoubleColEqualLongColumn.class, FilterDoubleColEqualDoubleColumn.class, FilterLongColEqualLongScalar.class, FilterLongColEqualDoubleScalar.class, FilterDoubleColEqualLongScalar.class, FilterDoubleColEqualDoubleScalar.class, FilterLongScalarEqualLongColumn.class, FilterLongScalarEqualDoubleColumn.class, FilterDoubleScalarEqualLongColumn.class, FilterDoubleScalarEqualDoubleColumn.class, + FilterDecimalColEqualDecimalColumn.class, FilterDecimalColEqualDecimalScalar.class, FilterDecimalScalarEqualDecimalColumn.class, + + TimestampColEqualTimestampColumn.class, 
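
// The IfExprTimestamp* / IfExprIntervalDayTime* classes registered above add
// vectorized IF() kernels over the new vector types. Illustrative shape of
// the column/column case (null and isRepeating handling omitted); assumes
// the generic ColumnVector.setElement(outIndex, inIndex, source) contract:
static void ifExprTimestampColumnColumn(LongColumnVector cond,
    TimestampColumnVector a, TimestampColumnVector b,
    TimestampColumnVector out, int n) {
  for (int i = 0; i < n; i++) {
    out.setElement(i, i, cond.vector[i] == 1 ? a : b);  // boolean is a 0/1 long
  }
}
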
TimestampColEqualTimestampScalar.class, TimestampScalarEqualTimestampColumn.class, + TimestampColEqualLongColumn.class, + TimestampColEqualLongScalar.class, TimestampScalarEqualLongColumn.class, + TimestampColEqualDoubleColumn.class, + TimestampColEqualDoubleScalar.class, TimestampScalarEqualDoubleColumn.class, + LongColEqualTimestampColumn.class, + LongColEqualTimestampScalar.class, LongScalarEqualTimestampColumn.class, + DoubleColEqualTimestampColumn.class, + DoubleColEqualTimestampScalar.class, DoubleScalarEqualTimestampColumn.class, + + FilterTimestampColEqualTimestampColumn.class, FilterTimestampColEqualTimestampScalar.class, FilterTimestampScalarEqualTimestampColumn.class, - TimestampColEqualLongScalar.class, LongScalarEqualTimestampColumn.class, - FilterTimestampColEqualLongScalar.class, FilterLongScalarEqualTimestampColumn.class, - TimestampColEqualDoubleScalar.class, DoubleScalarEqualTimestampColumn.class, - FilterTimestampColEqualDoubleScalar.class, FilterDoubleScalarEqualTimestampColumn.class, + FilterTimestampColEqualLongColumn.class, + FilterTimestampColEqualLongScalar.class, FilterTimestampScalarEqualLongColumn.class, + FilterTimestampColEqualDoubleColumn.class, + FilterTimestampColEqualDoubleScalar.class, FilterTimestampScalarEqualDoubleColumn.class, + FilterLongColEqualTimestampColumn.class, + FilterLongColEqualTimestampScalar.class, FilterLongScalarEqualTimestampColumn.class, + FilterDoubleColEqualTimestampColumn.class, + FilterDoubleColEqualTimestampScalar.class, FilterDoubleScalarEqualTimestampColumn.class, + IntervalYearMonthScalarEqualIntervalYearMonthColumn.class, FilterIntervalYearMonthScalarEqualIntervalYearMonthColumn.class, IntervalYearMonthColEqualIntervalYearMonthScalar.class, FilterIntervalYearMonthColEqualIntervalYearMonthScalar.class, + + IntervalDayTimeColEqualIntervalDayTimeColumn.class, FilterIntervalDayTimeColEqualIntervalDayTimeColumn.class, IntervalDayTimeScalarEqualIntervalDayTimeColumn.class, FilterIntervalDayTimeScalarEqualIntervalDayTimeColumn.class, IntervalDayTimeColEqualIntervalDayTimeScalar.class, FilterIntervalDayTimeColEqualIntervalDayTimeScalar.class, + DateColEqualDateScalar.class,FilterDateColEqualDateScalar.class, DateScalarEqualDateColumn.class,FilterDateScalarEqualDateColumn.class, }) diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java index bfd71c7..50c9d09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java @@ -38,32 +38,56 @@ DoubleColGreaterEqualLongScalar.class, DoubleColGreaterEqualDoubleScalar.class, LongScalarGreaterEqualLongColumn.class, LongScalarGreaterEqualDoubleColumn.class, DoubleScalarGreaterEqualLongColumn.class, DoubleScalarGreaterEqualDoubleColumn.class, + StringGroupColGreaterEqualStringGroupColumn.class, FilterStringGroupColGreaterEqualStringGroupColumn.class, StringGroupColGreaterEqualStringScalar.class, StringGroupColGreaterEqualVarCharScalar.class, StringGroupColGreaterEqualCharScalar.class, StringScalarGreaterEqualStringGroupColumn.class, VarCharScalarGreaterEqualStringGroupColumn.class, CharScalarGreaterEqualStringGroupColumn.class, + FilterStringGroupColGreaterEqualStringScalar.class, FilterStringScalarGreaterEqualStringGroupColumn.class, FilterStringGroupColGreaterEqualVarCharScalar.class, FilterVarCharScalarGreaterEqualStringGroupColumn.class, 
FilterStringGroupColGreaterEqualCharScalar.class, FilterCharScalarGreaterEqualStringGroupColumn.class, + FilterLongColGreaterEqualLongColumn.class, FilterLongColGreaterEqualDoubleColumn.class, FilterDoubleColGreaterEqualLongColumn.class, FilterDoubleColGreaterEqualDoubleColumn.class, FilterLongColGreaterEqualLongScalar.class, FilterLongColGreaterEqualDoubleScalar.class, FilterDoubleColGreaterEqualLongScalar.class, FilterDoubleColGreaterEqualDoubleScalar.class, FilterLongScalarGreaterEqualLongColumn.class, FilterLongScalarGreaterEqualDoubleColumn.class, FilterDoubleScalarGreaterEqualLongColumn.class, FilterDoubleScalarGreaterEqualDoubleColumn.class, + FilterDecimalColGreaterEqualDecimalColumn.class, FilterDecimalColGreaterEqualDecimalScalar.class, FilterDecimalScalarGreaterEqualDecimalColumn.class, + + TimestampColGreaterEqualTimestampColumn.class, TimestampColGreaterEqualTimestampScalar.class, TimestampScalarGreaterEqualTimestampColumn.class, + TimestampColGreaterEqualLongColumn.class, + TimestampColGreaterEqualLongScalar.class, TimestampScalarGreaterEqualLongColumn.class, + TimestampColGreaterEqualDoubleColumn.class, + TimestampColGreaterEqualDoubleScalar.class, TimestampScalarGreaterEqualDoubleColumn.class, + LongColGreaterEqualTimestampColumn.class, + LongColGreaterEqualTimestampScalar.class, LongScalarGreaterEqualTimestampColumn.class, + DoubleColGreaterEqualTimestampColumn.class, + DoubleColGreaterEqualTimestampScalar.class, DoubleScalarGreaterEqualTimestampColumn.class, + + FilterTimestampColGreaterEqualTimestampColumn.class, FilterTimestampColGreaterEqualTimestampScalar.class, FilterTimestampScalarGreaterEqualTimestampColumn.class, - TimestampColGreaterEqualLongScalar.class, LongScalarGreaterEqualTimestampColumn.class, - FilterTimestampColGreaterEqualLongScalar.class, FilterLongScalarGreaterEqualTimestampColumn.class, - TimestampColGreaterEqualDoubleScalar.class, DoubleScalarGreaterEqualTimestampColumn.class, - FilterTimestampColGreaterEqualDoubleScalar.class, FilterDoubleScalarGreaterEqualTimestampColumn.class, + FilterTimestampColGreaterEqualLongColumn.class, + FilterTimestampColGreaterEqualLongScalar.class, FilterTimestampScalarGreaterEqualLongColumn.class, + FilterTimestampColGreaterEqualDoubleColumn.class, + FilterTimestampColGreaterEqualDoubleScalar.class, FilterTimestampScalarGreaterEqualDoubleColumn.class, + FilterLongColGreaterEqualTimestampColumn.class, + FilterLongColGreaterEqualTimestampScalar.class, FilterLongScalarGreaterEqualTimestampColumn.class, + FilterDoubleColGreaterEqualTimestampColumn.class, + FilterDoubleColGreaterEqualTimestampScalar.class, FilterDoubleScalarGreaterEqualTimestampColumn.class, + IntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn.class, FilterIntervalYearMonthScalarGreaterEqualIntervalYearMonthColumn.class, IntervalYearMonthColGreaterEqualIntervalYearMonthScalar.class, FilterIntervalYearMonthColGreaterEqualIntervalYearMonthScalar.class, + + IntervalDayTimeColGreaterEqualIntervalDayTimeColumn.class, FilterIntervalDayTimeColGreaterEqualIntervalDayTimeColumn.class, IntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn.class, FilterIntervalDayTimeScalarGreaterEqualIntervalDayTimeColumn.class, IntervalDayTimeColGreaterEqualIntervalDayTimeScalar.class, FilterIntervalDayTimeColGreaterEqualIntervalDayTimeScalar.class, + DateColGreaterEqualDateScalar.class,FilterDateColGreaterEqualDateScalar.class, DateScalarGreaterEqualDateColumn.class,FilterDateScalarGreaterEqualDateColumn.class, }) diff --git 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java index 1e69ee6..c28d797 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java @@ -38,14 +38,17 @@ DoubleColLessEqualLongScalar.class, DoubleColLessEqualDoubleScalar.class, LongScalarLessEqualLongColumn.class, LongScalarLessEqualDoubleColumn.class, DoubleScalarLessEqualLongColumn.class, DoubleScalarLessEqualDoubleColumn.class, + StringGroupColLessEqualStringGroupColumn.class, FilterStringGroupColLessEqualStringGroupColumn.class, StringGroupColLessEqualStringScalar.class, StringGroupColLessEqualVarCharScalar.class, StringGroupColLessEqualCharScalar.class, StringScalarLessEqualStringGroupColumn.class, VarCharScalarLessEqualStringGroupColumn.class, CharScalarLessEqualStringGroupColumn.class, + FilterStringGroupColLessEqualStringScalar.class, FilterStringScalarLessEqualStringGroupColumn.class, FilterStringGroupColLessEqualVarCharScalar.class, FilterVarCharScalarLessEqualStringGroupColumn.class, FilterStringGroupColLessEqualCharScalar.class, FilterCharScalarLessEqualStringGroupColumn.class, + FilterLongColLessEqualLongColumn.class, FilterLongColLessEqualDoubleColumn.class, FilterDoubleColLessEqualLongColumn.class, FilterDoubleColLessEqualDoubleColumn.class, FilterLongColLessEqualLongScalar.class, FilterLongColLessEqualDoubleScalar.class, @@ -54,16 +57,36 @@ FilterDoubleScalarLessEqualLongColumn.class, FilterDoubleScalarLessEqualDoubleColumn.class, FilterDecimalColLessEqualDecimalColumn.class, FilterDecimalColLessEqualDecimalScalar.class, FilterDecimalScalarLessEqualDecimalColumn.class, + + TimestampColLessEqualTimestampColumn.class, TimestampColLessEqualTimestampScalar.class, TimestampScalarLessEqualTimestampColumn.class, + TimestampColLessEqualLongColumn.class, + TimestampColLessEqualLongScalar.class, TimestampScalarLessEqualLongColumn.class, + TimestampColLessEqualDoubleColumn.class, + TimestampColLessEqualDoubleScalar.class, TimestampScalarLessEqualDoubleColumn.class, + LongColLessEqualTimestampColumn.class, + LongColLessEqualTimestampScalar.class, LongScalarLessEqualTimestampColumn.class, + DoubleColLessEqualTimestampColumn.class, + DoubleColLessEqualTimestampScalar.class, DoubleScalarLessEqualTimestampColumn.class, + + FilterTimestampColLessEqualTimestampColumn.class, FilterTimestampColLessEqualTimestampScalar.class, FilterTimestampScalarLessEqualTimestampColumn.class, - TimestampColLessEqualLongScalar.class, LongScalarLessEqualTimestampColumn.class, - FilterTimestampColLessEqualLongScalar.class, FilterLongScalarLessEqualTimestampColumn.class, - TimestampColLessEqualDoubleScalar.class, DoubleScalarLessEqualTimestampColumn.class, - FilterTimestampColLessEqualDoubleScalar.class, FilterDoubleScalarLessEqualTimestampColumn.class, + FilterTimestampColLessEqualLongColumn.class, + FilterTimestampColLessEqualLongScalar.class, FilterTimestampScalarLessEqualLongColumn.class, + FilterTimestampColLessEqualDoubleColumn.class, + FilterTimestampColLessEqualDoubleScalar.class, FilterTimestampScalarLessEqualDoubleColumn.class, + FilterLongColLessEqualTimestampColumn.class, + FilterLongColLessEqualTimestampScalar.class, FilterLongScalarLessEqualTimestampColumn.class, + FilterDoubleColLessEqualTimestampColumn.class, + FilterDoubleColLessEqualTimestampScalar.class, FilterDoubleScalarLessEqualTimestampColumn.class, + 
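+      // Each timestamp pairing above is listed in both a projection form and a Filter
+      // form, and in column/column, column/scalar, and scalar/column operand variants.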
IntervalYearMonthScalarLessEqualIntervalYearMonthColumn.class, FilterIntervalYearMonthScalarLessEqualIntervalYearMonthColumn.class, IntervalYearMonthColLessEqualIntervalYearMonthScalar.class, FilterIntervalYearMonthColLessEqualIntervalYearMonthScalar.class, + + IntervalDayTimeColLessEqualIntervalDayTimeColumn.class, FilterIntervalDayTimeColLessEqualIntervalDayTimeColumn.class, IntervalDayTimeScalarLessEqualIntervalDayTimeColumn.class, FilterIntervalDayTimeScalarLessEqualIntervalDayTimeColumn.class, IntervalDayTimeColLessEqualIntervalDayTimeScalar.class, FilterIntervalDayTimeColLessEqualIntervalDayTimeScalar.class, + DateColLessEqualDateScalar.class,FilterDateColLessEqualDateScalar.class, DateScalarLessEqualDateColumn.class,FilterDateScalarLessEqualDateColumn.class, }) diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java index bba4d97..72fe43d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java @@ -38,14 +38,17 @@ DoubleColGreaterLongScalar.class, DoubleColGreaterDoubleScalar.class, LongScalarGreaterLongColumn.class, LongScalarGreaterDoubleColumn.class, DoubleScalarGreaterLongColumn.class, DoubleScalarGreaterDoubleColumn.class, + StringGroupColGreaterStringGroupColumn.class, FilterStringGroupColGreaterStringGroupColumn.class, StringGroupColGreaterStringScalar.class, StringGroupColGreaterVarCharScalar.class, StringGroupColGreaterCharScalar.class, StringScalarGreaterStringGroupColumn.class, VarCharScalarGreaterStringGroupColumn.class, CharScalarGreaterStringGroupColumn.class, + FilterStringGroupColGreaterStringScalar.class, FilterStringScalarGreaterStringGroupColumn.class, FilterStringGroupColGreaterVarCharScalar.class, FilterVarCharScalarGreaterStringGroupColumn.class, FilterStringGroupColGreaterCharScalar.class, FilterCharScalarGreaterStringGroupColumn.class, + FilterLongColGreaterLongColumn.class, FilterLongColGreaterDoubleColumn.class, FilterDoubleColGreaterLongColumn.class, FilterDoubleColGreaterDoubleColumn.class, FilterLongColGreaterLongScalar.class, FilterLongColGreaterDoubleScalar.class, @@ -54,16 +57,36 @@ FilterDoubleScalarGreaterLongColumn.class, FilterDoubleScalarGreaterDoubleColumn.class, FilterDecimalColGreaterDecimalColumn.class, FilterDecimalColGreaterDecimalScalar.class, FilterDecimalScalarGreaterDecimalColumn.class, + + TimestampColGreaterTimestampColumn.class, TimestampColGreaterTimestampScalar.class, TimestampScalarGreaterTimestampColumn.class, + TimestampColGreaterLongColumn.class, + TimestampColGreaterLongScalar.class, TimestampScalarGreaterLongColumn.class, + TimestampColGreaterDoubleColumn.class, + TimestampColGreaterDoubleScalar.class, TimestampScalarGreaterDoubleColumn.class, + LongColGreaterTimestampColumn.class, + LongColGreaterTimestampScalar.class, LongScalarGreaterTimestampColumn.class, + DoubleColGreaterTimestampColumn.class, + DoubleColGreaterTimestampScalar.class, DoubleScalarGreaterTimestampColumn.class, + + FilterTimestampColGreaterTimestampColumn.class, FilterTimestampColGreaterTimestampScalar.class, FilterTimestampScalarGreaterTimestampColumn.class, - TimestampColGreaterLongScalar.class, LongScalarGreaterTimestampColumn.class, - FilterTimestampColGreaterLongScalar.class, FilterLongScalarGreaterTimestampColumn.class, - TimestampColGreaterDoubleScalar.class, DoubleScalarGreaterTimestampColumn.class, - 
FilterTimestampColGreaterDoubleScalar.class, FilterDoubleScalarGreaterTimestampColumn.class, + FilterTimestampColGreaterLongColumn.class, + FilterTimestampColGreaterLongScalar.class, FilterTimestampScalarGreaterLongColumn.class, + FilterTimestampColGreaterDoubleColumn.class, + FilterTimestampColGreaterDoubleScalar.class, FilterTimestampScalarGreaterDoubleColumn.class, + FilterLongColGreaterTimestampColumn.class, + FilterLongColGreaterTimestampScalar.class, FilterLongScalarGreaterTimestampColumn.class, + FilterDoubleColGreaterTimestampColumn.class, + FilterDoubleColGreaterTimestampScalar.class, FilterDoubleScalarGreaterTimestampColumn.class, + IntervalYearMonthScalarGreaterIntervalYearMonthColumn.class, FilterIntervalYearMonthScalarGreaterIntervalYearMonthColumn.class, IntervalYearMonthColGreaterIntervalYearMonthScalar.class, FilterIntervalYearMonthColGreaterIntervalYearMonthScalar.class, + + IntervalDayTimeColGreaterIntervalDayTimeColumn.class, FilterIntervalDayTimeColGreaterIntervalDayTimeColumn.class, IntervalDayTimeScalarGreaterIntervalDayTimeColumn.class, FilterIntervalDayTimeScalarGreaterIntervalDayTimeColumn.class, IntervalDayTimeColGreaterIntervalDayTimeScalar.class, FilterIntervalDayTimeColGreaterIntervalDayTimeScalar.class, + DateColGreaterDateScalar.class,FilterDateColGreaterDateScalar.class, DateScalarGreaterDateColumn.class,FilterDateScalarGreaterDateColumn.class, }) diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java index b992fe6..114d190 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java @@ -38,6 +38,7 @@ DoubleColLessLongScalar.class, DoubleColLessDoubleScalar.class, LongScalarLessLongColumn.class, LongScalarLessDoubleColumn.class, DoubleScalarLessLongColumn.class, DoubleScalarLessDoubleColumn.class, + StringGroupColLessStringGroupColumn.class, FilterStringGroupColLessStringGroupColumn.class, StringGroupColLessStringScalar.class, StringGroupColLessVarCharScalar.class, StringGroupColLessCharScalar.class, @@ -46,24 +47,46 @@ FilterStringGroupColLessStringScalar.class, FilterStringScalarLessStringGroupColumn.class, FilterStringGroupColLessVarCharScalar.class, FilterVarCharScalarLessStringGroupColumn.class, FilterStringGroupColLessCharScalar.class, FilterCharScalarLessStringGroupColumn.class, + FilterLongColLessLongColumn.class, FilterLongColLessDoubleColumn.class, FilterDoubleColLessLongColumn.class, FilterDoubleColLessDoubleColumn.class, FilterLongColLessLongScalar.class, FilterLongColLessDoubleScalar.class, FilterDoubleColLessLongScalar.class, FilterDoubleColLessDoubleScalar.class, FilterLongScalarLessLongColumn.class, FilterLongScalarLessDoubleColumn.class, FilterDoubleScalarLessLongColumn.class, FilterDoubleScalarLessDoubleColumn.class, + FilterDecimalColLessDecimalColumn.class, FilterDecimalColLessDecimalScalar.class, FilterDecimalScalarLessDecimalColumn.class, + + TimestampColLessTimestampColumn.class, TimestampColLessTimestampScalar.class, TimestampScalarLessTimestampColumn.class, + TimestampColLessLongColumn.class, + TimestampColLessLongScalar.class, TimestampScalarLessLongColumn.class, + TimestampColLessDoubleColumn.class, + TimestampColLessDoubleScalar.class, TimestampScalarLessDoubleColumn.class, + LongColLessTimestampColumn.class, + LongColLessTimestampScalar.class, LongScalarLessTimestampColumn.class, + DoubleColLessTimestampColumn.class, + 
DoubleColLessTimestampScalar.class, DoubleScalarLessTimestampColumn.class, + + FilterTimestampColLessTimestampColumn.class, FilterTimestampColLessTimestampScalar.class, FilterTimestampScalarLessTimestampColumn.class, - TimestampColLessLongScalar.class, LongScalarLessTimestampColumn.class, - FilterTimestampColLessLongScalar.class, FilterLongScalarLessTimestampColumn.class, - TimestampColLessDoubleScalar.class, DoubleScalarLessTimestampColumn.class, - FilterTimestampColLessDoubleScalar.class, FilterDoubleScalarLessTimestampColumn.class, + FilterTimestampColLessLongColumn.class, + FilterTimestampColLessLongScalar.class, FilterTimestampScalarLessLongColumn.class, + FilterTimestampColLessDoubleColumn.class, + FilterTimestampColLessDoubleScalar.class, FilterTimestampScalarLessDoubleColumn.class, + FilterLongColLessTimestampColumn.class, + FilterLongColLessTimestampScalar.class, FilterLongScalarLessTimestampColumn.class, + FilterDoubleColLessTimestampColumn.class, + FilterDoubleColLessTimestampScalar.class, FilterDoubleScalarLessTimestampColumn.class, + IntervalYearMonthScalarLessIntervalYearMonthColumn.class, FilterIntervalYearMonthScalarLessIntervalYearMonthColumn.class, IntervalYearMonthColLessIntervalYearMonthScalar.class, FilterIntervalYearMonthColLessIntervalYearMonthScalar.class, + + IntervalDayTimeColLessIntervalDayTimeColumn.class, FilterIntervalDayTimeColLessIntervalDayTimeColumn.class, IntervalDayTimeScalarLessIntervalDayTimeColumn.class, FilterIntervalDayTimeScalarLessIntervalDayTimeColumn.class, IntervalDayTimeColLessIntervalDayTimeScalar.class, FilterIntervalDayTimeColLessIntervalDayTimeScalar.class, + DateColLessDateScalar.class,FilterDateColLessDateScalar.class, DateScalarLessDateColumn.class,FilterDateScalarLessDateColumn.class, }) diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java index d6a0c58..ca01b8a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPMinus.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.*; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java index ad47681..ed6aa36 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java @@ -37,32 +37,56 @@ DoubleColNotEqualLongScalar.class, DoubleColNotEqualDoubleScalar.class, LongScalarNotEqualLongColumn.class, LongScalarNotEqualDoubleColumn.class, DoubleScalarNotEqualLongColumn.class, DoubleScalarNotEqualDoubleColumn.class, + StringGroupColNotEqualStringGroupColumn.class, FilterStringGroupColNotEqualStringGroupColumn.class, StringGroupColNotEqualStringScalar.class, StringGroupColNotEqualVarCharScalar.class, StringGroupColNotEqualCharScalar.class, StringScalarNotEqualStringGroupColumn.class, VarCharScalarNotEqualStringGroupColumn.class, CharScalarNotEqualStringGroupColumn.class, + FilterStringGroupColNotEqualStringScalar.class, FilterStringScalarNotEqualStringGroupColumn.class, FilterStringGroupColNotEqualVarCharScalar.class, FilterVarCharScalarNotEqualStringGroupColumn.class, 
FilterStringGroupColNotEqualCharScalar.class, FilterCharScalarNotEqualStringGroupColumn.class, + FilterLongColNotEqualLongColumn.class, FilterLongColNotEqualDoubleColumn.class, FilterDoubleColNotEqualLongColumn.class, FilterDoubleColNotEqualDoubleColumn.class, FilterLongColNotEqualLongScalar.class, FilterLongColNotEqualDoubleScalar.class, FilterDoubleColNotEqualLongScalar.class, FilterDoubleColNotEqualDoubleScalar.class, FilterLongScalarNotEqualLongColumn.class, FilterLongScalarNotEqualDoubleColumn.class, FilterDoubleScalarNotEqualLongColumn.class, FilterDoubleScalarNotEqualDoubleColumn.class, + FilterDecimalColNotEqualDecimalColumn.class, FilterDecimalColNotEqualDecimalScalar.class, FilterDecimalScalarNotEqualDecimalColumn.class, + + TimestampColNotEqualTimestampColumn.class, TimestampColNotEqualTimestampScalar.class, TimestampScalarNotEqualTimestampColumn.class, + TimestampColNotEqualLongColumn.class, + TimestampColNotEqualLongScalar.class, TimestampScalarNotEqualLongColumn.class, + TimestampColNotEqualDoubleColumn.class, + TimestampColNotEqualDoubleScalar.class, TimestampScalarNotEqualDoubleColumn.class, + LongColNotEqualTimestampColumn.class, + LongColNotEqualTimestampScalar.class, LongScalarNotEqualTimestampColumn.class, + DoubleColNotEqualTimestampColumn.class, + DoubleColNotEqualTimestampScalar.class, DoubleScalarNotEqualTimestampColumn.class, + + FilterTimestampColNotEqualTimestampColumn.class, FilterTimestampColNotEqualTimestampScalar.class, FilterTimestampScalarNotEqualTimestampColumn.class, - TimestampColNotEqualLongScalar.class, LongScalarNotEqualTimestampColumn.class, - FilterTimestampColNotEqualLongScalar.class, FilterLongScalarNotEqualTimestampColumn.class, - TimestampColNotEqualDoubleScalar.class, DoubleScalarNotEqualTimestampColumn.class, - FilterTimestampColNotEqualDoubleScalar.class, FilterDoubleScalarNotEqualTimestampColumn.class, + FilterTimestampColNotEqualLongColumn.class, + FilterTimestampColNotEqualLongScalar.class, FilterTimestampScalarNotEqualLongColumn.class, + FilterTimestampColNotEqualDoubleColumn.class, + FilterTimestampColNotEqualDoubleScalar.class, FilterTimestampScalarNotEqualDoubleColumn.class, + FilterLongColNotEqualTimestampColumn.class, + FilterLongColNotEqualTimestampScalar.class, FilterLongScalarNotEqualTimestampColumn.class, + FilterDoubleColNotEqualTimestampColumn.class, + FilterDoubleColNotEqualTimestampScalar.class, FilterDoubleScalarNotEqualTimestampColumn.class, + IntervalYearMonthScalarNotEqualIntervalYearMonthColumn.class, FilterIntervalYearMonthScalarNotEqualIntervalYearMonthColumn.class, IntervalYearMonthColNotEqualIntervalYearMonthScalar.class, FilterIntervalYearMonthColNotEqualIntervalYearMonthScalar.class, + + IntervalDayTimeColNotEqualIntervalDayTimeColumn.class, FilterIntervalDayTimeColNotEqualIntervalDayTimeColumn.class, IntervalDayTimeScalarNotEqualIntervalDayTimeColumn.class, FilterIntervalDayTimeScalarNotEqualIntervalDayTimeColumn.class, IntervalDayTimeColNotEqualIntervalDayTimeScalar.class, FilterIntervalDayTimeColNotEqualIntervalDayTimeScalar.class, + DateColNotEqualDateScalar.class,FilterDateColNotEqualDateScalar.class, DateScalarNotEqualDateColumn.class,FilterDateScalarNotEqualDateColumn.class, }) diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java index 5755a99..b7e36f1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPPlus.java 
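// --- Aside: illustration only, not part of the patch. ---
// GenericUDFOPPlus (below) and GenericUDFOPMinus (above) now import
// org.apache.hadoop.hive.ql.exec.vector.expressions.* so they can reference the
// hand-written timestamp/interval arithmetic kernels. A minimal sketch of what one such
// kernel plausibly looks like under this patch's column layout follows; the class name
// and evaluate() signature here are invented for illustration, while
// TimestampColumnVector.asScratchTimestamp()/set(), the PisaTimestamp constructors, and
// the static PisaTimestamp.add(epochDay, nanoOfDay, epochDay, nanoOfDay, result) helper
// all appear elsewhere in this patch.

import org.apache.hadoop.hive.common.type.PisaTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class TimestampColAddIntervalDayTimeScalarSketch {

  // Add an interval_day_time scalar (carried as a PisaTimestamp) to the first n rows
  // of a timestamp column, writing timestamp results to the output column.
  public static void evaluate(TimestampColumnVector in, PisaTimestamp scalar,
      TimestampColumnVector out, int n) {
    for (int i = 0; i < n; i++) {
      if (!in.noNulls && in.isNull[i]) {
        out.isNull[i] = true;
        out.noNulls = false;
        continue;
      }
      PisaTimestamp row = new PisaTimestamp(in.asScratchTimestamp(i));
      PisaTimestamp sum = new PisaTimestamp(0, 0);
      // Component-wise add of the two (epochDay, nanoOfDay) pairs; add() is assumed
      // to normalize any nanoOfDay carry into epochDay.
      PisaTimestamp.add(row.getEpochDay(), row.getNanoOfDay(),
          scalar.getEpochDay(), scalar.getNanoOfDay(), sum);
      out.set(i, sum);
    }
  }
}
// --- End aside. ---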
@@ -20,6 +20,7 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; +import org.apache.hadoop.hive.ql.exec.vector.expressions.*; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; /** diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java index 9f3ab91..4d9691e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTimestamp.java @@ -25,8 +25,8 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToTimestampViaDoubleToLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToTimestampViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToTimestamp; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; @@ -47,8 +47,8 @@ */ @Description(name = "timestamp", value = "cast(date as timestamp) - Returns timestamp") -@VectorizedExpressions({CastLongToTimestampViaLongToLong.class, - CastDoubleToTimestampViaDoubleToLong.class, CastDecimalToTimestamp.class}) +@VectorizedExpressions({CastLongToTimestamp.class, + CastDoubleToTimestamp.class, CastDecimalToTimestamp.class}) public class GenericUDFTimestamp extends GenericUDF { private transient PrimitiveObjectInspector argumentOI; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java index 0613005..7e8a472 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastLongToDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToDate; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDate; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -41,7 +42,7 @@ + "Example:\n " + " > SELECT CAST('2009-01-01' AS DATE) FROM src LIMIT 1;\n" + " '2009-01-01'") -@VectorizedExpressions({CastStringToDate.class, CastLongToDate.class}) +@VectorizedExpressions({CastStringToDate.class, CastLongToDate.class, CastTimestampToDate.class}) public class GenericUDFToDate extends GenericUDF { private transient PrimitiveObjectInspector argumentOI; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index 4ab5389..819de77 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -27,8 +27,9 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampTimestamp; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -50,7 +51,7 @@ @Description(name = "to_unix_timestamp", value = "_FUNC_(date[, pattern]) - Returns the UNIX timestamp", extended = "Converts the specified time to number of seconds since 1970-01-01.") -@VectorizedExpressions({VectorUDFUnixTimeStampLong.class, VectorUDFUnixTimeStampString.class}) +@VectorizedExpressions({VectorUDFUnixTimeStampDate.class, VectorUDFUnixTimeStampString.class, VectorUDFUnixTimeStampTimestamp.class}) public class GenericUDFToUnixTimeStamp extends GenericUDF { private transient DateObjectInspector inputDateOI; diff --git ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java index ff28995..e092ac2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java +++ ql/src/java/org/apache/hadoop/hive/ql/util/DateTimeMath.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hive.common.util.DateUtils; @@ -90,6 +91,26 @@ public long addMonthsToNanosLocal(long nanos, int months) { return result; } + /** + * Perform month arithmetic on a PisaTimestamp value using the local time zone. + * @param pisaTimestamp the input timestamp; not modified + * @param months the number of months to add (may be negative) + * @return scratchPisaTimestamp, updated in place with the result + */ + public PisaTimestamp addMonthsToPisaTimestamp(PisaTimestamp pisaTimestamp, int months, + PisaTimestamp scratchPisaTimestamp) { + calLocal.setTimeInMillis(pisaTimestamp.getTimestampMilliseconds()); + calLocal.add(Calendar.MONTH, months); + scratchPisaTimestamp.updateFromTimestampMilliseconds(calLocal.getTimeInMillis()); + + // Add in portion of nanos below a millisecond... 
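+    // (The Calendar arithmetic above is millisecond-precision, so scratchPisaTimestamp
+    // currently lacks the input's nanos within its last millisecond; nanoOfDay % 1000000
+    // re-applies exactly that sub-millisecond remainder.)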
+ PisaTimestamp.add( + scratchPisaTimestamp.getEpochDay(), scratchPisaTimestamp.getNanoOfDay(), + 0, pisaTimestamp.getNanoOfDay() % 1000000, + scratchPisaTimestamp); + return scratchPisaTimestamp; + } + public long addMonthsToDays(long days, int months) { long millis = DateWritable.daysToMillis((int) days); millis = addMonthsToMillisLocal(millis, months); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 515ea7b..e4c7529 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -50,6 +50,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampColumnScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprTimestampScalarScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull; @@ -67,11 +71,13 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLower; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUpper; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampDate; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearDate; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn; @@ -151,10 +157,10 @@ @Test public void testVectorExpressionDescriptor() { - VectorUDFUnixTimeStampLong v1 = new VectorUDFUnixTimeStampLong(); + VectorUDFUnixTimeStampDate v1 = new VectorUDFUnixTimeStampDate(); VectorExpressionDescriptor.Builder builder1 = new VectorExpressionDescriptor.Builder(); VectorExpressionDescriptor.Descriptor d1 = builder1.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1).setArgumentTypes(VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY) + .setNumArguments(1).setArgumentTypes(VectorExpressionDescriptor.ArgumentType.INT_DATE_INTERVAL_YEAR_MONTH) 
.setInputExpressionTypes(VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); assertTrue(d1.matches(v1.getDescriptor())); @@ -1086,14 +1092,14 @@ public void testTimeStampUdfs() throws HiveException { tsFuncExpr.setGenericUDF(gudfBridge); tsFuncExpr.setChildren(children); VectorExpression ve = vc.getVectorExpression(tsFuncExpr); - Assert.assertEquals(VectorUDFYearLong.class, ve.getClass()); + Assert.assertEquals(VectorUDFYearTimestamp.class, ve.getClass()); //GenericUDFToUnixTimeStamp GenericUDFToUnixTimeStamp gudf = new GenericUDFToUnixTimeStamp(); tsFuncExpr.setGenericUDF(gudf); tsFuncExpr.setTypeInfo(TypeInfoFactory.longTypeInfo); ve = vc.getVectorExpression(tsFuncExpr); - Assert.assertEquals(VectorUDFUnixTimeStampLong.class, ve.getClass()); + Assert.assertEquals(VectorUDFUnixTimeStampTimestamp.class, ve.getClass()); } @Test @@ -1353,7 +1359,7 @@ public void testIfConditionalExprs() throws HiveException { children1.set(1, col2Expr); children1.set(2, col3Expr); ve = vc.getVectorExpression(exprDesc); - assertTrue(ve instanceof IfExprLongColumnLongColumn); + assertTrue(ve instanceof IfExprTimestampColumnColumn); // timestamp column/scalar IF where scalar is really a CAST of a constant to timestamp. ExprNodeGenericFuncDesc f = new ExprNodeGenericFuncDesc(); @@ -1368,20 +1374,20 @@ public void testIfConditionalExprs() throws HiveException { // We check for two different classes below because initially the result // is IfExprLongColumnLongColumn but in the future if the system is enhanced // with constant folding then the result will be IfExprLongColumnLongScalar. - assertTrue(IfExprLongColumnLongColumn.class == ve.getClass() - || IfExprLongColumnLongScalar.class == ve.getClass()); + assertTrue(IfExprTimestampColumnColumn.class == ve.getClass() + || IfExprTimestampColumnScalar.class == ve.getClass()); // timestamp scalar/scalar children1.set(1, f); ve = vc.getVectorExpression(exprDesc); - assertTrue(IfExprLongColumnLongColumn.class == ve.getClass() - || IfExprLongScalarLongScalar.class == ve.getClass()); + assertTrue(IfExprTimestampColumnColumn.class == ve.getClass() + || IfExprTimestampScalarScalar.class == ve.getClass()); // timestamp scalar/column children1.set(2, col3Expr); ve = vc.getVectorExpression(exprDesc); - assertTrue(IfExprLongColumnLongColumn.class == ve.getClass() - || IfExprLongScalarLongColumn.class == ve.getClass()); + assertTrue(IfExprTimestampColumnColumn.class == ve.getClass() + || IfExprTimestampScalarColumn.class == ve.getClass()); // test for boolean type col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java index c2bf85a..7b07293 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java @@ -44,9 +44,9 @@ public void testConstantExpression() { String str = "alpha"; ConstantVectorExpression bytesCve = new ConstantVectorExpression(2, str.getBytes()); HiveDecimal decVal = HiveDecimal.create("25.8"); - ConstantVectorExpression decimalCve = new ConstantVectorExpression(3, decVal); + ConstantVectorExpression decimalCve = new ConstantVectorExpression(3, decVal, "decimal"); ConstantVectorExpression nullCve = new ConstantVectorExpression(4, "string", true); - + int size = 20; VectorizedRowBatch vrg = 
VectorizedRowGroupGenUtil.getVectorizedRowBatch(size, 5, 0); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java index 9c4a751..58cecc1 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java @@ -114,7 +114,7 @@ private void compareToUDFYearDate(long t, int y) { private void verifyUDFYear(VectorizedRowBatch batch) { VectorExpression udf = null; - udf = new VectorUDFYearLong(0, 1); + udf = new VectorUDFYearDate(0, 1); udf.setInputTypes(VectorExpression.Type.DATE); udf.evaluate(batch); final int in = 0; @@ -176,7 +176,7 @@ private void compareToUDFDayOfMonthDate(long t, int y) { private void verifyUDFDayOfMonth(VectorizedRowBatch batch) { VectorExpression udf = null; - udf = new VectorUDFDayOfMonthLong(0, 1); + udf = new VectorUDFDayOfMonthDate(0, 1); udf.setInputTypes(VectorExpression.Type.DATE); udf.evaluate(batch); final int in = 0; @@ -238,7 +238,7 @@ private void compareToUDFMonthDate(long t, int y) { private void verifyUDFMonth(VectorizedRowBatch batch) { VectorExpression udf; - udf = new VectorUDFMonthLong(0, 1); + udf = new VectorUDFMonthDate(0, 1); udf.setInputTypes(VectorExpression.Type.DATE); udf.evaluate(batch); final int in = 0; @@ -314,7 +314,7 @@ private void compareToUDFUnixTimeStampDate(long t, long y) { private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch) { VectorExpression udf; - udf = new VectorUDFUnixTimeStampLong(0, 1); + udf = new VectorUDFUnixTimeStampDate(0, 1); udf.setInputTypes(VectorExpression.Type.DATE); udf.evaluate(batch); final int in = 0; @@ -376,7 +376,7 @@ private void compareToUDFWeekOfYearDate(long t, int y) { private void verifyUDFWeekOfYear(VectorizedRowBatch batch) { VectorExpression udf; - udf = new VectorUDFWeekOfYearLong(0, 1); + udf = new VectorUDFWeekOfYearDate(0, 1); udf.setInputTypes(VectorExpression.Type.DATE); udf.evaluate(batch); final int in = 0; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java index 6523e7b..fc38dd3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorExpressionWriters.java @@ -25,14 +25,13 @@ import junit.framework.Assert; -import org.apache.hadoop.hive.common.type.Decimal128; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -45,7 +44,6 @@ import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -84,6 +82,11 @@ private Writable getWritableValue(TypeInfo ti, double value) { return null; } + + private Writable getWritableValue(TypeInfo ti, Timestamp value) { + return new TimestampWritable(value); + } + private Writable getWritableValue(TypeInfo ti, HiveDecimal value) { return new HiveDecimalWritable(value); } @@ -113,7 +116,6 @@ private Writable getWritableValue(TypeInfo ti, long value) { return new BooleanWritable( value == 0 ? false : true); } else if (ti.equals(TypeInfoFactory.timestampTypeInfo)) { Timestamp ts = new Timestamp(value); - TimestampUtils.assignTimeInNanoSec(value, ts); TimestampWritable tw = new TimestampWritable(ts); return tw; } @@ -199,13 +201,6 @@ private void testWriterLong(TypeInfo type) throws HiveException { Writable w = (Writable) vew.writeValue(lcv, i); if (w != null) { Writable expected = getWritableValue(type, lcv.vector[i]); - if (expected instanceof TimestampWritable) { - TimestampWritable t1 = (TimestampWritable) expected; - TimestampWritable t2 = (TimestampWritable) w; - Assert.assertTrue(t1.getNanos() == t2.getNanos()); - Assert.assertTrue(t1.getSeconds() == t2.getSeconds()); - continue; - } Assert.assertEquals(expected, w); } else { Assert.assertTrue(lcv.isNull[i]); @@ -226,20 +221,56 @@ private void testSetterLong(TypeInfo type) throws HiveException { values[i] = vew.setValue(values[i], lcv, i); if (values[i] != null) { Writable expected = getWritableValue(type, lcv.vector[i]); - if (expected instanceof TimestampWritable) { - TimestampWritable t1 = (TimestampWritable) expected; - TimestampWritable t2 = (TimestampWritable) values[i]; - Assert.assertTrue(t1.getNanos() == t2.getNanos()); - Assert.assertTrue(t1.getSeconds() == t2.getSeconds()); - continue; - } Assert.assertEquals(expected, values[i]); } else { Assert.assertTrue(lcv.isNull[i]); } } } - + + private void testWriterTimestamp(TypeInfo type) throws HiveException { + TimestampColumnVector tcv = VectorizedRowGroupGenUtil.generateTimestampColumnVector(true, false, + vectorSize, new Random(10)); + tcv.isNull[3] = true; + VectorExpressionWriter vew = getWriter(type); + for (int i = 0; i < vectorSize; i++) { + Writable w = (Writable) vew.writeValue(tcv, i); + if (w != null) { + Writable expected = getWritableValue(type, tcv.asScratchTimestamp(i)); + TimestampWritable t1 = (TimestampWritable) expected; + TimestampWritable t2 = (TimestampWritable) w; + Assert.assertTrue(t1.getNanos() == t2.getNanos()); + Assert.assertTrue(t1.getSeconds() == t2.getSeconds()); + } else { + Assert.assertTrue(tcv.isNull[i]); + } + } + } + + private void testSetterTimestamp(TypeInfo type) throws HiveException { + TimestampColumnVector tcv = VectorizedRowGroupGenUtil.generateTimestampColumnVector(true, false, + vectorSize, new Random(10)); + tcv.isNull[3] = true; + + Object[] values = new Object[this.vectorSize]; + + VectorExpressionWriter vew = getWriter(type); + for (int i = 0; i < vectorSize; i++) { + values[i] = null; // setValue() should be able to handle null input + values[i] = vew.setValue(values[i], tcv, i); + if (values[i] != null) { + Timestamp scratchTimestamp = tcv.asScratchTimestamp(i); + Writable expected = 
getWritableValue(type, scratchTimestamp); + TimestampWritable t1 = (TimestampWritable) expected; + TimestampWritable t2 = (TimestampWritable) values[i]; + Assert.assertTrue(t1.getNanos() == t2.getNanos()); + Assert.assertTrue(t1.getSeconds() == t2.getSeconds()); + } else { + Assert.assertTrue(tcv.isNull[i]); + } + } + } + private StructObjectInspector genStructOI() { ArrayList fieldNames1 = new ArrayList(); fieldNames1.add("theInt"); @@ -427,14 +458,14 @@ public void testVectorExpressionSetterBoolean() throws HiveException { @Test public void testVectorExpressionWriterTimestamp() throws HiveException { - testWriterLong(TypeInfoFactory.timestampTypeInfo); + testWriterTimestamp(TypeInfoFactory.timestampTypeInfo); } @Test public void testVectorExpressionSetterTimestamp() throws HiveException { - testSetterLong(TypeInfoFactory.timestampTypeInfo); + testSetterTimestamp(TypeInfoFactory.timestampTypeInfo); } - + @Test public void testVectorExpressionWriterByte() throws HiveException { testWriterLong(TypeInfoFactory.byteTypeInfo); @@ -469,67 +500,9 @@ public void testVectorExpressionSetterVarchar() throws HiveException { public void testVectorExpressionWriterBinary() throws HiveException { testWriterText(TypeInfoFactory.binaryTypeInfo); } - + @Test public void testVectorExpressionSetterBinary() throws HiveException { testSetterText(TypeInfoFactory.binaryTypeInfo); } - - @Test - public void testTimeStampUtils(){ - Timestamp ts = new Timestamp(0); - - // Convert positive nanoseconds to timestamp object. - TimestampUtils.assignTimeInNanoSec(1234567891, ts); - Assert.assertEquals(234567891, ts.getNanos()); - Assert.assertEquals(1234567891, TimestampUtils.getTimeNanoSec(ts)); - - // Test negative nanoseconds - TimestampUtils.assignTimeInNanoSec(-1234567891, ts); - Assert.assertEquals((1000000000-234567891), ts.getNanos()); - Assert.assertEquals(-1234567891, TimestampUtils.getTimeNanoSec(ts)); - - // Test positive value smaller than a second. - TimestampUtils.assignTimeInNanoSec(234567891, ts); - Assert.assertEquals(234567891, ts.getNanos()); - Assert.assertEquals(234567891, TimestampUtils.getTimeNanoSec(ts)); - - // Test negative value smaller than a second. 
- TimestampUtils.assignTimeInNanoSec(-234567891, ts); - Assert.assertEquals((1000000000-234567891), ts.getNanos()); - Assert.assertEquals(-234567891, TimestampUtils.getTimeNanoSec(ts)); - - // Test a positive long timestamp - long big = 152414813551296L; - TimestampUtils.assignTimeInNanoSec(big, ts); - Assert.assertEquals(big % 1000000000, ts.getNanos()); - Assert.assertEquals(big, TimestampUtils.getTimeNanoSec(ts)); - - // Test a negative long timestamp - big = -152414813551296L; - TimestampUtils.assignTimeInNanoSec(big, ts); - Assert.assertEquals((1000000000 + (big % 1000000000)), ts.getNanos()); - Assert.assertEquals(big, TimestampUtils.getTimeNanoSec(ts)); - - // big/1000000 will yield zero nanoseconds - big = -1794750230000828416L; - ts = new Timestamp(0); - TimestampUtils.assignTimeInNanoSec(big, ts); - Assert.assertEquals((1000000000 + big % 1000000000), ts.getNanos()); - Assert.assertEquals(big, TimestampUtils.getTimeNanoSec(ts)); - - // Very small nanosecond part - big = 1700000000000000016L; - ts = new Timestamp(0); - TimestampUtils.assignTimeInNanoSec(big, ts); - Assert.assertEquals(big % 1000000000, ts.getNanos()); - Assert.assertEquals(big, TimestampUtils.getTimeNanoSec(ts)); - - // Very small nanosecond part - big = -1700000000000000016L; - ts = new Timestamp(0); - TimestampUtils.assignTimeInNanoSec(big, ts); - Assert.assertEquals((1000000000 + big % 1000000000), ts.getNanos()); - Assert.assertEquals(big, TimestampUtils.getTimeNanoSec(ts)); - } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java index 3841317..319474e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java @@ -25,11 +25,12 @@ import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColGreaterEqualDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColLessDecimalScalar; @@ -49,6 +50,8 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColEqualDecimalScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalColEqualDecimalColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDecimalScalarEqualDecimalColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterTimestampColumnNotBetween; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongScalar; import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.junit.Assert; @@ -586,23 +589,23 @@ public void testFilterStringNotBetween() { @Test public void testFilterTimestampBetween() { - int seed = 17; - 
VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch( - 5, 2, seed); - LongColumnVector lcv0 = (LongColumnVector) vrb.cols[0]; - long startTS = 0; // the epoch - long endTS = TimestampUtils.getTimeNanoSec( - Timestamp.valueOf("2013-11-05 00:00:00.000000000")); + + VectorizedRowBatch vrb = new VectorizedRowBatch(1); + vrb.cols[0] = new TimestampColumnVector(); + + TimestampColumnVector lcv0 = (TimestampColumnVector) vrb.cols[0]; + Timestamp startTS = new Timestamp(0); // the epoch + Timestamp endTS = Timestamp.valueOf("2013-11-05 00:00:00.000000000"); Timestamp ts0 = Timestamp.valueOf("1963-11-06 00:00:00.000"); - lcv0.vector[0] = TimestampUtils.getTimeNanoSec(ts0); + lcv0.pisaTimestampUpdate(new PisaTimestamp(ts0), 0); Timestamp ts1 = Timestamp.valueOf("1983-11-06 00:00:00.000"); - lcv0.vector[1] = TimestampUtils.getTimeNanoSec(ts1); + lcv0.pisaTimestampUpdate(new PisaTimestamp(ts1), 1); Timestamp ts2 = Timestamp.valueOf("2099-11-06 00:00:00.000"); - lcv0.vector[2] = TimestampUtils.getTimeNanoSec(ts2); + lcv0.pisaTimestampUpdate(new PisaTimestamp(ts2), 2); vrb.size = 3; - VectorExpression expr1 = new FilterLongColumnBetween(0, startTS, endTS); + VectorExpression expr1 = new FilterTimestampColumnBetween(0, startTS, endTS); expr1.evaluate(vrb); assertEquals(1, vrb.size); assertEquals(true, vrb.selectedInUse); @@ -611,24 +614,22 @@ public void testFilterTimestampBetween() { @Test public void testFilterTimestampNotBetween() { - int seed = 17; - VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch( - 5, 2, seed); - LongColumnVector lcv0 = (LongColumnVector) vrb.cols[0]; - long startTS = TimestampUtils.getTimeNanoSec( - Timestamp.valueOf("2013-11-05 00:00:00.000000000")); - long endTS = TimestampUtils.getTimeNanoSec( - Timestamp.valueOf("2013-11-05 00:00:00.000000010")); + VectorizedRowBatch vrb = new VectorizedRowBatch(1); + vrb.cols[0] = new TimestampColumnVector(); + + TimestampColumnVector lcv0 = (TimestampColumnVector) vrb.cols[0]; + Timestamp startTS = Timestamp.valueOf("2013-11-05 00:00:00.000000000"); + Timestamp endTS = Timestamp.valueOf("2013-11-05 00:00:00.000000010"); Timestamp ts0 = Timestamp.valueOf("2013-11-04 00:00:00.000000000"); - lcv0.vector[0] = TimestampUtils.getTimeNanoSec(ts0); + lcv0.pisaTimestampUpdate(new PisaTimestamp(ts0), 0); Timestamp ts1 = Timestamp.valueOf("2013-11-05 00:00:00.000000002"); - lcv0.vector[1] = TimestampUtils.getTimeNanoSec(ts1); + lcv0.pisaTimestampUpdate(new PisaTimestamp(ts1), 1); Timestamp ts2 = Timestamp.valueOf("2099-11-06 00:00:00.000"); - lcv0.vector[2] = TimestampUtils.getTimeNanoSec(ts2); + lcv0.pisaTimestampUpdate(new PisaTimestamp(ts2), 2); vrb.size = 3; - VectorExpression expr1 = new FilterLongColumnNotBetween(0, startTS, endTS); + VectorExpression expr1 = new FilterTimestampColumnNotBetween(0, startTS, endTS); expr1.evaluate(vrb); assertEquals(2, vrb.size); assertEquals(true, vrb.selectedInUse); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java index 74f4671..3f2b031 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.junit.Assert; @@ -29,6 +30,7 @@ import java.io.UnsupportedEncodingException; import java.sql.Date; +import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.List; @@ -53,21 +55,21 @@ private LongColumnVector newRandomLongColumnVector(int range, int size) { return vector; } - private LongColumnVector toTimestamp(LongColumnVector date) { - LongColumnVector vector = new LongColumnVector(size); + private TimestampColumnVector toTimestamp(LongColumnVector date) { + TimestampColumnVector vector = new TimestampColumnVector(size); for (int i = 0; i < size; i++) { if (date.isNull[i]) { vector.isNull[i] = true; vector.noNulls = false; } else { - vector.vector[i] = toTimestamp(date.vector[i]); + vector.set(i, toTimestamp(date.vector[i])); } } return vector; } - private long toTimestamp(long date) { - return DateWritable.daysToMillis((int) date) * 1000000; + private Timestamp toTimestamp(long date) { + return new Timestamp(DateWritable.daysToMillis((int) date)); } private BytesColumnVector toString(LongColumnVector date) { @@ -474,7 +476,7 @@ public void testDateDiffScalarCol() { } VectorizedRowBatch batch = new VectorizedRowBatch(2, 1); - udf = new VectorUDFDateDiffScalarCol(0, 0, 1); + udf = new VectorUDFDateDiffScalarCol(new Timestamp(0), 0, 1); udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING); batch.cols[0] = new BytesColumnVector(1); batch.cols[1] = new LongColumnVector(1); @@ -615,7 +617,7 @@ public void testDateDiffColCol() { udf.setInputTypes(VectorExpression.Type.STRING, VectorExpression.Type.TIMESTAMP); batch.cols[0] = new BytesColumnVector(1); - batch.cols[1] = new LongColumnVector(1); + batch.cols[1] = new TimestampColumnVector(1); batch.cols[2] = new LongColumnVector(1); bcv = (BytesColumnVector) batch.cols[0]; bcv.vector[0] = bytes; @@ -625,7 +627,7 @@ public void testDateDiffColCol() { Assert.assertEquals(batch.cols[2].isNull[0], true); udf.setInputTypes(VectorExpression.Type.TIMESTAMP, VectorExpression.Type.STRING); - batch.cols[0] = new LongColumnVector(1); + batch.cols[0] = new TimestampColumnVector(1); batch.cols[1] = new BytesColumnVector(1); batch.cols[2] = new LongColumnVector(1); bcv = (BytesColumnVector) batch.cols[1]; @@ -640,6 +642,8 @@ private void validateDate(VectorizedRowBatch batch, VectorExpression.Type colTyp VectorExpression udf; if (colType == VectorExpression.Type.STRING) { udf = new VectorUDFDateString(0, 1); + } else if (colType == VectorExpression.Type.TIMESTAMP) { + udf = new VectorUDFDateTimestamp(0, 1); } else { udf = new VectorUDFDateLong(0, 1); } @@ -708,6 +712,8 @@ private void validateToDate(VectorizedRowBatch batch, VectorExpression.Type colT colType == VectorExpression.Type.CHAR || colType == VectorExpression.Type.VARCHAR) { udf = new CastStringToDate(0, 1); + } else if (colType == VectorExpression.Type.TIMESTAMP) { + udf = new CastTimestampToDate(0, 1); } else { udf = new CastLongToDate(0, 1); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java index 77365a8..c14eb4a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java +++ 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java @@ -23,9 +23,11 @@ import junit.framework.Assert; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncACosDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncASinDoubleToDouble; @@ -130,6 +132,27 @@ public static VectorizedRowBatch getVectorizedRowBatchDoubleInLongOut() { return batch; } + public static VectorizedRowBatch getVectorizedRowBatchDoubleInTimestampOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + TimestampColumnVector tcv; + DoubleColumnVector dcv; + tcv = new TimestampColumnVector(); + dcv = new DoubleColumnVector(); + dcv.vector[0] = -1.5d; + dcv.vector[1] = -0.5d; + dcv.vector[2] = -0.1d; + dcv.vector[3] = 0d; + dcv.vector[4] = 0.5d; + dcv.vector[5] = 0.7d; + dcv.vector[6] = 1.5d; + + batch.cols[0] = dcv; + batch.cols[1] = tcv; + + batch.size = 7; + return batch; + } + public static VectorizedRowBatch getVectorizedRowBatchDoubleInDoubleOut() { VectorizedRowBatch batch = new VectorizedRowBatch(2); DoubleColumnVector inV; @@ -171,6 +194,25 @@ public static VectorizedRowBatch getVectorizedRowBatchLongInDoubleOut() { return batch; } + public static VectorizedRowBatch getVectorizedRowBatchTimestampInDoubleOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + TimestampColumnVector tcv; + DoubleColumnVector dcv; + tcv = new TimestampColumnVector(); + dcv = new DoubleColumnVector(); + tcv.set(0, new PisaTimestamp(0, -2)); + tcv.set(1, new PisaTimestamp(0, -1)); + tcv.set(2, new PisaTimestamp(0, 0)); + tcv.set(3, new PisaTimestamp(0, 1)); + tcv.set(4, new PisaTimestamp(0, 2)); + + batch.cols[0] = tcv; + batch.cols[1] = dcv; + + batch.size = 5; + return batch; + } + public static VectorizedRowBatch getVectorizedRowBatchLongInLongOut() { VectorizedRowBatch batch = new VectorizedRowBatch(2); LongColumnVector inV, outV; @@ -186,6 +228,38 @@ public static VectorizedRowBatch getVectorizedRowBatchLongInLongOut() { return batch; } + public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + TimestampColumnVector inV; + LongColumnVector outV; + inV = new TimestampColumnVector(); + outV = new LongColumnVector(); + inV.setTimestampSeconds(0, 2); + inV.setTimestampSeconds(1, 2); + + batch.cols[0] = inV; + batch.cols[1] = outV; + + batch.size = 2; + return batch; + } + + public static VectorizedRowBatch getVectorizedRowBatchLongInTimestampOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + LongColumnVector inV; + TimestampColumnVector outV; + inV = new LongColumnVector(); + outV = new TimestampColumnVector(); + inV.vector[0] = -2; + inV.vector[1] = 2; + + batch.cols[0] = inV; + batch.cols[1] = outV; + + batch.size = 2; + return batch; + } + public static VectorizedRowBatch getBatchForStringMath() { VectorizedRowBatch batch = new VectorizedRowBatch(3); LongColumnVector inL; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java index 4a4ce27..7c91a03 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTimestampExpressions.java @@ -32,10 +32,12 @@ import junit.framework.Assert; import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; import org.apache.hadoop.hive.ql.udf.UDFHour; @@ -56,51 +58,42 @@ public class TestVectorTimestampExpressions { private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - /* copied over from VectorUDFTimestampFieldLong */ - private TimestampWritable toTimestampWritable(long nanos) { - long ms = (nanos / (1000 * 1000 * 1000)) * 1000; - /* the milliseconds should be kept in nanos */ - long ns = nanos % (1000*1000*1000); - if (ns < 0) { - /* - * The nano seconds are always positive, - * but the milliseconds can be negative - */ - ms -= 1000; - ns += 1000*1000*1000; - } - Timestamp ts = new Timestamp(ms); - ts.setNanos((int) ns); - return new TimestampWritable(ts); - } - - private long[] getAllBoundaries() { - List<Long> boundaries = new ArrayList<Long>(1); + private Timestamp[] getAllBoundaries(int minYear, int maxYear) { + ArrayList<Timestamp> boundaries = new ArrayList<Timestamp>(1); Calendar c = Calendar.getInstance(); c.setTimeInMillis(0); // c.set doesn't reset millis - for (int year = 1902; year <= 2038; year++) { + for (int year = minYear; year <= maxYear; year++) { c.set(year, Calendar.JANUARY, 1, 0, 0, 0); - long exactly = c.getTimeInMillis() * 1000 * 1000; + if (c.get(Calendar.YEAR) < 0 || c.get(Calendar.YEAR) >= 10000) { + continue; + } + long exactly = c.getTimeInMillis(); /* one second before and after */ - long before = exactly - 1000 * 1000 * 1000; - long after = exactly + 1000 * 1000 * 1000; - boundaries.add(Long.valueOf(before)); - boundaries.add(Long.valueOf(exactly)); - boundaries.add(Long.valueOf(after)); + long before = exactly - 1000; + long after = exactly + 1000; + if (year != minYear) { + boundaries.add(new Timestamp(before)); + } + boundaries.add(new Timestamp(exactly)); + if (year != maxYear) { + boundaries.add(new Timestamp(after)); + } } - Long[] indices = boundaries.toArray(new Long[1]); - return ArrayUtils.toPrimitive(indices); + return boundaries.toArray(new Timestamp[0]); } - private VectorizedRowBatch getVectorizedRandomRowBatchLong2(int seed, int size) { + private Timestamp[] getAllBoundaries() { + return getAllBoundaries(0000, 9999); + } + + private VectorizedRowBatch getVectorizedRandomRowBatchTimestampLong(int seed, int size) { VectorizedRowBatch batch = new VectorizedRowBatch(2, size); - LongColumnVector lcv = new LongColumnVector(size); + TimestampColumnVector tcv = new TimestampColumnVector(size); Random rand = new Random(seed); for (int i = 0; i < size; i++) { - /* all 32 bit numbers qualify & multiply up to get nano-seconds */ - lcv.vector[i] = (long)(1000*1000*1000*rand.nextInt()); + tcv.set(i, PisaTimestamp.getRandTimestamp(rand)); } - batch.cols[0] = 
lcv; + batch.cols[0] = tcv; batch.cols[1] = new LongColumnVector(size); batch.size = size; return batch; @@ -112,7 +105,7 @@ private VectorizedRowBatch getVectorizedRandomRowBatchStringLong(int seed, int s Random rand = new Random(seed); for (int i = 0; i < size; i++) { /* all 32 bit numbers qualify & multiply up to get nano-seconds */ - byte[] encoded = encodeTime(1000 * 1000 * 1000 * rand.nextInt()); + byte[] encoded = encodeTime(PisaTimestamp.getRandTimestamp(rand)); bcv.vector[i] = encoded; bcv.start[i] = 0; bcv.length[i] = encoded.length; @@ -125,8 +118,8 @@ private VectorizedRowBatch getVectorizedRandomRowBatchStringLong(int seed, int s private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size, TestType testType) { switch (testType) { - case LONG2: - return getVectorizedRandomRowBatchLong2(seed, size); + case TIMESTAMP_LONG: + return getVectorizedRandomRowBatchTimestampLong(seed, size); case STRING_LONG: return getVectorizedRandomRowBatchStringLong(seed, size); default: @@ -137,13 +130,13 @@ private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size, TestT /* * Input array is used to fill the entire size of the vector row batch */ - private VectorizedRowBatch getVectorizedRowBatchLong2(long[] inputs, int size) { + private VectorizedRowBatch getVectorizedRowBatchTimestampLong(Timestamp[] inputs, int size) { VectorizedRowBatch batch = new VectorizedRowBatch(2, size); - LongColumnVector lcv = new LongColumnVector(size); + TimestampColumnVector tcv = new TimestampColumnVector(size); for (int i = 0; i < size; i++) { - lcv.vector[i] = inputs[i % inputs.length]; + tcv.set(i, inputs[i % inputs.length]); } - batch.cols[0] = lcv; + batch.cols[0] = tcv; batch.cols[1] = new LongColumnVector(size); batch.size = size; return batch; @@ -152,7 +145,7 @@ private VectorizedRowBatch getVectorizedRowBatchLong2(long[] inputs, int size) { /* * Input array is used to fill the entire size of the vector row batch */ - private VectorizedRowBatch getVectorizedRowBatchStringLong(long[] inputs, int size) { + private VectorizedRowBatch getVectorizedRowBatchStringLong(Timestamp[] inputs, int size) { VectorizedRowBatch batch = new VectorizedRowBatch(2, size); BytesColumnVector bcv = new BytesColumnVector(size); for (int i = 0; i < size; i++) { @@ -181,10 +174,10 @@ private VectorizedRowBatch getVectorizedRowBatchStringLong(byte[] vector, int st return batch; } - private VectorizedRowBatch getVectorizedRowBatch(long[] inputs, int size, TestType testType) { + private VectorizedRowBatch getVectorizedRowBatch(Timestamp[] inputs, int size, TestType testType) { switch (testType) { - case LONG2: - return getVectorizedRowBatchLong2(inputs, size); + case TIMESTAMP_LONG: + return getVectorizedRowBatchTimestampLong(inputs, size); case STRING_LONG: return getVectorizedRowBatchStringLong(inputs, size); default: @@ -192,10 +185,11 @@ private VectorizedRowBatch getVectorizedRowBatch(long[] inputs, int size, TestTy } } - private byte[] encodeTime(long time) { + private byte[] encodeTime(Timestamp timestamp) { ByteBuffer encoded; + long time = timestamp.getTime(); try { - String formatted = dateFormat.format(new Date(time / (1000 * 1000))); + String formatted = dateFormat.format(new Date(time)); encoded = Text.encode(formatted); } catch (CharacterCodingException e) { throw new RuntimeException(e); @@ -203,17 +197,17 @@ private VectorizedRowBatch getVectorizedRowBatch(long[] inputs, int size, TestTy return Arrays.copyOf(encoded.array(), encoded.limit()); } - private long decodeTime(byte[] time) { + 
private Timestamp decodeTime(byte[] time) { try { - return dateFormat.parse(Text.decode(time)).getTime() * 1000 * 1000; + return new Timestamp(dateFormat.parse(Text.decode(time)).getTime()); } catch (Exception e) { throw new RuntimeException(e); } } - private long readVectorElementAt(ColumnVector col, int i) { - if (col instanceof LongColumnVector) { - return ((LongColumnVector) col).vector[i]; + private Timestamp readVectorElementAt(ColumnVector col, int i) { + if (col instanceof TimestampColumnVector) { + return ((TimestampColumnVector) col).asScratchTimestamp(i); } if (col instanceof BytesColumnVector) { byte[] timeBytes = ((BytesColumnVector) col).vector[i]; @@ -223,20 +217,24 @@ private long readVectorElementAt(ColumnVector col, int i) { } private enum TestType { - LONG2, STRING_LONG + TIMESTAMP_LONG, STRING_LONG } - private void compareToUDFYearLong(long t, int y) { + private void compareToUDFYearLong(Timestamp t, int y) { UDFYear udf = new UDFYear(); - TimestampWritable tsw = toTimestampWritable(t); + TimestampWritable tsw = new TimestampWritable(t); IntWritable res = udf.evaluate(tsw); + if (res.get() != y) { + System.out.printf("%d vs %d for %s, %d\n", res.get(), y, t.toString(), + tsw.getTimestamp().getTime()/1000); + } Assert.assertEquals(res.get(), y); } private void verifyUDFYear(VectorizedRowBatch batch, TestType testType) { VectorExpression udf = null; - if (testType == TestType.LONG2) { - udf = new VectorUDFYearLong(0, 1); + if (testType == TestType.TIMESTAMP_LONG) { + udf = new VectorUDFYearTimestamp(0, 1); udf.setInputTypes(VectorExpression.Type.TIMESTAMP); } else { udf = new VectorUDFYearString(0, 1); @@ -251,7 +249,7 @@ private void verifyUDFYear(VectorizedRowBatch batch, TestType testType) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = readVectorElementAt(batch.cols[in], i); + Timestamp t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFYearLong(t, (int) y); } else { @@ -261,7 +259,7 @@ private void verifyUDFYear(VectorizedRowBatch batch, TestType testType) { } private void testVectorUDFYear(TestType testType) { - VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); @@ -269,7 +267,7 @@ private void testVectorUDFYear(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFYear(batch, testType); - long[] boundaries = getAllBoundaries(); + Timestamp[] boundaries = getAllBoundaries(); batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); verifyUDFYear(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -277,14 +275,14 @@ private void testVectorUDFYear(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); verifyUDFYear(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; verifyUDFYear(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; verifyUDFYear(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); 
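[Editor's note, not part of the patch: readVectorElementAt above returns TimestampColumnVector.asScratchTimestamp(i). The name suggests a single scratch Timestamp overwritten on each call — an assumption, not confirmed by the patch — so a caller that needed to hold a value across rows would copy it first. A minimal sketch under that assumption:

    // Hypothetical helper; asScratchTimestamp(i) is assumed to reuse one object.
    private Timestamp copyElementAt(TimestampColumnVector tcv, int i) {
      Timestamp scratch = tcv.asScratchTimestamp(i); // overwritten on next call
      Timestamp copy = new Timestamp(scratch.getTime());
      copy.setNanos(scratch.getNanos()); // keep full sub-millisecond precision
      return copy;
    }

The verify loops in this file compare each element immediately, so they stay safe without copying.]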
batch.cols[0].isRepeating = true; batch.selectedInUse = true; batch.selected = new int[] {42}; @@ -302,8 +300,8 @@ private void testVectorUDFYear(TestType testType) { } @Test - public void testVectorUDFYearLong() { - testVectorUDFYear(TestType.LONG2); + public void testVectorUDFYearTimestamp() { + testVectorUDFYear(TestType.TIMESTAMP_LONG); } @Test @@ -318,17 +316,17 @@ public void testVectorUDFYearString() { Assert.assertEquals(true, lcv.isNull[0]); } - private void compareToUDFDayOfMonthLong(long t, int y) { + private void compareToUDFDayOfMonthLong(Timestamp t, int y) { UDFDayOfMonth udf = new UDFDayOfMonth(); - TimestampWritable tsw = toTimestampWritable(t); + TimestampWritable tsw = new TimestampWritable(t); IntWritable res = udf.evaluate(tsw); Assert.assertEquals(res.get(), y); } private void verifyUDFDayOfMonth(VectorizedRowBatch batch, TestType testType) { VectorExpression udf = null; - if (testType == TestType.LONG2) { - udf = new VectorUDFDayOfMonthLong(0, 1); + if (testType == TestType.TIMESTAMP_LONG) { + udf = new VectorUDFDayOfMonthTimestamp(0, 1); udf.setInputTypes(VectorExpression.Type.TIMESTAMP); } else { udf = new VectorUDFDayOfMonthString(0, 1); @@ -343,7 +341,7 @@ private void verifyUDFDayOfMonth(VectorizedRowBatch batch, TestType testType) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = readVectorElementAt(batch.cols[in], i); + Timestamp t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFDayOfMonthLong(t, (int) y); } else { @@ -353,7 +351,7 @@ private void verifyUDFDayOfMonth(VectorizedRowBatch batch, TestType testType) { } private void testVectorUDFDayOfMonth(TestType testType) { - VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); @@ -361,7 +359,7 @@ private void testVectorUDFDayOfMonth(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFDayOfMonth(batch, testType); - long[] boundaries = getAllBoundaries(); + Timestamp[] boundaries = getAllBoundaries(); batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); verifyUDFDayOfMonth(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -369,14 +367,14 @@ private void testVectorUDFDayOfMonth(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); verifyUDFDayOfMonth(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; verifyUDFDayOfMonth(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; verifyUDFDayOfMonth(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; batch.selectedInUse = true; batch.selected = new int[] {42}; @@ -394,8 +392,8 @@ private void testVectorUDFDayOfMonth(TestType testType) { } @Test - public void testVectorUDFDayOfMonthLong() { - testVectorUDFDayOfMonth(TestType.LONG2); + public void testVectorUDFDayOfMonthTimestamp() { + testVectorUDFDayOfMonth(TestType.TIMESTAMP_LONG); } @Test @@ -403,17 +401,17 @@ 
public void testVectorUDFDayOfMonthString() { testVectorUDFDayOfMonth(TestType.STRING_LONG); } - private void compareToUDFHourLong(long t, int y) { + private void compareToUDFHourLong(Timestamp t, int y) { UDFHour udf = new UDFHour(); - TimestampWritable tsw = toTimestampWritable(t); + TimestampWritable tsw = new TimestampWritable(t); IntWritable res = udf.evaluate(tsw); Assert.assertEquals(res.get(), y); } private void verifyUDFHour(VectorizedRowBatch batch, TestType testType) { VectorExpression udf = null; - if (testType == TestType.LONG2) { - udf = new VectorUDFHourLong(0, 1); + if (testType == TestType.TIMESTAMP_LONG) { + udf = new VectorUDFHourTimestamp(0, 1); udf.setInputTypes(VectorExpression.Type.TIMESTAMP); } else { udf = new VectorUDFHourString(0, 1); @@ -428,7 +426,7 @@ private void verifyUDFHour(VectorizedRowBatch batch, TestType testType) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = readVectorElementAt(batch.cols[in], i); + Timestamp t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFHourLong(t, (int) y); } else { @@ -438,7 +436,7 @@ private void verifyUDFHour(VectorizedRowBatch batch, TestType testType) { } private void testVectorUDFHour(TestType testType) { - VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); @@ -446,7 +444,7 @@ private void testVectorUDFHour(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFHour(batch, testType); - long[] boundaries = getAllBoundaries(); + Timestamp[] boundaries = getAllBoundaries(); batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); verifyUDFHour(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -454,14 +452,14 @@ private void testVectorUDFHour(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); verifyUDFHour(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; verifyUDFHour(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; verifyUDFHour(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; batch.selectedInUse = true; batch.selected = new int[] {42}; @@ -479,8 +477,8 @@ private void testVectorUDFHour(TestType testType) { } @Test - public void testVectorUDFHourLong() { - testVectorUDFHour(TestType.LONG2); + public void testVectorUDFHourTimestamp() { + testVectorUDFHour(TestType.TIMESTAMP_LONG); } @Test @@ -488,17 +486,17 @@ public void testVectorUDFHourString() { testVectorUDFHour(TestType.STRING_LONG); } - private void compareToUDFMinuteLong(long t, int y) { + private void compareToUDFMinuteLong(Timestamp t, int y) { UDFMinute udf = new UDFMinute(); - TimestampWritable tsw = toTimestampWritable(t); + TimestampWritable tsw = new TimestampWritable(t); IntWritable res = udf.evaluate(tsw); Assert.assertEquals(res.get(), y); } private void verifyUDFMinute(VectorizedRowBatch batch, TestType testType) { VectorExpression udf = 
null; - if (testType == TestType.LONG2) { - udf = new VectorUDFMinuteLong(0, 1); + if (testType == TestType.TIMESTAMP_LONG) { + udf = new VectorUDFMinuteTimestamp(0, 1); udf.setInputTypes(VectorExpression.Type.TIMESTAMP); } else { udf = new VectorUDFMinuteString(0, 1); @@ -513,7 +511,7 @@ private void verifyUDFMinute(VectorizedRowBatch batch, TestType testType) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = readVectorElementAt(batch.cols[in], i); + Timestamp t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFMinuteLong(t, (int) y); } else { @@ -523,7 +521,7 @@ private void verifyUDFMinute(VectorizedRowBatch batch, TestType testType) { } private void testVectorUDFMinute(TestType testType) { - VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); @@ -531,7 +529,7 @@ private void testVectorUDFMinute(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFMinute(batch, testType); - long[] boundaries = getAllBoundaries(); + Timestamp[] boundaries = getAllBoundaries(); batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); verifyUDFMinute(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -539,14 +537,14 @@ private void testVectorUDFMinute(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); verifyUDFMinute(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; verifyUDFMinute(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; verifyUDFMinute(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; batch.selectedInUse = true; batch.selected = new int[] {42}; @@ -565,7 +563,7 @@ private void testVectorUDFMinute(TestType testType) { @Test public void testVectorUDFMinuteLong() { - testVectorUDFMinute(TestType.LONG2); + testVectorUDFMinute(TestType.TIMESTAMP_LONG); } @Test @@ -573,17 +571,17 @@ public void testVectorUDFMinuteString() { testVectorUDFMinute(TestType.STRING_LONG); } - private void compareToUDFMonthLong(long t, int y) { + private void compareToUDFMonthLong(Timestamp t, int y) { UDFMonth udf = new UDFMonth(); - TimestampWritable tsw = toTimestampWritable(t); + TimestampWritable tsw = new TimestampWritable(t); IntWritable res = udf.evaluate(tsw); Assert.assertEquals(res.get(), y); } private void verifyUDFMonth(VectorizedRowBatch batch, TestType testType) { VectorExpression udf; - if (testType == TestType.LONG2) { - udf = new VectorUDFMonthLong(0, 1); + if (testType == TestType.TIMESTAMP_LONG) { + udf = new VectorUDFMonthTimestamp(0, 1); udf.setInputTypes(VectorExpression.Type.TIMESTAMP); } else { udf = new VectorUDFMonthString(0, 1); @@ -598,7 +596,7 @@ private void verifyUDFMonth(VectorizedRowBatch batch, TestType testType) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = readVectorElementAt(batch.cols[in], i); + Timestamp t 
= readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFMonthLong(t, (int) y); } else { @@ -608,7 +606,7 @@ private void verifyUDFMonth(VectorizedRowBatch batch, TestType testType) { } private void testVectorUDFMonth(TestType testType) { - VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); @@ -616,7 +614,7 @@ private void testVectorUDFMonth(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFMonth(batch, testType); - long[] boundaries = getAllBoundaries(); + Timestamp[] boundaries = getAllBoundaries(); batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); verifyUDFMonth(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -624,14 +622,14 @@ private void testVectorUDFMonth(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); verifyUDFMonth(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; verifyUDFMonth(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; verifyUDFMonth(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; batch.selectedInUse = true; batch.selected = new int[] {42}; @@ -649,8 +647,8 @@ private void testVectorUDFMonth(TestType testType) { } @Test - public void testVectorUDFMonthLong() { - testVectorUDFMonth(TestType.LONG2); + public void testVectorUDFMonthTimestamp() { + testVectorUDFMonth(TestType.TIMESTAMP_LONG); } @Test @@ -658,17 +656,17 @@ public void testVectorUDFMonthString() { testVectorUDFMonth(TestType.STRING_LONG); } - private void compareToUDFSecondLong(long t, int y) { + private void compareToUDFSecondLong(Timestamp t, int y) { UDFSecond udf = new UDFSecond(); - TimestampWritable tsw = toTimestampWritable(t); + TimestampWritable tsw = new TimestampWritable(t); IntWritable res = udf.evaluate(tsw); Assert.assertEquals(res.get(), y); } private void verifyUDFSecond(VectorizedRowBatch batch, TestType testType) { VectorExpression udf; - if (testType == TestType.LONG2) { - udf = new VectorUDFSecondLong(0, 1); + if (testType == TestType.TIMESTAMP_LONG) { + udf = new VectorUDFSecondTimestamp(0, 1); udf.setInputTypes(VectorExpression.Type.TIMESTAMP); } else { udf = new VectorUDFSecondString(0, 1); @@ -683,7 +681,7 @@ private void verifyUDFSecond(VectorizedRowBatch batch, TestType testType) { if (!batch.cols[in].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = readVectorElementAt(batch.cols[in], i); + Timestamp t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFSecondLong(t, (int) y); } else { @@ -693,7 +691,7 @@ private void verifyUDFSecond(VectorizedRowBatch batch, TestType testType) { } private void testVectorUDFSecond(TestType testType) { - VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, VectorizedRowBatch.DEFAULT_SIZE, testType); 
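[Editor's note, not part of the patch: the boundary sweep now covers years 0001–9999, where the removed code stopped at 1902–2038. That older window is exactly what a signed 32-bit second count can reach, which is why the removed generator could draw rand.nextInt() and scale to nanoseconds; rough arithmetic:

    // Why 1902..2038: a signed 32-bit second count spans ~68 years either way.
    long half = 1L << 31;                       // ~2.15e9 seconds
    double years = half / (365.25 * 86400.0);   // ~68.1 years
    // 1970 - 68 ~= 1902 and 1970 + 68 ~= 2038, the classic 32-bit time_t window.

Storing values as Timestamp/TimestampColumnVector removes that bound, hence the wider sweep.]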
Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); @@ -701,7 +699,7 @@ private void testVectorUDFSecond(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFSecond(batch, testType); - long[] boundaries = getAllBoundaries(); + Timestamp[] boundaries = getAllBoundaries(); batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); verifyUDFSecond(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -709,14 +707,14 @@ private void testVectorUDFSecond(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); verifyUDFSecond(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; verifyUDFSecond(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; verifyUDFSecond(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; batch.selectedInUse = true; batch.selected = new int[] {42}; @@ -735,7 +733,7 @@ private void testVectorUDFSecond(TestType testType) { @Test public void testVectorUDFSecondLong() { - testVectorUDFSecond(TestType.LONG2); + testVectorUDFSecond(TestType.TIMESTAMP_LONG); } @Test @@ -753,11 +751,11 @@ private LongWritable getLongWritable(TimestampWritable i) { } } - private void compareToUDFUnixTimeStampLong(long t, long y) { - TimestampWritable tsw = toTimestampWritable(t); + private void compareToUDFUnixTimeStampLong(Timestamp t, long y) { + TimestampWritable tsw = new TimestampWritable(t); LongWritable res = getLongWritable(tsw); if(res.get() != y) { - System.out.printf("%d vs %d for %d, %d\n", res.get(), y, t, + System.out.printf("%d vs %d for %s, %d\n", res.get(), y, t.toString(), tsw.getTimestamp().getTime()/1000); } @@ -766,8 +764,8 @@ private void compareToUDFUnixTimeStampLong(long t, long y) { private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch, TestType testType) { VectorExpression udf; - if (testType == TestType.LONG2) { - udf = new VectorUDFUnixTimeStampLong(0, 1); + if (testType == TestType.TIMESTAMP_LONG) { + udf = new VectorUDFUnixTimeStampTimestamp(0, 1); udf.setInputTypes(VectorExpression.Type.TIMESTAMP); } else { udf = new VectorUDFUnixTimeStampString(0, 1); @@ -782,7 +780,7 @@ private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch, TestType testType) if (!batch.cols[out].noNulls) { Assert.assertEquals(batch.cols[out].isNull[i], batch.cols[in].isNull[i]); } - long t = readVectorElementAt(batch.cols[in], i); + Timestamp t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFUnixTimeStampLong(t, y); } else { @@ -792,7 +790,7 @@ private void verifyUDFUnixTimeStamp(VectorizedRowBatch batch, TestType testType) } private void testVectorUDFUnixTimeStamp(TestType testType) { - VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); @@ -800,7 +798,7 @@ private void testVectorUDFUnixTimeStamp(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); 
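[Editor's note, not part of the patch: TestVectorizedRowBatch.addRandomNulls flips isNull bits on the input column, and each verify* method then asserts that the output column's isNull mirrors the input's row for row. A minimal sketch of the contract being asserted, under the standard ColumnVector conventions (noNulls is false whenever any isNull bit is set):

    // Sketch of the null-propagation contract the verify* loops check.
    void propagateNulls(ColumnVector in, ColumnVector out, int n) {
      out.noNulls = in.noNulls;
      if (!in.noNulls) {
        System.arraycopy(in.isNull, 0, out.isNull, 0, n);
      }
    }
]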
verifyUDFUnixTimeStamp(batch, testType); - long[] boundaries = getAllBoundaries(); + Timestamp[] boundaries = getAllBoundaries(); batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); verifyUDFUnixTimeStamp(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -808,14 +806,14 @@ private void testVectorUDFUnixTimeStamp(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); verifyUDFUnixTimeStamp(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; verifyUDFUnixTimeStamp(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; verifyUDFUnixTimeStamp(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; batch.selectedInUse = true; batch.selected = new int[] {42}; @@ -833,8 +831,8 @@ private void testVectorUDFUnixTimeStamp(TestType testType) { } @Test - public void testVectorUDFUnixTimeStampLong() { - testVectorUDFUnixTimeStamp(TestType.LONG2); + public void testVectorUDFUnixTimeStampTimestamp() { + testVectorUDFUnixTimeStamp(TestType.TIMESTAMP_LONG); } @Test @@ -842,17 +840,17 @@ public void testVectorUDFUnixTimeStampString() { testVectorUDFUnixTimeStamp(TestType.STRING_LONG); } - private void compareToUDFWeekOfYearLong(long t, int y) { + private void compareToUDFWeekOfYearLong(Timestamp t, int y) { UDFWeekOfYear udf = new UDFWeekOfYear(); - TimestampWritable tsw = toTimestampWritable(t); + TimestampWritable tsw = new TimestampWritable(t); IntWritable res = udf.evaluate(tsw); Assert.assertEquals(res.get(), y); } private void verifyUDFWeekOfYear(VectorizedRowBatch batch, TestType testType) { VectorExpression udf; - if (testType == TestType.LONG2) { - udf = new VectorUDFWeekOfYearLong(0, 1); + if (testType == TestType.TIMESTAMP_LONG) { + udf = new VectorUDFWeekOfYearTimestamp(0, 1); udf.setInputTypes(VectorExpression.Type.TIMESTAMP); } else { udf = new VectorUDFWeekOfYearString(0, 1); @@ -864,7 +862,7 @@ private void verifyUDFWeekOfYear(VectorizedRowBatch batch, TestType testType) { for (int i = 0; i < batch.size; i++) { if (batch.cols[in].noNulls || !batch.cols[in].isNull[i]) { - long t = readVectorElementAt(batch.cols[in], i); + Timestamp t = readVectorElementAt(batch.cols[in], i); long y = ((LongColumnVector) batch.cols[out]).vector[i]; compareToUDFWeekOfYearLong(t, (int) y); } else { @@ -874,7 +872,7 @@ private void verifyUDFWeekOfYear(VectorizedRowBatch batch, TestType testType) { } private void testVectorUDFWeekOfYear(TestType testType) { - VectorizedRowBatch batch = getVectorizedRowBatch(new long[] {0}, + VectorizedRowBatch batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, VectorizedRowBatch.DEFAULT_SIZE, testType); Assert.assertTrue(((LongColumnVector) batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector) batch.cols[1]).isRepeating); @@ -882,7 +880,7 @@ private void testVectorUDFWeekOfYear(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFWeekOfYear(batch, testType); - long[] boundaries = getAllBoundaries(); + Timestamp[] boundaries = getAllBoundaries(); batch = getVectorizedRowBatch(boundaries, boundaries.length, testType); verifyUDFWeekOfYear(batch, testType); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); @@ -890,14 +888,14 @@ private void 
testVectorUDFWeekOfYear(TestType testType) { TestVectorizedRowBatch.addRandomNulls(batch.cols[1]); verifyUDFWeekOfYear(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; verifyUDFWeekOfYear(batch, testType); batch.cols[0].noNulls = false; batch.cols[0].isNull[0] = true; verifyUDFWeekOfYear(batch, testType); - batch = getVectorizedRowBatch(new long[] {0}, 1, testType); + batch = getVectorizedRowBatch(new Timestamp[] {new Timestamp(0)}, 1, testType); batch.cols[0].isRepeating = true; batch.selectedInUse = true; batch.selected = new int[] {42}; @@ -915,8 +913,8 @@ private void testVectorUDFWeekOfYear(TestType testType) { } @Test - public void testVectorUDFWeekOfYearLong() { - testVectorUDFWeekOfYear(TestType.LONG2); + public void testVectorUDFWeekOfYearTimestamp() { + testVectorUDFWeekOfYear(TestType.TIMESTAMP_LONG); } @Test @@ -926,12 +924,13 @@ public void testVectorUDFWeekOfYearString() { public static void main(String[] args) { TestVectorTimestampExpressions self = new TestVectorTimestampExpressions(); - self.testVectorUDFYearLong(); - self.testVectorUDFMonthLong(); - self.testVectorUDFDayOfMonthLong(); - self.testVectorUDFHourLong(); - self.testVectorUDFWeekOfYearLong(); - self.testVectorUDFUnixTimeStampLong(); + self.testVectorUDFYearTimestamp(); + self.testVectorUDFMonthTimestamp(); + self.testVectorUDFDayOfMonthTimestamp(); + self.testVectorUDFHourTimestamp(); + self.testVectorUDFWeekOfYearTimestamp(); + self.testVectorUDFUnixTimeStampTimestamp(); + self.testVectorUDFYearString(); self.testVectorUDFMonthString(); self.testVectorUDFDayOfMonthString(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java index 0e23680..76d0f91 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java @@ -29,13 +29,16 @@ import org.apache.hadoop.hive.common.type.Decimal128; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.ql.exec.vector.expressions.*; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.junit.Test; /** @@ -43,9 +46,6 @@ */ public class TestVectorTypeCasts { - // Number of nanoseconds in one second - private static final long NANOS_PER_SECOND = 1000000000; - @Test public void testVectorCastLongToDouble() { VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInDoubleOut(); @@ -79,13 +79,13 @@ public void testCastDoubleToBoolean() { @Test public void testCastDoubleToTimestamp() { - VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut(); - LongColumnVector resultV = (LongColumnVector) b.cols[1]; + VectorizedRowBatch b = 
TestVectorMathFunctions.getVectorizedRowBatchDoubleInTimestampOut(); + TimestampColumnVector resultV = (TimestampColumnVector) b.cols[1]; b.cols[0].noNulls = true; - VectorExpression expr = new CastDoubleToTimestampViaDoubleToLong(0, 1); + VectorExpression expr = new CastDoubleToTimestamp(0, 1); expr.evaluate(b); - Assert.assertEquals(0, resultV.vector[3]); - Assert.assertEquals((long) (0.5d * NANOS_PER_SECOND), resultV.vector[4]); + Assert.assertEquals(0.0, resultV.getTimestampSecondsWithFractionalNanos(3)); + Assert.assertEquals(0.5d, resultV.getTimestampSecondsWithFractionalNanos(4)); } @Test @@ -103,39 +103,42 @@ public void testCastLongToBoolean() { @Test public void testCastLongToTimestamp() { - VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut(); - LongColumnVector resultV = (LongColumnVector) b.cols[1]; + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInTimestampOut(); + TimestampColumnVector resultV = (TimestampColumnVector) b.cols[1]; b.cols[0].noNulls = true; - VectorExpression expr = new CastLongToTimestampViaLongToLong(0, 1); + VectorExpression expr = new CastLongToTimestamp(0, 1); expr.evaluate(b); - Assert.assertEquals(-2 * NANOS_PER_SECOND, resultV.vector[0]); - Assert.assertEquals(2 * NANOS_PER_SECOND, resultV.vector[1]); + Assert.assertEquals(-2, resultV.getTimestampSeconds(0)); + Assert.assertEquals(2, resultV.getTimestampSeconds(1)); } @Test public void testCastTimestampToLong() { - VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut(); - LongColumnVector inV = (LongColumnVector) b.cols[0]; - inV.vector[0] = NANOS_PER_SECOND; // Make one entry produce interesting result + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchTimestampInLongOut(); + TimestampColumnVector inV = (TimestampColumnVector) b.cols[0]; + inV.set(0, new PisaTimestamp(0, PisaTimestamp.NANOSECONDS_PER_SECOND)); // Make one entry produce interesting result // (1 sec after epoch). 
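[Editor's note, not part of the patch: several asserts in this file split a signed nanosecond total into the (epochDay, nanoOfDay) pair exposed by getEpochDay/getNanoOfDay. Judging from the expected values in testCastDecimalToTimestamp below — (0, -2222222222) for -2.222222222 seconds and (365, 999999999) for 31536000.999999999 seconds — the split is plain truncating division, so both fields carry the sign of the total. A worked sketch under that reading, with a local stand-in for PisaTimestamp.NANOSECONDS_PER_DAY:

    // Assumed split: Java's truncating / and % keep both fields the same sign.
    static final long NANOS_PER_DAY = 86400L * 1000000000L;
    static long[] toEpochDayAndNano(long totalNanos) {
      return new long[] { totalNanos / NANOS_PER_DAY, totalNanos % NANOS_PER_DAY };
    }
    // 31536000999999999 / NANOS_PER_DAY == 365 (365 days of 86400 s), with
    // remainder 999999999 — matching the expected (365, 999999999).
]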
LongColumnVector resultV = (LongColumnVector) b.cols[1]; b.cols[0].noNulls = true; - VectorExpression expr = new CastTimestampToLongViaLongToLong(0, 1); + VectorExpression expr = new CastTimestampToLong(0, 1); expr.evaluate(b); Assert.assertEquals(1, resultV.vector[0]); } @Test public void testCastTimestampToDouble() { - VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInDoubleOut(); - LongColumnVector inV = (LongColumnVector) b.cols[0]; + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchTimestampInDoubleOut(); + TimestampColumnVector inV = (TimestampColumnVector) b.cols[0]; DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; b.cols[0].noNulls = true; - VectorExpression expr = new CastTimestampToDoubleViaLongToDouble(0, 1); + VectorExpression expr = new CastTimestampToDouble(0, 1); expr.evaluate(b); - Assert.assertEquals(-1E-9D , resultV.vector[1]); - Assert.assertEquals(1E-9D, resultV.vector[3]); + TimestampWritable tw = new TimestampWritable(); + tw.set(inV.asScratchTimestamp(1)); + Assert.assertEquals(tw.getDouble() , resultV.vector[1]); + tw.set(inV.asScratchTimestamp(3)); + Assert.assertEquals(tw.getDouble(), resultV.vector[3]); } public byte[] toBytes(String s) { @@ -356,13 +359,16 @@ private VectorizedRowBatch getBatchDecimalString() { @Test public void testCastDecimalToTimestamp() { - VectorizedRowBatch b = getBatchDecimalLong2(); + VectorizedRowBatch b = getBatchDecimalTimestamp(); VectorExpression expr = new CastDecimalToTimestamp(0, 1); expr.evaluate(b); - LongColumnVector r = (LongColumnVector) b.cols[1]; - assertEquals(1111111111L, r.vector[0]); - assertEquals(-2222222222L, r.vector[1]); - assertEquals(31536000999999999L, r.vector[2]); + TimestampColumnVector r = (TimestampColumnVector) b.cols[1]; + assertEquals(1111111111L, r.getNanoOfDay(0)); + assertEquals(0L, r.getEpochDay(0)); + assertEquals(-2222222222L, r.getNanoOfDay(1)); + assertEquals(0L, r.getEpochDay(1)); + assertEquals(999999999L, r.getNanoOfDay(2)); + assertEquals(365L, r.getEpochDay(2)); } private VectorizedRowBatch getBatchDecimalLong2() { @@ -381,6 +387,22 @@ private VectorizedRowBatch getBatchDecimalLong2() { return b; } + private VectorizedRowBatch getBatchDecimalTimestamp() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + DecimalColumnVector dv; + short scale = 9; + b.cols[0] = dv = new DecimalColumnVector(18, scale); + b.cols[1] = new TimestampColumnVector(); + + b.size = 3; + + dv.vector[0].set(HiveDecimal.create("1.111111111").setScale(scale)); + dv.vector[1].set(HiveDecimal.create("-2.222222222").setScale(scale)); + dv.vector[2].set(HiveDecimal.create("31536000.999999999").setScale(scale)); + + return b; + } + @Test public void testCastLongToDecimal() { VectorizedRowBatch b = getBatchLongDecimal(); @@ -403,6 +425,17 @@ private VectorizedRowBatch getBatchLongDecimal() { return b; } + private VectorizedRowBatch getBatchTimestampDecimal() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + TimestampColumnVector tcv; + b.cols[0] = tcv = new TimestampColumnVector(); + b.cols[1] = new DecimalColumnVector(18, 2); + tcv.set(0, new PisaTimestamp( 0, 0)); + tcv.set(1, new PisaTimestamp( 0, -1)); + tcv.set(2, new PisaTimestamp( 99999999999999L / PisaTimestamp.NANOSECONDS_PER_DAY, 99999999999999L % PisaTimestamp.NANOSECONDS_PER_DAY)); + return b; + } + @Test public void testCastDoubleToDecimal() { VectorizedRowBatch b = getBatchDoubleDecimal(); @@ -466,10 +499,10 @@ public void testCastTimestampToDecimal() { // The input timestamps are stored as long 
values // measured in nanoseconds from the epoch. - VectorizedRowBatch b = getBatchLongDecimal(); + VectorizedRowBatch b = getBatchTimestampDecimal(); VectorExpression expr = new CastTimestampToDecimal(0, 1); - LongColumnVector inL = (LongColumnVector) b.cols[0]; - inL.vector[1] = -1990000000L; + TimestampColumnVector inT = (TimestampColumnVector) b.cols[0]; + inT.set(1, new PisaTimestamp(0, -1990000000L)); expr.evaluate(b); DecimalColumnVector r = (DecimalColumnVector) b.cols[1]; assertTrue(r.vector[0].getHiveDecimal().equals(HiveDecimal.create("0.00"))); @@ -478,7 +511,7 @@ public void testCastTimestampToDecimal() { // Try again with a value that won't fit in 5 digits, to make // sure that NULL is produced. - b = getBatchLongDecimalPrec5Scale2(); + b = getBatchTimestampDecimalPrec5Scale2(); expr.evaluate(b); r = (DecimalColumnVector) b.cols[1]; assertFalse(r.noNulls); @@ -503,6 +536,17 @@ private VectorizedRowBatch getBatchLongDecimalPrec5Scale2() { return b; } + private VectorizedRowBatch getBatchTimestampDecimalPrec5Scale2() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + TimestampColumnVector tcv; + b.cols[0] = tcv = new TimestampColumnVector(); + b.cols[1] = new DecimalColumnVector(5, 2); + tcv.set(0, new PisaTimestamp(0, 0)); + tcv.set(1, new PisaTimestamp(0, -1)); + tcv.set(2, new PisaTimestamp(99999999999999L / PisaTimestamp.NANOSECONDS_PER_DAY, 99999999999999L % PisaTimestamp.NANOSECONDS_PER_DAY)); + return b; + } + /* @Test public void testCastDecimalToDecimal() { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromObjectIterables.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromObjectIterables.java index bbda9a3..ab86082 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromObjectIterables.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeVectorRowBatchFromObjectIterables.java @@ -27,12 +27,13 @@ import java.util.regex.Pattern; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -101,16 +102,16 @@ public void assign( } }; } else if (types[i].equalsIgnoreCase("timestamp")) { - batch.cols[i] = new LongColumnVector(batchSize); + batch.cols[i] = new TimestampColumnVector(batchSize); columnAssign[i] = new ColumnVectorAssign() { @Override public void assign( ColumnVector columnVector, int row, Object value) { - LongColumnVector lcv = (LongColumnVector) columnVector; + TimestampColumnVector lcv = (TimestampColumnVector) columnVector; Timestamp t = (Timestamp) value; - lcv.vector[row] = TimestampUtils.getTimeNanoSec(t); + lcv.set(row, new PisaTimestamp(t)); } }; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/VectorizedRowGroupGenUtil.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/VectorizedRowGroupGenUtil.java index 5d79f9c..21bae41 100644 --- 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/VectorizedRowGroupGenUtil.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/VectorizedRowGroupGenUtil.java @@ -18,13 +18,15 @@ package org.apache.hadoop.hive.ql.exec.vector.util; +import java.sql.Timestamp; import java.util.Random; -import org.apache.hadoop.hive.common.type.Decimal128; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -77,6 +79,30 @@ public static LongColumnVector generateLongColumnVector( return lcv; } + public static TimestampColumnVector generateTimestampColumnVector( + boolean nulls, boolean repeating, int size, Random rand) { + TimestampColumnVector tcv = new TimestampColumnVector(size); + + tcv.noNulls = !nulls; + tcv.isRepeating = repeating; + + Timestamp repeatingTimestamp = PisaTimestamp.getRandTimestamp(rand); + + int nullFrequency = generateNullFrequency(rand); + + for(int i = 0; i < size; i++) { + if(nulls && (repeating || i % nullFrequency == 0)) { + tcv.isNull[i] = true; + tcv.setNullValue(i); + + }else { + tcv.isNull[i] = false; + tcv.set(i, repeating ? repeatingTimestamp : PisaTimestamp.getRandTimestamp(rand)); + } + } + return tcv; + } + public static DoubleColumnVector generateDoubleColumnVector(boolean nulls, boolean repeating, int size, Random rand) { DoubleColumnVector dcv = new DoubleColumnVector(size); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index f81f5bb8..cd1982f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -64,6 +64,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.AcidInputFormat; @@ -109,7 +110,6 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.Progressable; import org.apache.orc.OrcProto; - import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -1760,7 +1760,7 @@ public void testVectorizationWithAcid() throws Exception { BytesColumnVector stringColumn = (BytesColumnVector) value.cols[7]; DecimalColumnVector decimalColumn = (DecimalColumnVector) value.cols[8]; LongColumnVector dateColumn = (LongColumnVector) value.cols[9]; - LongColumnVector timestampColumn = (LongColumnVector) value.cols[10]; + TimestampColumnVector timestampColumn = (TimestampColumnVector) value.cols[10]; for(int i=0; i < 100; i++) { assertEquals("checking boolean " + i, i % 2 == 0 ? 
1 : 0, booleanColumn.vector[i]); @@ -1781,8 +1781,8 @@ public void testVectorizationWithAcid() throws Exception { assertEquals("checking date " + i, i, dateColumn.vector[i]); long millis = (long) i * MILLIS_IN_DAY; millis -= LOCAL_TIMEZONE.getOffset(millis); - assertEquals("checking timestamp " + i, millis * 1000000L, - timestampColumn.vector[i]); + assertEquals("checking timestamp " + i, millis, + timestampColumn.getTimestampMilliseconds(i)); } assertEquals(false, reader.next(key, value)); } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java index f5b06db..4ca20c5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorOrcFile.java @@ -19,10 +19,12 @@ package org.apache.hadoop.hive.ql.io.orc; import com.google.common.collect.Lists; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; @@ -30,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector; import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -518,13 +521,13 @@ public void testTimestamp() throws Exception { tslist.add(Timestamp.valueOf("2008-10-02 00:00:00")); VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024); - LongColumnVector vec = new LongColumnVector(1024); + TimestampColumnVector vec = new TimestampColumnVector(1024); batch.cols[0] = vec; batch.reset(); batch.size = tslist.size(); for (int i=0; i < tslist.size(); ++i) { Timestamp ts = tslist.get(i); - vec.vector[i] = TimestampUtils.getTimeNanoSec(ts); + vec.set(i, new PisaTimestamp(ts)); } writer.addRowBatch(batch); writer.close(); @@ -1340,9 +1343,10 @@ public void createOrcDateFile(Path file, int minYear, int maxYear batch.size = 1000; for (int year = minYear; year < maxYear; ++year) { for (int ms = 1000; ms < 2000; ++ms) { - ((LongColumnVector) batch.cols[0]).vector[ms - 1000] = - TimestampUtils.getTimeNanoSec(Timestamp.valueOf(year + - "-05-05 12:34:56." + ms)); + TimestampColumnVector timestampColVector = (TimestampColumnVector) batch.cols[0]; + timestampColVector.set(ms - 1000, + new PisaTimestamp(Timestamp.valueOf(year + + "-05-05 12:34:56." 
+ ms))); ((LongColumnVector) batch.cols[1]).vector[ms - 1000] = new DateWritable(new Date(year - 1900, 11, 25)).getDays(); } @@ -1380,8 +1384,8 @@ private static void setUnion(VectorizedRowBatch batch, int rowId, HiveDecimalWritable dec) { UnionColumnVector union = (UnionColumnVector) batch.cols[1]; if (ts != null) { - ((LongColumnVector) batch.cols[0]).vector[rowId] = - TimestampUtils.getTimeNanoSec(ts); + TimestampColumnVector timestampColVector = (TimestampColumnVector) batch.cols[0]; + timestampColVector.set(rowId, new PisaTimestamp(ts)); } else { batch.cols[0].isNull[rowId] = true; batch.cols[0].noNulls = false; @@ -2173,9 +2177,9 @@ public void testRepeating() throws Exception { ((DoubleColumnVector) batch.cols[5]).vector[0] = 0.0009765625; ((LongColumnVector) batch.cols[6]).vector[0] = new DateWritable(new Date(111, 6, 1)).getDays(); - ((LongColumnVector) batch.cols[7]).vector[0] = - TimestampUtils.getTimeNanoSec(new Timestamp(115, 9, 23, 10, 11, 59, - 999999999)); + ((TimestampColumnVector) batch.cols[7]).set(0, + new PisaTimestamp(new Timestamp(115, 9, 23, 10, 11, 59, + 999999999))); ((DecimalColumnVector) batch.cols[8]).vector[0] = new HiveDecimalWritable("1.234567"); ((BytesColumnVector) batch.cols[9]).setVal(0, "Echelon".getBytes()); @@ -2228,9 +2232,11 @@ public void testRepeating() throws Exception { ((DoubleColumnVector) batch.cols[5]).vector[r] = 0.0009765625 * r; ((LongColumnVector) batch.cols[6]).vector[r] = new DateWritable(new Date(111, 6, 1)).getDays() + r; - ((LongColumnVector) batch.cols[7]).vector[r] = - TimestampUtils.getTimeNanoSec(new Timestamp(115, 9, 23, 10, 11, 59, - 999999999)) + r * 1000000000L; + + Timestamp ts = new Timestamp(115, 9, 23, 10, 11, 59, 999999999); + PisaTimestamp pisaTimestamp = new PisaTimestamp(ts); + pisaTimestamp.addSeconds(pisaTimestamp, r, pisaTimestamp); + ((TimestampColumnVector) batch.cols[7]).set(r, pisaTimestamp); ((DecimalColumnVector) batch.cols[8]).vector[r] = new HiveDecimalWritable("1.234567"); ((BytesColumnVector) batch.cols[9]).setVal(r, diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java index a777b1c..adb52f0 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -176,13 +177,8 @@ private void checkVectorizedReader() throws Exception { } else if (a instanceof TimestampWritable) { // Timestamps are stored as long, so convert and compare TimestampWritable t = ((TimestampWritable) a); - // Timestamp.getTime() is overriden and is - // long time = super.getTime(); - // return (time + (nanos / 1000000)); - Long timeInNanoSec = (t.getTimestamp().getTime() * 1000000) - + (t.getTimestamp().getNanos() % 1000000); - long b = ((LongColumnVector) cv).vector[rowId]; - Assert.assertEquals(timeInNanoSec.toString(), Long.toString(b)); + TimestampColumnVector tcv = ((TimestampColumnVector) cv); + Assert.assertEquals(t.getTimestamp(), tcv.asScratchTimestamp(rowId)); } else if (a 
instanceof DateWritable) { // Dates are stored as long, so convert and compare diff --git ql/src/test/queries/clientpositive/vectorized_timestamp.q ql/src/test/queries/clientpositive/vectorized_timestamp.q new file mode 100644 index 0000000..baf0cfa --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_timestamp.q @@ -0,0 +1,27 @@ +set hive.fetch.task.conversion=none; + +DROP TABLE IF EXISTS test; +CREATE TABLE test(ts TIMESTAMP) STORED AS ORC; +INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999'); + +SET hive.vectorized.execution.enabled = false; +EXPLAIN +SELECT ts FROM test; + +SELECT ts FROM test; + +EXPLAIN +SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; + +SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; + +SET hive.vectorized.execution.enabled = true; +EXPLAIN +SELECT ts FROM test; + +SELECT ts FROM test; + +EXPLAIN +SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; + +SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; \ No newline at end of file diff --git ql/src/test/results/clientpositive/vectorized_timestamp.q.out ql/src/test/results/clientpositive/vectorized_timestamp.q.out new file mode 100644 index 0000000..bbc9b10 --- /dev/null +++ ql/src/test/results/clientpositive/vectorized_timestamp.q.out @@ -0,0 +1,239 @@ +PREHOOK: query: DROP TABLE IF EXISTS test +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test(ts TIMESTAMP) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +POSTHOOK: query: CREATE TABLE test(ts TIMESTAMP) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test +PREHOOK: query: INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@test +POSTHOOK: query: INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@test +POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN +SELECT ts FROM test +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT ts FROM test +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT ts FROM test +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: SELECT ts FROM test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked 
pattern was here #### +0001-01-01 00:00:00 +9999-12-31 23:59:59.999999999 +PREHOOK: query: EXPLAIN +SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ts), max(ts) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp), _col1 (type: timestamp), (_col1 - _col0) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999 +PREHOOK: query: EXPLAIN +SELECT ts FROM test +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT ts FROM test +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT ts FROM test +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: SELECT ts FROM test +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@test +#### A masked pattern was here #### +0001-01-01 00:00:00 +9999-12-31 23:59:59.999999999 +PREHOOK: query: EXPLAIN +SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ts), max(ts) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp), _col1 (type: timestamp), (_col1 - _col0) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999 diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java index 9ea6e91..7456725 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java @@ -349,19 +349,6 @@ public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOExceptio BinarySortableSerDe.serializeHiveIntervalDayTime(output, vidt, invert); } - @Override - public void writeHiveIntervalDayTime(long totalNanos) throws IOException { - final boolean invert = columnSortOrderIsDesc[++index]; - - // This field is not a null. 
- BinarySortableSerDe.writeByte(output, (byte) 1, invert); - - long totalSecs = DateUtils.getIntervalDayTimeTotalSecondsFromTotalNanos(totalNanos); - int nanos = DateUtils.getIntervalDayTimeNanosFromTotalNanos(totalNanos); - BinarySortableSerDe.serializeLong(output, totalSecs, invert); - BinarySortableSerDe.serializeInt(output, nanos, invert); - } - /* * DECIMAL. */ diff --git serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java index 21daa8b..e562ce3 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java +++ serde/src/java/org/apache/hadoop/hive/serde2/fast/SerializeWrite.java @@ -145,9 +145,6 @@ */ void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException; - // We provide a faster way to write a hive interval day time without a HiveIntervalDayTime object. - void writeHiveIntervalDayTime(long totalNanos) throws IOException; - /* * DECIMAL. */ diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java index 533b76f..fdc64e7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java @@ -25,8 +25,10 @@ import java.sql.Timestamp; import java.text.DateFormat; import java.text.SimpleDateFormat; +import java.util.Date; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; @@ -150,6 +152,21 @@ public void set(TimestampWritable t) { } } + public static void updateTimestamp(Timestamp timestamp, long secondsAsMillis, int nanos) { + ((Date) timestamp).setTime(secondsAsMillis); + timestamp.setNanos(nanos); + } + + public void setInternal(long secondsAsMillis, int nanos) { + + // This is our way of documenting that we are MUTATING the contents of + // this writable's internal timestamp. + updateTimestamp(timestamp, secondsAsMillis, nanos); + + bytesEmpty = true; + timestampEmpty = false; + } + private void clearTimestamp() { timestampEmpty = true; } @@ -656,7 +673,7 @@ static long readSevenByteLong(byte[] bytes, int offset) { * Rounds the number of milliseconds relative to the epoch down to the nearest whole number of * seconds. 500 would round to 0, -500 would round to -1. 
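 * For example (illustrative), millisToSeconds(1500) returns 1, and millisToSeconds(-1500) returns -2.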
*/ - static long millisToSeconds(long millis) { + public static long millisToSeconds(long millis) { if (millis >= 0) { return millis / 1000; } else { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java index b64a803..280c2b0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java @@ -473,26 +473,6 @@ public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOExceptio index++; } - @Override - public void writeHiveIntervalDayTime(long totalNanos) throws IOException { - - if (index > 0) { - output.write(separator); - } - - if (hiveIntervalDayTime == null) { - hiveIntervalDayTime = new HiveIntervalDayTime(); - } - if (hiveIntervalDayTimeWritable == null) { - hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); - } - DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos); - hiveIntervalDayTimeWritable.set(hiveIntervalDayTime); - LazyHiveIntervalDayTime.writeUTF8(output, hiveIntervalDayTimeWritable); - - index++; - } - /* * DECIMAL. */ diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java index 8f81df6..91ef12d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java @@ -673,42 +673,6 @@ public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOExceptio } } - @Override - public void writeHiveIntervalDayTime(long totalNanos) throws IOException { - - // Every 8 fields we write a NULL byte. - if ((fieldIndex % 8) == 0) { - if (fieldIndex > 0) { - // Write back previous 8 field's NULL byte. - output.writeByte(nullOffset, nullByte); - nullByte = 0; - nullOffset = output.getLength(); - } - // Allocate next NULL byte. - output.reserve(1); - } - - // Set bit in NULL byte when a field is NOT NULL. - nullByte |= 1 << (fieldIndex % 8); - - if (hiveIntervalDayTime == null) { - hiveIntervalDayTime = new HiveIntervalDayTime(); - } - if (hiveIntervalDayTimeWritable == null) { - hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable(); - } - DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos); - hiveIntervalDayTimeWritable.set(hiveIntervalDayTime); - hiveIntervalDayTimeWritable.writeToByteStream(output); - - fieldIndex++; - - if (fieldIndex == fieldCount) { - // Write back the final NULL byte before the last fields. - output.writeByte(nullOffset, nullByte); - } - } - /* * DECIMAL. */ diff --git storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java new file mode 100644 index 0000000..00583c4 --- /dev/null +++ storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java @@ -0,0 +1,618 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.type; + +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.util.Random; +import java.util.concurrent.TimeUnit; + +import com.google.common.base.Preconditions; + +/** + * The Pisa project is named after the famous Leonardo of Pisa, better known as Fibonacci. + * + * A Pisa timestamp is a timestamp without a time-zone (i.e. local) in the ISO-8601 calendar system, + * such as 2007-12-03 10:15:30.012345678, with accuracy to the nanosecond (1 billionth of a + * second). + * + * Pisa timestamps use the same starting point as a java.sql.Timestamp -- the number of nanoseconds + * since the epoch (1970-01-01, or the day Unix roared awake), where negative numbers represent + * earlier days. + * + * However, the PisaTimestamp class has different design requirements than java.sql.Timestamp. + * It is designed to be mutable and NOT thread-safe, to avoid high memory allocation and garbage + * collection costs. It also provides for ease of use by our vectorization code, avoiding the high + * CPU data cache miss cost of many small objects, by allowing the epoch day and nano of day to be + * stored externally (i.e. in vector arrays). + * + * And, importantly, PisaTimestamp is a light-weight class similar to the epochDay/nanoOfDay pair of + * the newer Java 8 LocalDateTime class, except the timestamp is *indifferent* to time zone. + * + * A common usage would be to treat it as UTC. + * + * You can work with days, seconds, milliseconds, nanoseconds, etc. But to work with months you + * will need to convert to an external timestamp object and use calendars, etc. + * + * The storage for a PisaTimestamp is: + * + * long epochDay + * // The number of days since 1970-01-01 (==> similar to Java 8 LocalDate). + * long nanoOfDay + * // The number of nanoseconds within the day, with the range of + * // 0 to 24 * 60 * 60 * 1,000,000,000 - 1 (==> similar to Java 8 LocalTime). + * + * Both epochDay and nanoOfDay are signed. + * + * When both epochDay and nanoOfDay are non-zero, we maintain them so they have the + * same sign.
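 + * For example (illustrative): -1.5 seconds before the epoch is epochDay 0 and nanoOfDay + * -1500000000, and the UTC instant 2000-01-01 00:00:00.000000001 is epochDay 10957 and + * nanoOfDay 1 -- in both cases the non-zero fields share a sign.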
+ * + */ + +public class PisaTimestamp { + + private static final long serialVersionUID = 1L; + + private long epochDay; + private long nanoOfDay; + + private Timestamp scratchTimestamp; + + public static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1); + public static final long NANOSECONDS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1); + public static final long NANOSECONDS_PER_DAY = TimeUnit.DAYS.toNanos(1); + + public static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1); + public static final long MILLISECONDS_PER_DAY = TimeUnit.DAYS.toMillis(1); + + public static final long SECONDS_PER_DAY = TimeUnit.DAYS.toSeconds(1); + + public static final long MIN_NANO_OF_DAY = -NANOSECONDS_PER_DAY; + public static final long MAX_NANO_OF_DAY = NANOSECONDS_PER_DAY; + + public long getEpochDay() { + return epochDay; + } + + public long getNanoOfDay() { + return nanoOfDay; + } + + public PisaTimestamp() { + epochDay = 0; + nanoOfDay = 0; + scratchTimestamp = new Timestamp(0); + } + + public PisaTimestamp(long epochDay, long nanoOfDay) { + this.epochDay = epochDay; + this.nanoOfDay = nanoOfDay; + scratchTimestamp = new Timestamp(0); + } + + public PisaTimestamp(Timestamp timestamp) { + scratchTimestamp = new Timestamp(0); + updateFromTimestamp(timestamp); + } + + public void reset() { + epochDay = 0; + nanoOfDay = 0; + } + + /** + * NOTE: This method validates the integrity rules between epoch day and nano of day, + * but not overflow/underflow of epoch day. Since epoch day overflow/underflow can result + * from client data input, it must be checked manually by the caller, as this + * class does not throw data range exceptions as a rule. It leaves that choice to the caller. + * @param epochDay + * @param nanoOfDay + * @return true if epoch day and nano of day have integrity. + */ + public static boolean validateIntegrity(long epochDay, long nanoOfDay) { + + // Range check nano per day as invariant. + if (nanoOfDay >= NANOSECONDS_PER_DAY || nanoOfDay <= -NANOSECONDS_PER_DAY) { + return false; + } + + // Signs of epoch day and nano of day must match. + if (!(epochDay >= 0 && nanoOfDay >= 0 || + epochDay <= 0 && nanoOfDay <= 0)) { + return false; + } + + return true; + } + + /** + * Set this PisaTimestamp from another PisaTimestamp. + * @param source + * @return this + */ + public PisaTimestamp update(PisaTimestamp source) { + this.epochDay = source.epochDay; + this.nanoOfDay = source.nanoOfDay; + return this; + } + + /** + * Set this PisaTimestamp from an epoch day and nano of day. + * @param epochDay + * @param nanoOfDay + * @return this + */ + public PisaTimestamp update(long epochDay, long nanoOfDay) { + + Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay), + "epochDay " + epochDay + ", nanoOfDay " + nanoOfDay + " not valid"); + + this.epochDay = epochDay; + this.nanoOfDay = nanoOfDay; + return this; + } + + /** + * Set the PisaTimestamp from a Timestamp object. + * @param timestamp + * @return this + */ + public PisaTimestamp updateFromTimestamp(Timestamp timestamp) { + + long timestampTime = timestamp.getTime(); + int nanos = timestamp.getNanos(); + + // The Timestamp class keeps whole seconds in its internal millisecond field; the sub-second part is carried by nanos.
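 + // Worked example (illustrative): new Timestamp(-1500L) reports getTime() == -1500 and + // getNanos() == 500000000. The truncating division below yields -1, the decrement makes + // it -2, and the result is epochDay 0, nanoOfDay -1500000000 (i.e. -1.5 seconds).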
+ long epochSeconds = timestampTime / MILLISECONDS_PER_SECOND; + if (epochSeconds < 0 && nanos > 0) { + epochSeconds--; + } + + nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND + nanos; + epochDay = epochSeconds / SECONDS_PER_DAY + (nanoOfDay / NANOSECONDS_PER_DAY); + + Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay)); + return this; + } + + /** + * Set this PisaTimestamp from timestamp milliseconds. + * @param timestampMilliseconds + * @return this + */ + public PisaTimestamp updateFromTimestampMilliseconds(long timestampMilliseconds) { + scratchTimestamp.setTime(timestampMilliseconds); + updateFromTimestamp(scratchTimestamp); + return this; + } + + /** + * Set this PisaTimestamp from timestamp seconds. + * @param timestampSeconds + * @return this + */ + public PisaTimestamp updateFromTimestampSeconds(long timestampSeconds) { + scratchTimestamp.setTime(timestampSeconds * MILLISECONDS_PER_SECOND); + updateFromTimestamp(scratchTimestamp); + return this; + } + + /** + * Set this PisaTimestamp from a double of seconds with fractional nanoseconds. + * @param timestampSecondsWithFractionalNanoseconds + * @return this + */ + public PisaTimestamp updateFromTimestampSecondsWithFractionalNanoseconds( + double timestampSecondsWithFractionalNanoseconds) { + + // Guard against NaN first; otherwise, BigDecimal throws an exception. (Vector operations + // sometimes do work on double Not-a-Number NaN values). + if (Double.isNaN(timestampSecondsWithFractionalNanoseconds)) { + timestampSecondsWithFractionalNanoseconds = 0; + } + // Algorithm used by TimestampWritable.doubleToTimestamp method. + // Allocates a BigDecimal object! + + long seconds = (long) timestampSecondsWithFractionalNanoseconds; + + // We must ensure the exactness of the double's fractional portion. + // 0.6 as the fraction part will be converted to 0.59999... and + // significantly reduce the savings from binary serialization. + BigDecimal bd; + + bd = new BigDecimal(String.valueOf(timestampSecondsWithFractionalNanoseconds)); + bd = bd.subtract(new BigDecimal(seconds)).multiply(new BigDecimal(1000000000)); + + int nanos = bd.intValue(); + + // Convert to millis + long millis = seconds * 1000; + if (nanos < 0) { + millis -= 1000; + nanos += 1000000000; + } + + scratchTimestamp.setTime(millis); + scratchTimestamp.setNanos(nanos); + updateFromTimestamp(scratchTimestamp); + return this; + } + + /** + * Set this PisaTimestamp from epoch seconds and signed nanos (-999999999 to 999999999). + * @param epochSeconds + * @param signedNanos + * @return this + */ + public PisaTimestamp updateFromEpochSecondsAndSignedNanos(long epochSeconds, int signedNanos) { + + long nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND + signedNanos; + long epochDay = epochSeconds / SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_DAY; + + Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay)); + + this.epochDay = epochDay; + this.nanoOfDay = nanoOfDay; + return this; + } + + /** + * Set a scratch PisaTimestamp with this PisaTimestamp's values and return the scratch object. + * @param scratch + * @return the scratch object + */ + public PisaTimestamp scratchCopy(PisaTimestamp scratch) { + + scratch.epochDay = epochDay; + scratch.nanoOfDay = nanoOfDay; + return scratch; + } + + /** + * Set a Timestamp object from this PisaTimestamp. + * @param timestamp + */ + public void timestampUpdate(Timestamp timestamp) { + + /* + * java.sql.Timestamp consists of a long variable to store milliseconds and an integer variable for nanoseconds.
+ * The long variable is used to store only the full seconds converted to millis. For example, for 1234 milliseconds, + * 1000 is stored in the long variable, and 234000000 (234 converted to nanoseconds) is stored as nanoseconds. + * Negative timestamps are also supported, but nanoseconds must be positive, so the millisecond part is + * reduced by one second. + */ + + long epochSeconds = epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND; + long integralSecInMillis; + int nanos = (int) (nanoOfDay % NANOSECONDS_PER_SECOND); // The nanoseconds. + if (nanos < 0) { + nanos = (int) NANOSECONDS_PER_SECOND + nanos; // The positive nano-part that will be added to milliseconds. + integralSecInMillis = (epochSeconds - 1) * MILLISECONDS_PER_SECOND; // Reduce by one second. + } else { + integralSecInMillis = epochSeconds * MILLISECONDS_PER_SECOND; // Full seconds converted to millis. + } + + timestamp.setTime(integralSecInMillis); + timestamp.setNanos(nanos); + } + + /** + * Return the scratch timestamp with the values of this Pisa timestamp. + * @return + */ + public Timestamp asScratchTimestamp() { + timestampUpdate(scratchTimestamp); + return scratchTimestamp; + } + + /** + * Return the scratch timestamp for use by the caller. + * @return + */ + public Timestamp useScratchTimestamp() { + return scratchTimestamp; + } + + public int compareTo(PisaTimestamp another) { + + if (epochDay == another.epochDay) { + if (nanoOfDay == another.nanoOfDay) { + return 0; + } else { + return (nanoOfDay < another.nanoOfDay ? -1 : 1); + } + } else { + return (epochDay < another.epochDay ? -1 : 1); + } + } + + public static int compareTo(long epochDay1, long nanoOfDay1, PisaTimestamp another) { + + if (epochDay1 == another.epochDay) { + if (nanoOfDay1 == another.nanoOfDay) { + return 0; + } else { + return (nanoOfDay1 < another.nanoOfDay ? -1 : 1); + } + } else { + return (epochDay1 < another.epochDay ? -1 : 1); + } + } + + public static int compareTo(PisaTimestamp pisaTimestamp1, long epochDay2, long nanoOfDay2) { + + if (pisaTimestamp1.epochDay == epochDay2) { + if (pisaTimestamp1.nanoOfDay == nanoOfDay2) { + return 0; + } else { + return (pisaTimestamp1.nanoOfDay < nanoOfDay2 ? -1 : 1); + } + } else { + return (pisaTimestamp1.epochDay < epochDay2 ? -1 : 1); + } + } + + public static int compareTo(long epochDay1, long nanoOfDay1, long epochDay2, long nanoOfDay2) { + + if (epochDay1 == epochDay2) { + if (nanoOfDay1 == nanoOfDay2) { + return 0; + } else { + return (nanoOfDay1 < nanoOfDay2 ? -1 : 1); + } + } else { + return (epochDay1 < epochDay2 ? -1 : 1); + } + } + + /** + * Standard equals method override. + */ + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != getClass()) { + return false; + } + return equals((PisaTimestamp) obj); + } + + public boolean equals(PisaTimestamp other) { + return epochDay == other.epochDay && nanoOfDay == other.nanoOfDay; + } + + public static void add(PisaTimestamp pisaTimestamp1, PisaTimestamp pisaTimestamp2, + PisaTimestamp result) { + add(pisaTimestamp1.epochDay, pisaTimestamp1.nanoOfDay, + pisaTimestamp2.epochDay, pisaTimestamp2.nanoOfDay, + result); + } + + public static void add(long epochDay1, long nanoOfDay1, + long epochDay2, long nanoOfDay2, + PisaTimestamp result) { + + // Validate integrity rules between epoch day and nano of day.
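 + // An illustrative trace of this method: (0, -1500000000) + (1, 250000000) gives the + // intermediate sum (1, -1250000000); the signs disagree, so a day is borrowed: + // (0, 86398750000000), i.e. 86,398.75 seconds, which matches -1.5s + 86,400.25s.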
+ Preconditions.checkState(PisaTimestamp.validateIntegrity(epochDay1, nanoOfDay1)); + Preconditions.checkState(PisaTimestamp.validateIntegrity(epochDay2, nanoOfDay2)); + + long intermediateEpochDay = epochDay1 + epochDay2; + long intermediateNanoOfDay = nanoOfDay1 + nanoOfDay2; + + // Normalize so both are positive or both are negative. + long normalizedEpochDay; + long normalizedNanoOfDay; + if (intermediateEpochDay > 0 && intermediateNanoOfDay < 0) { + normalizedEpochDay = intermediateEpochDay - 1; + normalizedNanoOfDay = intermediateNanoOfDay + NANOSECONDS_PER_DAY; + } else if (intermediateEpochDay < 0 && intermediateNanoOfDay > 0) { + normalizedEpochDay = intermediateEpochDay + 1; + normalizedNanoOfDay = intermediateNanoOfDay - NANOSECONDS_PER_DAY; + } else { + normalizedEpochDay = intermediateEpochDay; + normalizedNanoOfDay = intermediateNanoOfDay; + } + + long resultEpochDay; + long resultNanoOfDay; + if (normalizedNanoOfDay >= NANOSECONDS_PER_DAY || normalizedNanoOfDay <= -NANOSECONDS_PER_DAY) { + // Adjust for carry or overflow... + + resultEpochDay = normalizedEpochDay + normalizedNanoOfDay / NANOSECONDS_PER_DAY; + resultNanoOfDay = normalizedNanoOfDay % NANOSECONDS_PER_DAY; + + } else { + resultEpochDay = normalizedEpochDay; + resultNanoOfDay = normalizedNanoOfDay; + } + + // The update method will validate integrity rules between epoch day and nano of day, + // but not overflow/underflow of epoch day. + result.update(resultEpochDay, resultNanoOfDay); + } + + public static void addSeconds(PisaTimestamp timestamp1, long epochSeconds, PisaTimestamp result) { + long epochDay = epochSeconds / SECONDS_PER_DAY; + long nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND; + add(timestamp1.epochDay, timestamp1.nanoOfDay, epochDay, nanoOfDay, result); + } + + public static void subtract(PisaTimestamp timestamp1, PisaTimestamp timestamp2, + PisaTimestamp result) { + + add(timestamp1.epochDay, timestamp1.nanoOfDay, -timestamp2.epochDay, -timestamp2.nanoOfDay, + result); + } + + public static void subtract(long epochDay1, long nanoOfDay1, + long epochDay2, long nanoOfDay2, + PisaTimestamp result) { + + add(epochDay1, nanoOfDay1, -epochDay2, -nanoOfDay2, result); + } + + public static void subtractSeconds(PisaTimestamp timestamp1, long epochSeconds, + PisaTimestamp result) { + long epochDay = epochSeconds / SECONDS_PER_DAY; + long nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND; + add(timestamp1.epochDay, timestamp1.nanoOfDay, -epochDay, -nanoOfDay, result); + } + + /** + * Rounds the number of milliseconds relative to the epoch down to the nearest whole number of + * seconds. 500 would round to 0, -500 would round to -1. + */ + public static long timestampMillisToSeconds(long millis) { + if (millis >= 0) { + return millis / 1000; + } else { + return (millis - 999) / 1000; + } + } + + /** + * Return a double with the integer part as the seconds and the fractional part as + * the nanoseconds the way the Timestamp class does it. + * @return seconds.nanoseconds + */ + public double getTimestampSecondsWithFractionalNanos() { + // Algorithm must be the same as TimestampWritable.getDouble method. + timestampUpdate(scratchTimestamp); + double seconds = timestampMillisToSeconds(scratchTimestamp.getTime()); + double nanos = scratchTimestamp.getNanos(); + return seconds + nanos / PisaTimestamp.NANOSECONDS_PER_SECOND; + } + + /** + * Return an integer as the seconds the way the Timestamp class does it. 
+ * @return seconds + */ + public long getTimestampSeconds() { + // Algorithm must be the same as TimestampWritable.getSeconds method. + timestampUpdate(scratchTimestamp); + return timestampMillisToSeconds(scratchTimestamp.getTime()); + } + + /** + * Return a long with the milliseconds the way the Timestamp class does it. + * @return milliseconds + */ + public long getTimestampMilliseconds() { + timestampUpdate(scratchTimestamp); + return scratchTimestamp.getTime(); + } + + /** + * Return the epoch seconds. + * @return + */ + public long getEpochSeconds() { + return epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND; + } + + /** + * Return the epoch seconds, given the epoch day and nano of day. + * @param epochDay + * @param nanoOfDay + * @return + */ + public static long getEpochSecondsFromEpochDayAndNanoOfDay(long epochDay, long nanoOfDay) { + return epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND; + } + + /** + * Return the signed nanos (-999999999 to 999999999). + * NOTE: Not the same as Timestamp class nanos (which are always positive). + */ + public int getSignedNanos() { + return (int) (nanoOfDay % NANOSECONDS_PER_SECOND); + } + + /** + * Return the signed nanos (-999999999 to 999999999). + * NOTE: Not the same as Timestamp class nanos (which are always positive). + */ + public static int getSignedNanos(long nanoOfDay) { + return (int) (nanoOfDay % NANOSECONDS_PER_SECOND); + } + + /** + * Return the epoch milliseconds. + * @return + */ + public long getEpochMilliseconds() { + return epochDay * MILLISECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_MILLISECOND; + } + + /** + * Return the epoch milliseconds, given the epoch day and nano of day. + * @param epochDay + * @param nanoOfDay + * @return + */ + public static long getEpochMillisecondsFromEpochDayAndNanoOfDay(long epochDay, long nanoOfDay) { + return epochDay * MILLISECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_MILLISECOND; + } + + @Override + public int hashCode() { + // Combine the two longs without boxing them.
+ return (int) ((epochDay ^ (epochDay >>> 32)) ^ (nanoOfDay ^ (nanoOfDay >>> 32))); + } + + @Override + public String toString() { + timestampUpdate(scratchTimestamp); + return scratchTimestamp.toString(); + } + + public static Timestamp getRandTimestamp(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt((int) NANOSECONDS_PER_SECOND))); + } + String timestampStr = String.format("%04d-%02d-%02d %02d:%02d:%02d%s", + Integer.valueOf(0 + r.nextInt(10000)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + Timestamp timestampVal; + try { + timestampVal = Timestamp.valueOf(timestampStr); + } catch (Exception e) { + System.err.println("Timestamp string " + timestampStr + " did not parse"); + throw e; + } + return timestampVal; + } +} \ No newline at end of file diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index fcb1ae9..4ae9c47 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -42,6 +42,7 @@ DOUBLE, BYTES, DECIMAL, + TIMESTAMP, STRUCT, LIST, MAP, diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java new file mode 100644 index 0000000..6968a7a --- /dev/null +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java @@ -0,0 +1,498 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector; + +import java.sql.Timestamp; +import java.util.Arrays; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.io.Writable; + +import com.google.common.base.Preconditions; + +/** + * This class represents a nullable timestamp column vector capable of handling a wide range of + * timestamp values. + * + * We use the PisaTimestamp which is designed to be mutable and avoid the heavy memory allocation + * and CPU data cache miss costs.
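 + * + * Typical usage (an illustrative sketch; batch, rowId, and timestamp are assumed to exist): + * + * TimestampColumnVector tcv = (TimestampColumnVector) batch.cols[0]; + * tcv.set(rowId, new PisaTimestamp(timestamp)); // write a row + * long millis = tcv.getTimestampMilliseconds(rowId); // read it back as milliseconds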
+ */ +public class TimestampColumnVector extends ColumnVector { + + /* + * The storage arrays for this column vector correspond to the storage of a PisaTimestamp: + */ + private long[] epochDay; + // An array of the number of days since 1970-01-01 (similar to Java 8 LocalDate). + + private long[] nanoOfDay; + // An array of the number of nanoseconds within the day, with the range of + // 0 to 24 * 60 * 60 * 1,000,000,000 - 1 (similar to Java 8 LocalTime). + + /* + * Scratch objects. + */ + private PisaTimestamp scratchPisaTimestamp; + // Convenience scratch Pisa timestamp object. + + private Writable scratchWritable; + // Supports keeping a TimestampWritable object without having to import that definition... + + /** + * Use this constructor by default. All column vectors + * should normally be the default size. + */ + public TimestampColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Don't use this except for testing purposes. + * + * @param len the number of rows + */ + public TimestampColumnVector(int len) { + super(len); + + epochDay = new long[len]; + nanoOfDay = new long[len]; + + scratchPisaTimestamp = new PisaTimestamp(); + + scratchWritable = null; // Allocated by caller. + } + + /** + * Return the number of rows. + * @return + */ + public int getLength() { + return epochDay.length; + } + + /** + * Return a row's epoch day. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return + */ + public long getEpochDay(int elementNum) { + return epochDay[elementNum]; + } + + /** + * Return a row's nano of day. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return + */ + public long getNanoOfDay(int elementNum) { + return nanoOfDay[elementNum]; + } + + /** + * Get a scratch PisaTimestamp object from a row of the column. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return scratch + */ + public PisaTimestamp asScratchPisaTimestamp(int elementNum) { + scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]); + return scratchPisaTimestamp; + } + + /** + * Set a PisaTimestamp object from a row of the column. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param pisaTimestamp + * @param elementNum + */ + public void pisaTimestampUpdate(PisaTimestamp pisaTimestamp, int elementNum) { + pisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]); + } + + /** + * Set a Timestamp object from a row of the column. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param timestamp + * @param elementNum + */ + public void timestampUpdate(Timestamp timestamp, int elementNum) { + scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]); + scratchPisaTimestamp.timestampUpdate(timestamp); + } + + /** + * Compare row to PisaTimestamp. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @param pisaTimestamp + * @return -1, 0, 1 standard compareTo values. + */ + public int compareTo(int elementNum, PisaTimestamp pisaTimestamp) { + return PisaTimestamp.compareTo(epochDay[elementNum], nanoOfDay[elementNum], pisaTimestamp); + } + + /** + * Compare PisaTimestamp to row. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param pisaTimestamp + * @param elementNum + * @return -1, 0, 1 standard compareTo values.
+ */ + public int compareTo(PisaTimestamp pisaTimestamp, int elementNum) { + return PisaTimestamp.compareTo(pisaTimestamp, epochDay[elementNum], nanoOfDay[elementNum]); + } + + /** + * Compare a row to another TimestampColumnVector's row. + * @param elementNum1 + * @param timestampColVector2 + * @param elementNum2 + * @return + */ + public int compareTo(int elementNum1, TimestampColumnVector timestampColVector2, + int elementNum2) { + return PisaTimestamp.compareTo( + epochDay[elementNum1], nanoOfDay[elementNum1], + timestampColVector2.epochDay[elementNum2], timestampColVector2.nanoOfDay[elementNum2]); + } + + /** + * Compare another TimestampColumnVector's row to a row. + * @param timestampColVector1 + * @param elementNum1 + * @param elementNum2 + * @return + */ + public int compareTo(TimestampColumnVector timestampColVector1, int elementNum1, + int elementNum2) { + return PisaTimestamp.compareTo( + timestampColVector1.epochDay[elementNum1], timestampColVector1.nanoOfDay[elementNum1], + epochDay[elementNum2], nanoOfDay[elementNum2]); + } + + public void add(PisaTimestamp timestamp1, PisaTimestamp timestamp2, int resultElementNum) { + PisaTimestamp.add(timestamp1, timestamp2, scratchPisaTimestamp); + epochDay[resultElementNum] = scratchPisaTimestamp.getEpochDay(); + nanoOfDay[resultElementNum] = scratchPisaTimestamp.getNanoOfDay(); + } + + public void subtract(PisaTimestamp timestamp1, PisaTimestamp timestamp2, int resultElementNum) { + PisaTimestamp.subtract(timestamp1, timestamp2, scratchPisaTimestamp); + epochDay[resultElementNum] = scratchPisaTimestamp.getEpochDay(); + nanoOfDay[resultElementNum] = scratchPisaTimestamp.getNanoOfDay(); + } + + /** + * Return row as a double with the integer part as the seconds and the fractional part as + * the nanoseconds the way the Timestamp class does it. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return seconds.nanoseconds + */ + public double getTimestampSecondsWithFractionalNanos(int elementNum) { + scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]); + return scratchPisaTimestamp.getTimestampSecondsWithFractionalNanos(); + } + + /** + * Return row as integer as the seconds the way the Timestamp class does it. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return seconds + */ + public long getTimestampSeconds(int elementNum) { + scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]); + return scratchPisaTimestamp.getTimestampSeconds(); + } + + + /** + * Return row as milliseconds the way the Timestamp class does it. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return + */ + public long getTimestampMilliseconds(int elementNum) { + scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]); + return scratchPisaTimestamp.getTimestampMilliseconds(); + } + + /** + * Return row as epoch seconds. + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return + */ + public long getEpochSeconds(int elementNum) { + return PisaTimestamp.getEpochSecondsFromEpochDayAndNanoOfDay(epochDay[elementNum], nanoOfDay[elementNum]); + } + + /** + * Return row as epoch milliseconds. + * We assume the entry has already been NULL checked and isRepeated adjusted. 
+ * @param elementNum + * @return + */ + public long getEpochMilliseconds(int elementNum) { + return PisaTimestamp.getEpochMillisecondsFromEpochDayAndNanoOfDay(epochDay[elementNum], nanoOfDay[elementNum]); + } + + /** + * Return row as signed nanos (-999999999 to 999999999). + * NOTE: This is not the same as the Timestamp class nanos (which is always positive). + * We assume the entry has already been NULL checked and isRepeated adjusted. + * @param elementNum + * @return + */ + public int getSignedNanos(int elementNum) { + return PisaTimestamp.getSignedNanos(nanoOfDay[elementNum]); + } + + /** + * Get scratch timestamp with value of a row. + * @param elementNum + * @return + */ + public Timestamp asScratchTimestamp(int elementNum) { + scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]); + return scratchPisaTimestamp.asScratchTimestamp(); + } + + /** + * Get scratch Pisa timestamp for use by the caller. + * @return + */ + public PisaTimestamp useScratchPisaTimestamp() { + return scratchPisaTimestamp; + } + + @Override + public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { + + TimestampColumnVector timestampColVector = (TimestampColumnVector) inputVector; + + epochDay[outElementNum] = timestampColVector.epochDay[inputElementNum]; + nanoOfDay[outElementNum] = timestampColVector.nanoOfDay[inputElementNum]; + } + + // Simplify vector by brute-force flattening noNulls and isRepeating + // This can be used to reduce combinatorial explosion of code paths in VectorExpressions + // with many arguments. + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + if (isRepeating) { + isRepeating = false; + long repeatEpochDay = epochDay[0]; + long repeatNanoOfDay = nanoOfDay[0]; + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + epochDay[i] = repeatEpochDay; + nanoOfDay[i] = repeatNanoOfDay; + } + } else { + Arrays.fill(epochDay, 0, size, repeatEpochDay); + Arrays.fill(nanoOfDay, 0, size, repeatNanoOfDay); + } + flattenRepeatingNulls(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + /** + * Set a row from a PisaTimestamp. + * We assume the entry has already been isRepeated adjusted. + * @param elementNum + * @param pisaTimestamp + */ + public void set(int elementNum, PisaTimestamp pisaTimestamp) { + this.epochDay[elementNum] = pisaTimestamp.getEpochDay(); + this.nanoOfDay[elementNum] = pisaTimestamp.getNanoOfDay(); + } + + /** + * Set a row from a timestamp. + * We assume the entry has already been isRepeated adjusted. + * @param elementNum + * @param timestamp + */ + public void set(int elementNum, Timestamp timestamp) { + scratchPisaTimestamp.updateFromTimestamp(timestamp); + this.epochDay[elementNum] = scratchPisaTimestamp.getEpochDay(); + this.nanoOfDay[elementNum] = scratchPisaTimestamp.getNanoOfDay(); + } + + /** + * Set a row from epoch seconds and signed nanos (-999999999 to 999999999). + * @param elementNum + * @param epochSeconds + * @param signedNanos + */ + public void setEpochSecondsAndSignedNanos(int elementNum, long epochSeconds, int signedNanos) { + scratchPisaTimestamp.updateFromEpochSecondsAndSignedNanos(epochSeconds, signedNanos); + set(elementNum, scratchPisaTimestamp); + } + + /** + * Set a row from timestamp milliseconds. + * We assume the entry has already been isRepeated adjusted.
+ * @param elementNum + * @param timestampMilliseconds + */ + public void setTimestampMilliseconds(int elementNum, long timestampMilliseconds) { + scratchPisaTimestamp.updateFromTimestampMilliseconds(timestampMilliseconds); + set(elementNum, scratchPisaTimestamp.useScratchTimestamp()); + } + + /** + * Set a row from timestamp seconds. + * We assume the entry has already been isRepeated adjusted. + * @param elementNum + * @param timestampSeconds + */ + public void setTimestampSeconds(int elementNum, long timestampSeconds) { + scratchPisaTimestamp.updateFromTimestampSeconds(timestampSeconds); + set(elementNum, scratchPisaTimestamp); + } + + /** + * Set a row from a double of timestamp seconds with fractional nanoseconds. + * We assume the entry has already been isRepeated adjusted. + * @param elementNum + * @param secondsWithFractionalNanoseconds + */ + public void setTimestampSecondsWithFractionalNanoseconds(int elementNum, + double secondsWithFractionalNanoseconds) { + scratchPisaTimestamp.updateFromTimestampSecondsWithFractionalNanoseconds(secondsWithFractionalNanoseconds); + set(elementNum, scratchPisaTimestamp); + } + + /** + * Set row to standard null value(s). + * We assume the entry has already been isRepeated adjusted. + * @param elementNum + */ + public void setNullValue(int elementNum) { + epochDay[elementNum] = 1; + nanoOfDay[elementNum] = 1; + } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public void copySelected( + boolean selectedInUse, int[] sel, int size, TimestampColumnVector output) { + + // Output has nulls if and only if input has nulls. + output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.epochDay[0] = epochDay[0]; + output.nanoOfDay[0] = nanoOfDay[0]; + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.epochDay[i] = epochDay[i]; + output.nanoOfDay[i] = nanoOfDay[i]; + } + } + else { + System.arraycopy(epochDay, 0, output.epochDay, 0, size); + System.arraycopy(nanoOfDay, 0, output.nanoOfDay, 0, size); + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + /** + * Fill all the vector entries with a PisaTimestamp. + * @param pisaTimestamp + */ + public void fill(PisaTimestamp pisaTimestamp) { + noNulls = true; + isRepeating = true; + epochDay[0] = pisaTimestamp.getEpochDay(); + nanoOfDay[0] = pisaTimestamp.getNanoOfDay(); + } + + /** + * Fill all the vector entries with a timestamp. + * @param timestamp + */ + public void fill(Timestamp timestamp) { + noNulls = true; + isRepeating = true; + scratchPisaTimestamp.updateFromTimestamp(timestamp); + epochDay[0] = scratchPisaTimestamp.getEpochDay(); + nanoOfDay[0] = scratchPisaTimestamp.getNanoOfDay(); + } + + /** + * Return a convenience writable object stored by this column vector. + * Supports keeping a TimestampWritable object without having to import that definition...
+ * @return + */ + public Writable getScratchWritable() { + return scratchWritable; + } + + /** + * Set the convenience writable object stored by this column vector. + * @param scratchWritable + */ + public void setScratchWritable(Writable scratchWritable) { + this.scratchWritable = scratchWritable; + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + scratchPisaTimestamp.update(epochDay[row], nanoOfDay[row]); + buffer.append(scratchPisaTimestamp.toString()); + } else { + buffer.append("null"); + } + } +} \ No newline at end of file diff --git storage-api/src/test/org/apache/hadoop/hive/common/type/TestPisaTimestamp.java storage-api/src/test/org/apache/hadoop/hive/common/type/TestPisaTimestamp.java new file mode 100644 index 0000000..74305f2 --- /dev/null +++ storage-api/src/test/org/apache/hadoop/hive/common/type/TestPisaTimestamp.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.type; + +import org.junit.Test; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.sql.Timestamp; +import java.util.Random; + +import static org.junit.Assert.*; + +/** + * Test for PisaTimestamp. + */ +public class TestPisaTimestamp { + + private static int TEST_COUNT = 5000; + + @Test + public void testPisaTimestampCreate() throws Exception { + + Random r = new Random(1234); + + for (int i = 0; i < TEST_COUNT; i++) { + Timestamp randTimestamp = PisaTimestamp.getRandTimestamp(r); + PisaTimestamp pisaTimestamp = new PisaTimestamp(randTimestamp); + Timestamp reconstructedTimestamp = new Timestamp(0); + pisaTimestamp.timestampUpdate(reconstructedTimestamp); + assertEquals(randTimestamp, reconstructedTimestamp); + } + } + + static BigDecimal BIG_MAX_LONG = new BigDecimal(Long.MAX_VALUE); + static BigDecimal BIG_MIN_LONG = new BigDecimal(Long.MIN_VALUE); + static BigDecimal BIG_NANOSECONDS_PER_DAY = new BigDecimal(PisaTimestamp.NANOSECONDS_PER_DAY); + + static boolean beyondLongRange = false; + + private BigDecimal[] randomEpochDayAndNanoOfDay(Random r) { + double randDouble = (r.nextDouble() - 0.5D) * 2.0D; + randDouble *= PisaTimestamp.NANOSECONDS_PER_DAY; + randDouble *= 365 * 10000; + BigDecimal bigDecimal = new BigDecimal(randDouble); + bigDecimal = bigDecimal.setScale(0, RoundingMode.HALF_UP); + + if (bigDecimal.compareTo(BIG_MAX_LONG) > 0 || bigDecimal.compareTo(BIG_MIN_LONG) < 0) { + beyondLongRange = true; + } + + BigDecimal[] divideAndRemainder = bigDecimal.divideAndRemainder(BIG_NANOSECONDS_PER_DAY); + + return new BigDecimal[] {divideAndRemainder[0], divideAndRemainder[1], bigDecimal}; + } + + private BigDecimal pisaTimestampToBig(PisaTimestamp
pisaTimestamp) { + BigDecimal bigNanoOfDay = new BigDecimal(pisaTimestamp.getNanoOfDay()); + + BigDecimal bigEpochDay = new BigDecimal(pisaTimestamp.getEpochDay()); + BigDecimal result = bigEpochDay.multiply(BIG_NANOSECONDS_PER_DAY); + result = result.add(bigNanoOfDay); + return result; + } + + @Test + public void testPisaTimestampArithmetic() throws Exception { + + Random r = new Random(1234); + + + for (int i = 0; i < TEST_COUNT; i++) { + BigDecimal[] random1 = randomEpochDayAndNanoOfDay(r); + long epochDay1 = random1[0].longValue(); + long nanoOfDay1 = random1[1].longValue(); + PisaTimestamp pisa1 = new PisaTimestamp(epochDay1, nanoOfDay1); + BigDecimal big1 = random1[2]; + + BigDecimal[] random2 = randomEpochDayAndNanoOfDay(r); + long epochDay2 = random2[0].longValue(); + long nanoOfDay2 = random2[1].longValue(); + PisaTimestamp pisa2 = new PisaTimestamp(epochDay2, nanoOfDay2); + BigDecimal big2 = random2[2]; + + BigDecimal expectedBig; + PisaTimestamp pisaResult = new PisaTimestamp(); + if (i % 2 == 0) { + expectedBig = big1.add(big2); + PisaTimestamp.add(pisa1, pisa2, pisaResult); + } else { + expectedBig = big1.add(big2.negate()); + PisaTimestamp.subtract(pisa1, pisa2, pisaResult); + } + BigDecimal resultBig = pisaTimestampToBig(pisaResult); + assertEquals(expectedBig, resultBig); + + } + } +}
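As a closing illustration of the APIs added by this patch, here is a minimal round-trip sketch. It is not part of the patch itself; the class name PisaTimestampRoundTrip and the sample values are invented for illustration, and it assumes the PisaTimestamp and TimestampColumnVector classes exactly as defined in the hunks above.

import java.sql.Timestamp;

import org.apache.hadoop.hive.common.type.PisaTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class PisaTimestampRoundTrip {
  public static void main(String[] args) {
    // 1970-01-02 00:00:00.000000001 UTC, built from epoch millis to stay timezone-independent.
    Timestamp ts = new Timestamp(86400000L);
    ts.setNanos(1);

    // Decompose into the epochDay/nanoOfDay storage format: expect 1 / 1.
    PisaTimestamp pisa = new PisaTimestamp(ts);
    System.out.println(pisa.getEpochDay() + " / " + pisa.getNanoOfDay());

    // Write the value into a column vector row and read it back losslessly.
    TimestampColumnVector tcv = new TimestampColumnVector(1024);
    tcv.set(0, pisa);
    System.out.println(ts.equals(tcv.asScratchTimestamp(0))); // expect: true

    // Subtracting a timestamp from itself yields the zero difference (0 / 0).
    PisaTimestamp diff = new PisaTimestamp();
    PisaTimestamp.subtract(pisa, pisa, diff);
    System.out.println(diff.getEpochDay() + " / " + diff.getNanoOfDay());
  }
}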