diff --git ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
index 6c57da2..3745bd8 100644
--- ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
+++ ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
@@ -35,7 +35,40 @@ private static String [][] templateExpansions = {
- // The following datetime/interval arithmetic operations can be done using the vectorized values
+
+ /**
+ * date is stored in a LongColumnVector as epochDays
+ * interval_year_month is stored in a LongColumnVector as epochMonths
+ *
+ * interval_day_time and timestamp are stored in a TimestampColumnVector (2 longs to hold
+ * a very large number of nanoseconds)
+ *
+ * date - date --> type: interval_day_time
+ * timestamp - date --> type: interval_day_time
+ * date - timestamp --> type: interval_day_time
+ * timestamp - timestamp --> type: interval_day_time
+ *
+ * date +|- interval_day_time --> type: timestamp
+ * interval_day_time + date --> type: timestamp
+ *
+ * timestamp +|- interval_day_time --> type: timestamp
+ * interval_day_time +|- timestamp --> type: timestamp
+ *
+ * date +|- interval_year_month --> type: date
+ * interval_year_month + date --> type: date
+ *
+ * timestamp +|- interval_year_month --> type: timestamp
+ * interval_year_month + timestamp --> type: timestamp
+ *
+ * Adding/subtracting months is done with a Calendar object
+ *
+ * Timestamp compared with long: the long is interpreted as seconds
+ * Timestamp compared with double: the double is interpreted as seconds with fractional nanoseconds
+ *
+ */
+
+ // The following datetime/interval arithmetic operations can be done using the vectorized values.
+ // Type interval_year_month (LongColumnVector storing months).
 {"DTIColumnArithmeticDTIScalarNoConvert", "Add", "interval_year_month", "interval_year_month", "+"},
 {"DTIScalarArithmeticDTIColumnNoConvert", "Add", "interval_year_month", "interval_year_month", "+"},
 {"DTIColumnArithmeticDTIColumnNoConvert", "Add", "interval_year_month", "interval_year_month", "+"},
@@ -44,80 +77,114 @@
 {"DTIScalarArithmeticDTIColumnNoConvert", "Subtract", "interval_year_month", "interval_year_month", "-"},
 {"DTIColumnArithmeticDTIColumnNoConvert", "Subtract", "interval_year_month", "interval_year_month", "-"},
-
- {"DTIColumnArithmeticDTIScalarNoConvert", "Add", "interval_day_time", "interval_day_time", "+"},
- {"DTIScalarArithmeticDTIColumnNoConvert", "Add", "interval_day_time", "interval_day_time", "+"},
- {"DTIColumnArithmeticDTIColumnNoConvert", "Add", "interval_day_time", "interval_day_time", "+"},
-
- {"DTIColumnArithmeticDTIScalarNoConvert", "Subtract", "interval_day_time", "interval_day_time", "-"},
- {"DTIScalarArithmeticDTIColumnNoConvert", "Subtract", "interval_day_time", "interval_day_time", "-"},
- {"DTIColumnArithmeticDTIColumnNoConvert", "Subtract", "interval_day_time", "interval_day_time", "-"},
-
- {"DTIColumnArithmeticDTIScalarNoConvert", "Add", "interval_day_time", "timestamp", "+"},
- {"DTIScalarArithmeticDTIColumnNoConvert", "Add", "interval_day_time", "timestamp", "+"},
- {"DTIColumnArithmeticDTIColumnNoConvert", "Add", "interval_day_time", "timestamp", "+"},
-
- {"DTIColumnArithmeticDTIScalarNoConvert", "Add", "timestamp", "interval_day_time", "+"},
- {"DTIScalarArithmeticDTIColumnNoConvert", "Add", "timestamp", "interval_day_time", "+"},
- {"DTIColumnArithmeticDTIColumnNoConvert", "Add", "timestamp", "interval_day_time", "+"},
-
- {"DTIColumnArithmeticDTIScalarNoConvert", "Subtract", "timestamp", "interval_day_time", "-"},
- 
{"DTIScalarArithmeticDTIColumnNoConvert", "Subtract", "timestamp", "interval_day_time", "-"}, - {"DTIColumnArithmeticDTIColumnNoConvert", "Subtract", "timestamp", "interval_day_time", "-"}, - - {"DTIColumnArithmeticDTIScalarNoConvert", "Subtract", "timestamp", "timestamp", "-"}, - {"DTIScalarArithmeticDTIColumnNoConvert", "Subtract", "timestamp", "timestamp", "-"}, - {"DTIColumnArithmeticDTIColumnNoConvert", "Subtract", "timestamp", "timestamp", "-"}, - - // The following datetime/interval arithmetic functions require type conversion for one or both operands - {"ColumnArithmeticColumnWithConvert", "Subtract", "date", "date", "-", "TimestampUtils.daysToNanoseconds", "TimestampUtils.daysToNanoseconds"}, - {"ScalarArithmeticColumnWithConvert", "Subtract", "date", "date", "-", "TimestampUtils.daysToNanoseconds", "TimestampUtils.daysToNanoseconds"}, - {"ColumnArithmeticScalarWithConvert", "Subtract", "date", "date", "-", "TimestampUtils.daysToNanoseconds", "TimestampUtils.daysToNanoseconds"}, - - {"ColumnArithmeticColumnWithConvert", "Subtract", "date", "timestamp", "-", "TimestampUtils.daysToNanoseconds", ""}, - {"ScalarArithmeticColumnWithConvert", "Subtract", "date", "timestamp", "-", "TimestampUtils.daysToNanoseconds", ""}, - {"ColumnArithmeticScalarWithConvert", "Subtract", "date", "timestamp", "-", "TimestampUtils.daysToNanoseconds", ""}, - - {"ColumnArithmeticColumnWithConvert", "Subtract", "timestamp", "date", "-", "", "TimestampUtils.daysToNanoseconds"}, - {"ScalarArithmeticColumnWithConvert", "Subtract", "timestamp", "date", "-", "", "TimestampUtils.daysToNanoseconds"}, - {"ColumnArithmeticScalarWithConvert", "Subtract", "timestamp", "date", "-", "", "TimestampUtils.daysToNanoseconds"}, - - {"ColumnArithmeticColumnWithConvert", "Add", "date", "interval_day_time", "+", "TimestampUtils.daysToNanoseconds", ""}, - {"ScalarArithmeticColumnWithConvert", "Add", "date", "interval_day_time", "+", "TimestampUtils.daysToNanoseconds", ""}, - {"ColumnArithmeticScalarWithConvert", "Add", "date", "interval_day_time", "+", "TimestampUtils.daysToNanoseconds", ""}, - - {"ColumnArithmeticColumnWithConvert", "Subtract", "date", "interval_day_time", "-", "TimestampUtils.daysToNanoseconds", ""}, - {"ScalarArithmeticColumnWithConvert", "Subtract", "date", "interval_day_time", "-", "TimestampUtils.daysToNanoseconds", ""}, - {"ColumnArithmeticScalarWithConvert", "Subtract", "date", "interval_day_time", "-", "TimestampUtils.daysToNanoseconds", ""}, - - {"ColumnArithmeticColumnWithConvert", "Add", "interval_day_time", "date", "+", "", "TimestampUtils.daysToNanoseconds"}, - {"ScalarArithmeticColumnWithConvert", "Add", "interval_day_time", "date", "+", "", "TimestampUtils.daysToNanoseconds"}, - {"ColumnArithmeticScalarWithConvert", "Add", "interval_day_time", "date", "+", "", "TimestampUtils.daysToNanoseconds"}, - - // Most year-month interval arithmetic needs its own generation - {"DateTimeColumnArithmeticIntervalColumnWithConvert", "Add", "date", "interval_year_month", "+", "", "dtm.addMonthsToDays"}, - {"DateTimeScalarArithmeticIntervalColumnWithConvert", "Add", "date", "interval_year_month", "+", "", "dtm.addMonthsToDays"}, - {"DateTimeColumnArithmeticIntervalScalarWithConvert", "Add", "date", "interval_year_month", "+", "", "dtm.addMonthsToDays"}, - - {"DateTimeColumnArithmeticIntervalColumnWithConvert", "Subtract", "date", "interval_year_month", "-", "", "dtm.addMonthsToDays"}, - {"DateTimeScalarArithmeticIntervalColumnWithConvert", "Subtract", "date", "interval_year_month", "-", "", "dtm.addMonthsToDays"}, 
- {"DateTimeColumnArithmeticIntervalScalarWithConvert", "Subtract", "date", "interval_year_month", "-", "", "dtm.addMonthsToDays"}, - - {"DateTimeColumnArithmeticIntervalColumnWithConvert", "Add", "timestamp", "interval_year_month", "+", "", "dtm.addMonthsToNanosUtc"}, - {"DateTimeScalarArithmeticIntervalColumnWithConvert", "Add", "timestamp", "interval_year_month", "+", "", "dtm.addMonthsToNanosUtc"}, - {"DateTimeColumnArithmeticIntervalScalarWithConvert", "Add", "timestamp", "interval_year_month", "+", "", "dtm.addMonthsToNanosUtc"}, - - {"DateTimeColumnArithmeticIntervalColumnWithConvert", "Subtract", "timestamp", "interval_year_month", "-", "", "dtm.addMonthsToNanosUtc"}, - {"DateTimeScalarArithmeticIntervalColumnWithConvert", "Subtract", "timestamp", "interval_year_month", "-", "", "dtm.addMonthsToNanosUtc"}, - {"DateTimeColumnArithmeticIntervalScalarWithConvert", "Subtract", "timestamp", "interval_year_month", "-", "", "dtm.addMonthsToNanosUtc"}, - - {"IntervalColumnArithmeticDateTimeColumnWithConvert", "Add", "interval_year_month", "date", "+", "", "dtm.addMonthsToDays"}, - {"IntervalScalarArithmeticDateTimeColumnWithConvert", "Add", "interval_year_month", "date", "+", "", "dtm.addMonthsToDays"}, - {"IntervalColumnArithmeticDateTimeScalarWithConvert", "Add", "interval_year_month", "date", "+", "", "dtm.addMonthsToDays"}, - - {"IntervalColumnArithmeticDateTimeColumnWithConvert", "Add", "interval_year_month", "timestamp", "+", "", "dtm.addMonthsToNanosUtc"}, - {"IntervalScalarArithmeticDateTimeColumnWithConvert", "Add", "interval_year_month", "timestamp", "+", "", "dtm.addMonthsToNanosUtc"}, - {"IntervalColumnArithmeticDateTimeScalarWithConvert", "Add", "interval_year_month", "timestamp", "+", "", "dtm.addMonthsToNanosUtc"}, - + // Arithmetic on two TimestampColumnVector base classes. + {"TimestampColumnArithmeticTimestampColumnBase", "Add", "Col", "Column"}, + {"TimestampScalarArithmeticTimestampColumnBase", "Add", "Scalar", "Column"}, + {"TimestampColumnArithmeticTimestampScalarBase", "Add", "Col", "Scalar"}, + + {"TimestampColumnArithmeticTimestampColumnBase", "Subtract", "Col", "Column"}, + {"TimestampScalarArithmeticTimestampColumnBase", "Subtract", "Scalar", "Column"}, + {"TimestampColumnArithmeticTimestampScalarBase", "Subtract", "Col", "Scalar"}, + + // Arithmetic on two type interval_day_time (TimestampColumnVector storing nanosecond interval + // in 2 longs) produces a interval_day_time. + {"TimestampColumnArithmeticTimestampScalar", "Add", "interval_day_time", "Col", "interval_day_time", "Scalar"}, + {"TimestampScalarArithmeticTimestampColumn", "Add", "interval_day_time", "Scalar", "interval_day_time", "Column"}, + {"TimestampColumnArithmeticTimestampColumn", "Add", "interval_day_time", "Col", "interval_day_time", "Column"}, + + {"TimestampColumnArithmeticTimestampScalar", "Subtract", "interval_day_time", "Col", "interval_day_time", "Scalar"}, + {"TimestampScalarArithmeticTimestampColumn", "Subtract", "interval_day_time", "Scalar", "interval_day_time", "Column"}, + {"TimestampColumnArithmeticTimestampColumn", "Subtract", "interval_day_time", "Col", "interval_day_time", "Column"}, + + // A type timestamp (TimestampColumnVector) plus/minus a type interval_day_time (TimestampColumnVector + // storing nanosecond interval in 2 longs) produces a timestamp. 
+ {"TimestampColumnArithmeticTimestampScalar", "Add", "interval_day_time", "Col", "timestamp", "Scalar"}, + {"TimestampScalarArithmeticTimestampColumn", "Add", "interval_day_time", "Scalar", "timestamp", "Column"}, + {"TimestampColumnArithmeticTimestampColumn", "Add", "interval_day_time", "Col", "timestamp", "Column"}, + + {"TimestampColumnArithmeticTimestampScalar", "Add", "timestamp", "Col", "interval_day_time", "Scalar"}, + {"TimestampScalarArithmeticTimestampColumn", "Add", "timestamp", "Scalar", "interval_day_time", "Column"}, + {"TimestampColumnArithmeticTimestampColumn", "Add", "timestamp", "Col", "interval_day_time", "Column"}, + + {"TimestampColumnArithmeticTimestampScalar", "Subtract", "timestamp", "Col", "interval_day_time", "Scalar"}, + {"TimestampScalarArithmeticTimestampColumn", "Subtract", "timestamp", "Scalar", "interval_day_time", "Column"}, + {"TimestampColumnArithmeticTimestampColumn", "Subtract", "timestamp", "Col", "interval_day_time", "Column"}, + + // A type timestamp (TimestampColumnVector) minus a type timestamp produces a + // type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). + {"TimestampColumnArithmeticTimestampScalar", "Subtract", "timestamp", "Col", "timestamp", "Scalar"}, + {"TimestampScalarArithmeticTimestampColumn", "Subtract", "timestamp", "Scalar", "timestamp", "Column"}, + {"TimestampColumnArithmeticTimestampColumn", "Subtract", "timestamp", "Col", "timestamp", "Column"}, + + // Arithmetic on a TimestampColumnVector and date base classes. + {"DateColumnArithmeticTimestampColumnBase", "Add", "Col", "Column"}, + {"DateScalarArithmeticTimestampColumnBase", "Add", "Scalar", "Column"}, + {"DateColumnArithmeticTimestampScalarBase", "Add", "Col", "Scalar"}, + + {"DateColumnArithmeticTimestampColumnBase", "Subtract", "Col", "Column"}, + {"DateScalarArithmeticTimestampColumnBase", "Subtract", "Scalar", "Column"}, + {"DateColumnArithmeticTimestampScalarBase", "Subtract", "Col", "Scalar"}, + + {"TimestampColumnArithmeticDateColumnBase", "Add", "Col", "Column"}, + {"TimestampScalarArithmeticDateColumnBase", "Add", "Scalar", "Column"}, + {"TimestampColumnArithmeticDateScalarBase", "Add", "Col", "Scalar"}, + + {"TimestampColumnArithmeticDateColumnBase", "Subtract", "Col", "Column"}, + {"TimestampScalarArithmeticDateColumnBase", "Subtract", "Scalar", "Column"}, + {"TimestampColumnArithmeticDateScalarBase", "Subtract", "Col", "Scalar"}, + + // Arithmetic with a type date (LongColumnVector storing epoch days) and type interval_day_time (TimestampColumnVector storing + // nanosecond interval in 2 longs) produces a type timestamp (TimestampColumnVector). 
+ {"DateColumnArithmeticTimestampColumn", "Add", "date", "Col", "interval_day_time", "Column"}, + {"DateScalarArithmeticTimestampColumn", "Add", "date", "Scalar", "interval_day_time", "Column"}, + {"DateColumnArithmeticTimestampScalar", "Add", "date", "Col", "interval_day_time", "Scalar"}, + + {"DateColumnArithmeticTimestampColumn", "Subtract", "date", "Col", "interval_day_time", "Column"}, + {"DateScalarArithmeticTimestampColumn", "Subtract", "date", "Scalar", "interval_day_time", "Column"}, + {"DateColumnArithmeticTimestampScalar", "Subtract", "date", "Col", "interval_day_time", "Scalar"}, + + {"TimestampColumnArithmeticDateColumn", "Add", "interval_day_time", "Col", "date", "Column"}, + {"TimestampScalarArithmeticDateColumn", "Add", "interval_day_time", "Scalar", "date", "Column"}, + {"TimestampColumnArithmeticDateScalar", "Add", "interval_day_time", "Col", "date", "Scalar"}, + + // Subtraction with a type date (LongColumnVector storing epoch days) and type timestamp produces a type timestamp (TimestampColumnVector). + {"DateColumnArithmeticTimestampColumn", "Subtract", "date", "Col", "timestamp", "Column"}, + {"DateScalarArithmeticTimestampColumn", "Subtract", "date", "Scalar", "timestamp", "Column"}, + {"DateColumnArithmeticTimestampScalar", "Subtract", "date", "Col", "timestamp", "Scalar"}, + + {"TimestampColumnArithmeticDateColumn", "Subtract", "timestamp", "Col", "date", "Column"}, + {"TimestampScalarArithmeticDateColumn", "Subtract", "timestamp", "Scalar", "date", "Column"}, + {"TimestampColumnArithmeticDateScalar", "Subtract", "timestamp", "Col", "date", "Scalar"}, + + // Arithmetic with a type date (LongColumnVector storing epoch days) and type interval_year_month (LongColumnVector storing + // months) produces a type date via a calendar calculation. + {"DateColumnArithmeticIntervalYearMonthColumn", "Add", "+", "date", "Col", "interval_year_month", "Column"}, + {"DateScalarArithmeticIntervalYearMonthColumn", "Add", "+", "date", "Scalar", "interval_year_month", "Column"}, + {"DateColumnArithmeticIntervalYearMonthScalar", "Add", "+", "date", "Col", "interval_year_month", "Scalar"}, + + {"DateColumnArithmeticIntervalYearMonthColumn", "Subtract", "-", "date", "Col", "interval_year_month", "Column"}, + {"DateScalarArithmeticIntervalYearMonthColumn", "Subtract", "-", "date", "Scalar", "interval_year_month", "Column"}, + {"DateColumnArithmeticIntervalYearMonthScalar", "Subtract", "-", "date", "Col", "interval_year_month", "Scalar"}, + + {"IntervalYearMonthColumnArithmeticDateColumn", "Add", "+", "interval_year_month", "Col", "date", "Column"}, + {"IntervalYearMonthScalarArithmeticDateColumn", "Add", "+", "interval_year_month", "Scalar", "date", "Column"}, + {"IntervalYearMonthColumnArithmeticDateScalar", "Add", "+", "interval_year_month", "Col", "date", "Scalar"}, + + // Arithmetic with a type timestamp (TimestampColumnVector) and type interval_year_month (LongColumnVector storing + // months) produces a type timestamp via a calendar calculation. 
+ {"TimestampColumnArithmeticIntervalYearMonthColumn", "Add", "+", "timestamp", "Col", "interval_year_month", "Column"}, + {"TimestampScalarArithmeticIntervalYearMonthColumn", "Add", "+", "timestamp", "Scalar", "interval_year_month", "Column"}, + {"TimestampColumnArithmeticIntervalYearMonthScalar", "Add", "+", "timestamp", "Col", "interval_year_month", "Scalar"}, + + {"TimestampColumnArithmeticIntervalYearMonthColumn", "Subtract", "-", "timestamp", "Col", "interval_year_month", "Column"}, + {"TimestampScalarArithmeticIntervalYearMonthColumn", "Subtract", "-", "timestamp", "Scalar", "interval_year_month", "Column"}, + {"TimestampColumnArithmeticIntervalYearMonthScalar", "Subtract", "-", "timestamp", "Col", "interval_year_month", "Scalar"}, + + {"IntervalYearMonthColumnArithmeticTimestampColumn", "Add","+", "interval_year_month", "Col", "timestamp", "Column"}, + {"IntervalYearMonthScalarArithmeticTimestampColumn", "Add","+", "interval_year_month", "Scalar", "timestamp", "Column"}, + {"IntervalYearMonthColumnArithmeticTimestampScalar", "Add","+", "interval_year_month", "Col", "timestamp", "Scalar"}, + + // Long/double arithmetic {"ColumnArithmeticScalar", "Add", "long", "long", "+"}, {"ColumnArithmeticScalar", "Subtract", "long", "long", "-"}, {"ColumnArithmeticScalar", "Multiply", "long", "long", "*"}, @@ -251,46 +318,109 @@ {"ScalarCompareColumn", "Greater", "double", "long", ">"}, {"ScalarCompareColumn", "GreaterEqual", "double", "long", ">="}, - {"TimestampColumnCompareTimestampScalar", "Equal"}, - {"TimestampColumnCompareTimestampScalar", "NotEqual"}, - {"TimestampColumnCompareTimestampScalar", "Less"}, - {"TimestampColumnCompareTimestampScalar", "LessEqual"}, - {"TimestampColumnCompareTimestampScalar", "Greater"}, - {"TimestampColumnCompareTimestampScalar", "GreaterEqual"}, - - {"TimestampColumnCompareScalar", "Equal", "long"}, - {"TimestampColumnCompareScalar", "Equal", "double"}, - {"TimestampColumnCompareScalar", "NotEqual", "long"}, - {"TimestampColumnCompareScalar", "NotEqual", "double"}, - {"TimestampColumnCompareScalar", "Less", "long"}, - {"TimestampColumnCompareScalar", "Less", "double"}, - {"TimestampColumnCompareScalar", "LessEqual", "long"}, - {"TimestampColumnCompareScalar", "LessEqual", "double"}, - {"TimestampColumnCompareScalar", "Greater", "long"}, - {"TimestampColumnCompareScalar", "Greater", "double"}, - {"TimestampColumnCompareScalar", "GreaterEqual", "long"}, - {"TimestampColumnCompareScalar", "GreaterEqual", "double"}, - - {"TimestampScalarCompareTimestampColumn", "Equal"}, - {"TimestampScalarCompareTimestampColumn", "NotEqual"}, - {"TimestampScalarCompareTimestampColumn", "Less"}, - {"TimestampScalarCompareTimestampColumn", "LessEqual"}, - {"TimestampScalarCompareTimestampColumn", "Greater"}, - {"TimestampScalarCompareTimestampColumn", "GreaterEqual"}, - - {"ScalarCompareTimestampColumn", "Equal", "long"}, - {"ScalarCompareTimestampColumn", "Equal", "double"}, - {"ScalarCompareTimestampColumn", "NotEqual", "long"}, - {"ScalarCompareTimestampColumn", "NotEqual", "double"}, - {"ScalarCompareTimestampColumn", "Less", "long"}, - {"ScalarCompareTimestampColumn", "Less", "double"}, - {"ScalarCompareTimestampColumn", "LessEqual", "long"}, - {"ScalarCompareTimestampColumn", "LessEqual", "double"}, - {"ScalarCompareTimestampColumn", "Greater", "long"}, - {"ScalarCompareTimestampColumn", "Greater", "double"}, - {"ScalarCompareTimestampColumn", "GreaterEqual", "long"}, - {"ScalarCompareTimestampColumn", "GreaterEqual", "double"}, - + // Compare timestamp to timestamp. 
+ {"TimestampColumnCompareTimestampColumn", "Equal", "=="}, + {"TimestampColumnCompareTimestampColumn", "NotEqual", "!="}, + {"TimestampColumnCompareTimestampColumn", "Less", "<"}, + {"TimestampColumnCompareTimestampColumn", "LessEqual", "<="}, + {"TimestampColumnCompareTimestampColumn", "Greater", ">"}, + {"TimestampColumnCompareTimestampColumn", "GreaterEqual", ">="}, + + {"TimestampColumnCompareTimestampScalar", "Equal", "=="}, + {"TimestampColumnCompareTimestampScalar", "NotEqual", "!="}, + {"TimestampColumnCompareTimestampScalar", "Less", "<"}, + {"TimestampColumnCompareTimestampScalar", "LessEqual", "<="}, + {"TimestampColumnCompareTimestampScalar", "Greater", ">"}, + {"TimestampColumnCompareTimestampScalar", "GreaterEqual", ">="}, + + {"TimestampScalarCompareTimestampColumn", "Equal", "=="}, + {"TimestampScalarCompareTimestampColumn", "NotEqual", "!="}, + {"TimestampScalarCompareTimestampColumn", "Less", "<"}, + {"TimestampScalarCompareTimestampColumn", "LessEqual", "<="}, + {"TimestampScalarCompareTimestampColumn", "Greater", ">"}, + {"TimestampScalarCompareTimestampColumn", "GreaterEqual", ">="}, + + + // Compare timestamp to integer seconds or double seconds with fractional nanoseonds. + {"TimestampColumnCompareColumn", "Equal", "long", "=="}, + {"TimestampColumnCompareColumn", "Equal", "double", "=="}, + {"TimestampColumnCompareColumn", "NotEqual", "long", "!="}, + {"TimestampColumnCompareColumn", "NotEqual", "double", "!="}, + {"TimestampColumnCompareColumn", "Less", "long", "<"}, + {"TimestampColumnCompareColumn", "Less", "double", "<"}, + {"TimestampColumnCompareColumn", "LessEqual", "long", "<="}, + {"TimestampColumnCompareColumn", "LessEqual", "double", "<="}, + {"TimestampColumnCompareColumn", "Greater", "long", ">"}, + {"TimestampColumnCompareColumn", "Greater", "double", ">"}, + {"TimestampColumnCompareColumn", "GreaterEqual", "long", ">="}, + {"TimestampColumnCompareColumn", "GreaterEqual", "double", ">="}, + + {"ColumnCompareTimestampColumn", "Equal", "long", "=="}, + {"ColumnCompareTimestampColumn", "Equal", "double", "=="}, + {"ColumnCompareTimestampColumn", "NotEqual", "long", "!="}, + {"ColumnCompareTimestampColumn", "NotEqual", "double", "!="}, + {"ColumnCompareTimestampColumn", "Less", "long", "<"}, + {"ColumnCompareTimestampColumn", "Less", "double", "<"}, + {"ColumnCompareTimestampColumn", "LessEqual", "long", "<="}, + {"ColumnCompareTimestampColumn", "LessEqual", "double", "<="}, + {"ColumnCompareTimestampColumn", "Greater", "long", ">"}, + {"ColumnCompareTimestampColumn", "Greater", "double", ">"}, + {"ColumnCompareTimestampColumn", "GreaterEqual", "long", ">="}, + {"ColumnCompareTimestampColumn", "GreaterEqual", "double", ">="}, + + {"TimestampColumnCompareScalar", "Equal", "long", "=="}, + {"TimestampColumnCompareScalar", "Equal", "double", "=="}, + {"TimestampColumnCompareScalar", "NotEqual", "long", "!="}, + {"TimestampColumnCompareScalar", "NotEqual", "double", "!="}, + {"TimestampColumnCompareScalar", "Less", "long", "<"}, + {"TimestampColumnCompareScalar", "Less", "double", "<"}, + {"TimestampColumnCompareScalar", "LessEqual", "long", "<="}, + {"TimestampColumnCompareScalar", "LessEqual", "double", "<="}, + {"TimestampColumnCompareScalar", "Greater", "long", ">"}, + {"TimestampColumnCompareScalar", "Greater", "double", ">"}, + {"TimestampColumnCompareScalar", "GreaterEqual", "long", ">="}, + {"TimestampColumnCompareScalar", "GreaterEqual", "double", ">="}, + + {"ScalarCompareTimestampColumn", "Equal", "long", "=="}, + {"ScalarCompareTimestampColumn", 
"Equal", "double", "=="}, + {"ScalarCompareTimestampColumn", "NotEqual", "long", "!="}, + {"ScalarCompareTimestampColumn", "NotEqual", "double", "!="}, + {"ScalarCompareTimestampColumn", "Less", "long", "<"}, + {"ScalarCompareTimestampColumn", "Less", "double", "<"}, + {"ScalarCompareTimestampColumn", "LessEqual", "long", "<="}, + {"ScalarCompareTimestampColumn", "LessEqual", "double", "<="}, + {"ScalarCompareTimestampColumn", "Greater", "long", ">"}, + {"ScalarCompareTimestampColumn", "Greater", "double", ">"}, + {"ScalarCompareTimestampColumn", "GreaterEqual", "long", ">="}, + {"ScalarCompareTimestampColumn", "GreaterEqual", "double", ">="}, + + {"TimestampScalarCompareColumn", "Equal", "long", "=="}, + {"TimestampScalarCompareColumn", "Equal", "double", "=="}, + {"TimestampScalarCompareColumn", "NotEqual", "long", "!="}, + {"TimestampScalarCompareColumn", "NotEqual", "double", "!="}, + {"TimestampScalarCompareColumn", "Less", "long", "<"}, + {"TimestampScalarCompareColumn", "Less", "double", "<"}, + {"TimestampScalarCompareColumn", "LessEqual", "long", "<="}, + {"TimestampScalarCompareColumn", "LessEqual", "double", "<="}, + {"TimestampScalarCompareColumn", "Greater", "long", ">"}, + {"TimestampScalarCompareColumn", "Greater", "double", ">"}, + {"TimestampScalarCompareColumn", "GreaterEqual", "long", ">="}, + {"TimestampScalarCompareColumn", "GreaterEqual", "double", ">="}, + + {"ColumnCompareTimestampScalar", "Equal", "long", "=="}, + {"ColumnCompareTimestampScalar", "Equal", "double", "=="}, + {"ColumnCompareTimestampScalar", "NotEqual", "long", "!="}, + {"ColumnCompareTimestampScalar", "NotEqual", "double", "!="}, + {"ColumnCompareTimestampScalar", "Less", "long", "<"}, + {"ColumnCompareTimestampScalar", "Less", "double", "<"}, + {"ColumnCompareTimestampScalar", "LessEqual", "long", "<="}, + {"ColumnCompareTimestampScalar", "LessEqual", "double", "<="}, + {"ColumnCompareTimestampScalar", "Greater", "long", ">"}, + {"ColumnCompareTimestampScalar", "Greater", "double", ">"}, + {"ColumnCompareTimestampScalar", "GreaterEqual", "long", ">="}, + {"ColumnCompareTimestampScalar", "GreaterEqual", "double", ">="}, + + // Filter long/double. 
{"FilterColumnCompareScalar", "Equal", "long", "double", "=="}, {"FilterColumnCompareScalar", "Equal", "double", "double", "=="}, {"FilterColumnCompareScalar", "NotEqual", "long", "double", "!="}, @@ -343,46 +473,108 @@ {"FilterScalarCompareColumn", "GreaterEqual", "long", "long", ">="}, {"FilterScalarCompareColumn", "GreaterEqual", "double", "long", ">="}, - {"FilterTimestampColumnCompareTimestampScalar", "Equal"}, - {"FilterTimestampColumnCompareTimestampScalar", "NotEqual"}, - {"FilterTimestampColumnCompareTimestampScalar", "Less"}, - {"FilterTimestampColumnCompareTimestampScalar", "LessEqual"}, - {"FilterTimestampColumnCompareTimestampScalar", "Greater"}, - {"FilterTimestampColumnCompareTimestampScalar", "GreaterEqual"}, - - {"FilterTimestampColumnCompareScalar", "Equal", "long"}, - {"FilterTimestampColumnCompareScalar", "Equal", "double"}, - {"FilterTimestampColumnCompareScalar", "NotEqual", "long"}, - {"FilterTimestampColumnCompareScalar", "NotEqual", "double"}, - {"FilterTimestampColumnCompareScalar", "Less", "long"}, - {"FilterTimestampColumnCompareScalar", "Less", "double"}, - {"FilterTimestampColumnCompareScalar", "LessEqual", "long"}, - {"FilterTimestampColumnCompareScalar", "LessEqual", "double"}, - {"FilterTimestampColumnCompareScalar", "Greater", "long"}, - {"FilterTimestampColumnCompareScalar", "Greater", "double"}, - {"FilterTimestampColumnCompareScalar", "GreaterEqual", "long"}, - {"FilterTimestampColumnCompareScalar", "GreaterEqual", "double"}, - - {"FilterTimestampScalarCompareTimestampColumn", "Equal"}, - {"FilterTimestampScalarCompareTimestampColumn", "NotEqual"}, - {"FilterTimestampScalarCompareTimestampColumn", "Less"}, - {"FilterTimestampScalarCompareTimestampColumn", "LessEqual"}, - {"FilterTimestampScalarCompareTimestampColumn", "Greater"}, - {"FilterTimestampScalarCompareTimestampColumn", "GreaterEqual"}, - - {"FilterScalarCompareTimestampColumn", "Equal", "long"}, - {"FilterScalarCompareTimestampColumn", "Equal", "double"}, - {"FilterScalarCompareTimestampColumn", "NotEqual", "long"}, - {"FilterScalarCompareTimestampColumn", "NotEqual", "double"}, - {"FilterScalarCompareTimestampColumn", "Less", "long"}, - {"FilterScalarCompareTimestampColumn", "Less", "double"}, - {"FilterScalarCompareTimestampColumn", "LessEqual", "long"}, - {"FilterScalarCompareTimestampColumn", "LessEqual", "double"}, - {"FilterScalarCompareTimestampColumn", "Greater", "long"}, - {"FilterScalarCompareTimestampColumn", "Greater", "double"}, - {"FilterScalarCompareTimestampColumn", "GreaterEqual", "long"}, - {"FilterScalarCompareTimestampColumn", "GreaterEqual", "double"}, - + // Filter timestamp against timestamp, long (seconds), double (seconds with fractional + // nanoseconds). 
+ {"FilterTimestampColumnCompareTimestampColumn", "Equal", "=="}, + {"FilterTimestampColumnCompareTimestampColumn", "NotEqual", "!="}, + {"FilterTimestampColumnCompareTimestampColumn", "Less", "<"}, + {"FilterTimestampColumnCompareTimestampColumn", "LessEqual", "<="}, + {"FilterTimestampColumnCompareTimestampColumn", "Greater", ">"}, + {"FilterTimestampColumnCompareTimestampColumn", "GreaterEqual", ">="}, + + {"FilterTimestampColumnCompareColumn", "Equal", "long", "=="}, + {"FilterTimestampColumnCompareColumn", "Equal", "double", "=="}, + {"FilterTimestampColumnCompareColumn", "NotEqual", "long", "!="}, + {"FilterTimestampColumnCompareColumn", "NotEqual", "double", "!="}, + {"FilterTimestampColumnCompareColumn", "Less", "long", "<"}, + {"FilterTimestampColumnCompareColumn", "Less", "double", "<"}, + {"FilterTimestampColumnCompareColumn", "LessEqual", "long", "<="}, + {"FilterTimestampColumnCompareColumn", "LessEqual", "double", "<="}, + {"FilterTimestampColumnCompareColumn", "Greater", "long", ">"}, + {"FilterTimestampColumnCompareColumn", "Greater", "double", ">"}, + {"FilterTimestampColumnCompareColumn", "GreaterEqual", "long", ">="}, + {"FilterTimestampColumnCompareColumn", "GreaterEqual", "double", ">="}, + + {"FilterColumnCompareTimestampColumn", "Equal", "long", "=="}, + {"FilterColumnCompareTimestampColumn", "Equal", "double", "=="}, + {"FilterColumnCompareTimestampColumn", "NotEqual", "long", "!="}, + {"FilterColumnCompareTimestampColumn", "NotEqual", "double", "!="}, + {"FilterColumnCompareTimestampColumn", "Less", "long", "<"}, + {"FilterColumnCompareTimestampColumn", "Less", "double", "<"}, + {"FilterColumnCompareTimestampColumn", "LessEqual", "long", "<="}, + {"FilterColumnCompareTimestampColumn", "LessEqual", "double", "<="}, + {"FilterColumnCompareTimestampColumn", "Greater", "long", ">"}, + {"FilterColumnCompareTimestampColumn", "Greater", "double", ">"}, + {"FilterColumnCompareTimestampColumn", "GreaterEqual", "long", ">="}, + {"FilterColumnCompareTimestampColumn", "GreaterEqual", "double", ">="}, + + {"FilterTimestampColumnCompareTimestampScalar", "Equal", "=="}, + {"FilterTimestampColumnCompareTimestampScalar", "NotEqual", "!="}, + {"FilterTimestampColumnCompareTimestampScalar", "Less", "<"}, + {"FilterTimestampColumnCompareTimestampScalar", "LessEqual", "<="}, + {"FilterTimestampColumnCompareTimestampScalar", "Greater", ">"}, + {"FilterTimestampColumnCompareTimestampScalar", "GreaterEqual", ">="}, + + {"FilterTimestampColumnCompareScalar", "Equal", "long", "=="}, + {"FilterTimestampColumnCompareScalar", "Equal", "double", "=="}, + {"FilterTimestampColumnCompareScalar", "NotEqual", "long", "!="}, + {"FilterTimestampColumnCompareScalar", "NotEqual", "double", "!="}, + {"FilterTimestampColumnCompareScalar", "Less", "long", "<"}, + {"FilterTimestampColumnCompareScalar", "Less", "double", "<"}, + {"FilterTimestampColumnCompareScalar", "LessEqual", "long", "<="}, + {"FilterTimestampColumnCompareScalar", "LessEqual", "double", "<="}, + {"FilterTimestampColumnCompareScalar", "Greater", "long", ">"}, + {"FilterTimestampColumnCompareScalar", "Greater", "double", ">"}, + {"FilterTimestampColumnCompareScalar", "GreaterEqual", "long", ">="}, + {"FilterTimestampColumnCompareScalar", "GreaterEqual", "double", ">="}, + + {"FilterColumnCompareTimestampScalar", "Equal", "long", "=="}, + {"FilterColumnCompareTimestampScalar", "Equal", "double", "=="}, + {"FilterColumnCompareTimestampScalar", "NotEqual", "long", "!="}, + {"FilterColumnCompareTimestampScalar", "NotEqual", "double", "!="}, + 
{"FilterColumnCompareTimestampScalar", "Less", "long", "<"}, + {"FilterColumnCompareTimestampScalar", "Less", "double", "<"}, + {"FilterColumnCompareTimestampScalar", "LessEqual", "long", "<="}, + {"FilterColumnCompareTimestampScalar", "LessEqual", "double", "<="}, + {"FilterColumnCompareTimestampScalar", "Greater", "long", ">"}, + {"FilterColumnCompareTimestampScalar", "Greater", "double", ">"}, + {"FilterColumnCompareTimestampScalar", "GreaterEqual", "long", ">="}, + {"FilterColumnCompareTimestampScalar", "GreaterEqual", "double", ">="}, + + {"FilterTimestampScalarCompareTimestampColumn", "Equal", "=="}, + {"FilterTimestampScalarCompareTimestampColumn", "NotEqual", "!="}, + {"FilterTimestampScalarCompareTimestampColumn", "Less", "<"}, + {"FilterTimestampScalarCompareTimestampColumn", "LessEqual", "<="}, + {"FilterTimestampScalarCompareTimestampColumn", "Greater", ">"}, + {"FilterTimestampScalarCompareTimestampColumn", "GreaterEqual", ">="}, + + {"FilterTimestampScalarCompareColumn", "Equal", "long", "=="}, + {"FilterTimestampScalarCompareColumn", "Equal", "double", "=="}, + {"FilterTimestampScalarCompareColumn", "NotEqual", "long", "!="}, + {"FilterTimestampScalarCompareColumn", "NotEqual", "double", "!="}, + {"FilterTimestampScalarCompareColumn", "Less", "long", "<"}, + {"FilterTimestampScalarCompareColumn", "Less", "double", "<"}, + {"FilterTimestampScalarCompareColumn", "LessEqual", "long", "<="}, + {"FilterTimestampScalarCompareColumn", "LessEqual", "double", "<="}, + {"FilterTimestampScalarCompareColumn", "Greater", "long", ">"}, + {"FilterTimestampScalarCompareColumn", "Greater", "double", ">"}, + {"FilterTimestampScalarCompareColumn", "GreaterEqual", "long", ">="}, + {"FilterTimestampScalarCompareColumn", "GreaterEqual", "double", ">="}, + + {"FilterScalarCompareTimestampColumn", "Equal", "long", "=="}, + {"FilterScalarCompareTimestampColumn", "Equal", "double", "=="}, + {"FilterScalarCompareTimestampColumn", "NotEqual", "long", "!="}, + {"FilterScalarCompareTimestampColumn", "NotEqual", "double", "!="}, + {"FilterScalarCompareTimestampColumn", "Less", "long", "<"}, + {"FilterScalarCompareTimestampColumn", "Less", "double", "<"}, + {"FilterScalarCompareTimestampColumn", "LessEqual", "long", "<="}, + {"FilterScalarCompareTimestampColumn", "LessEqual", "double", "<="}, + {"FilterScalarCompareTimestampColumn", "Greater", "long", ">"}, + {"FilterScalarCompareTimestampColumn", "Greater", "double", ">"}, + {"FilterScalarCompareTimestampColumn", "GreaterEqual", "long", ">="}, + {"FilterScalarCompareTimestampColumn", "GreaterEqual", "double", ">="}, + + // String group comparison. 
{"FilterStringGroupColumnCompareStringGroupScalarBase", "Equal", "=="}, {"FilterStringGroupColumnCompareStringGroupScalarBase", "NotEqual", "!="}, {"FilterStringGroupColumnCompareStringGroupScalarBase", "Less", "<"}, @@ -476,26 +668,28 @@ {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "Greater", ">"}, {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "GreaterEqual", ">="}, - {"FilterDecimalColumnCompareScalar", "Equal", "=="}, - {"FilterDecimalColumnCompareScalar", "NotEqual", "!="}, - {"FilterDecimalColumnCompareScalar", "Less", "<"}, - {"FilterDecimalColumnCompareScalar", "LessEqual", "<="}, - {"FilterDecimalColumnCompareScalar", "Greater", ">"}, - {"FilterDecimalColumnCompareScalar", "GreaterEqual", ">="}, - - {"FilterDecimalScalarCompareColumn", "Equal", "=="}, - {"FilterDecimalScalarCompareColumn", "NotEqual", "!="}, - {"FilterDecimalScalarCompareColumn", "Less", "<"}, - {"FilterDecimalScalarCompareColumn", "LessEqual", "<="}, - {"FilterDecimalScalarCompareColumn", "Greater", ">"}, - {"FilterDecimalScalarCompareColumn", "GreaterEqual", ">="}, - - {"FilterDecimalColumnCompareColumn", "Equal", "=="}, - {"FilterDecimalColumnCompareColumn", "NotEqual", "!="}, - {"FilterDecimalColumnCompareColumn", "Less", "<"}, - {"FilterDecimalColumnCompareColumn", "LessEqual", "<="}, - {"FilterDecimalColumnCompareColumn", "Greater", ">"}, - {"FilterDecimalColumnCompareColumn", "GreaterEqual", ">="}, + + {"FilterDecimalColumnCompareDecimalScalar", "Equal", "=="}, + {"FilterDecimalColumnCompareDecimalScalar", "NotEqual", "!="}, + {"FilterDecimalColumnCompareDecimalScalar", "Less", "<"}, + {"FilterDecimalColumnCompareDecimalScalar", "LessEqual", "<="}, + {"FilterDecimalColumnCompareDecimalScalar", "Greater", ">"}, + {"FilterDecimalColumnCompareDecimalScalar", "GreaterEqual", ">="}, + + {"FilterDecimalScalarCompareDecimalColumn", "Equal", "=="}, + {"FilterDecimalScalarCompareDecimalColumn", "NotEqual", "!="}, + {"FilterDecimalScalarCompareDecimalColumn", "Less", "<"}, + {"FilterDecimalScalarCompareDecimalColumn", "LessEqual", "<="}, + {"FilterDecimalScalarCompareDecimalColumn", "Greater", ">"}, + {"FilterDecimalScalarCompareDecimalColumn", "GreaterEqual", ">="}, + + {"FilterDecimalColumnCompareDecimalColumn", "Equal", "=="}, + {"FilterDecimalColumnCompareDecimalColumn", "NotEqual", "!="}, + {"FilterDecimalColumnCompareDecimalColumn", "Less", "<"}, + {"FilterDecimalColumnCompareDecimalColumn", "LessEqual", "<="}, + {"FilterDecimalColumnCompareDecimalColumn", "Greater", ">"}, + {"FilterDecimalColumnCompareDecimalColumn", "GreaterEqual", ">="}, + {"StringGroupScalarCompareStringGroupColumnBase", "Equal", "=="}, {"StringGroupScalarCompareStringGroupColumnBase", "NotEqual", "!="}, @@ -573,6 +767,9 @@ {"FilterDecimalColumnBetween", ""}, {"FilterDecimalColumnBetween", "!"}, + {"FilterTimestampColumnBetween", ""}, + {"FilterTimestampColumnBetween", "!"}, + {"ColumnCompareColumn", "Equal", "long", "double", "=="}, {"ColumnCompareColumn", "Equal", "double", "double", "=="}, {"ColumnCompareColumn", "NotEqual", "long", "double", "!="}, @@ -741,24 +938,12 @@ // Casts {"ColumnUnaryFunc", "Cast", "long", "double", "", "", "(long)", "", ""}, {"ColumnUnaryFunc", "Cast", "double", "long", "", "", "(double)", "", ""}, - {"ColumnUnaryFunc", "CastTimestampToLongVia", "long", "long", "MathExpr.fromTimestamp", "", - "", "", "timestamp"}, - {"ColumnUnaryFunc", "CastTimestampToDoubleVia", "double", "long", - "MathExpr.fromTimestampToDouble", "", "", "", "timestamp"}, {"ColumnUnaryFunc", 
"CastDoubleToBooleanVia", "long", "double", "MathExpr.toBool", "", "", "", ""}, {"ColumnUnaryFunc", "CastLongToBooleanVia", "long", "long", "MathExpr.toBool", "", "", "", ""}, {"ColumnUnaryFunc", "CastDateToBooleanVia", "long", "long", "MathExpr.toBool", "", "", "", "date"}, - {"ColumnUnaryFunc", "CastTimestampToBooleanVia", "long", "long", "MathExpr.toBool", "", - "", "", "timestamp"}, - {"ColumnUnaryFunc", "CastLongToTimestampVia", "long", "long", "MathExpr.longToTimestamp", "", - "", "", ""}, - {"ColumnUnaryFunc", "CastMillisecondsLongToTimestampVia", "long", "long", "MathExpr.millisecondsLongToTimestamp", "", - "", "", ""}, - {"ColumnUnaryFunc", "CastDoubleToTimestampVia", "long", "double", - "MathExpr.doubleToTimestamp", "", "", "", ""}, // Boolean to long is done with an IdentityExpression // Boolean to double is done with standard Long to Double cast @@ -803,6 +988,11 @@ {"VectorUDAFMinMaxString", "VectorUDAFMaxString", ">", "max", "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: string)"}, + {"VectorUDAFMinMaxTimestamp", "VectorUDAFMaxTimestamp", "<", "max", + "_FUNC_(expr) - Returns the maximum value of expr (vectorized, type: timestamp)"}, + {"VectorUDAFMinMaxTimestamp", "VectorUDAFMinTimestamp", ">", "min", + "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: timestamp)"}, + //template, , {"VectorUDAFSum", "VectorUDAFSumLong", "long"}, {"VectorUDAFSum", "VectorUDAFSumDouble", "double"}, @@ -947,26 +1137,52 @@ private void generate() throws Exception { generateColumnCompareScalar(tdesc); } else if (tdesc[0].equals("ScalarCompareColumn")) { generateScalarCompareColumn(tdesc); - } else if (tdesc[0].equals("TimestampScalarCompareTimestampColumn")) { - generateTimestampScalarCompareTimestampColumn(tdesc); - } else if (tdesc[0].equals("ScalarCompareTimestampColumn")) { - generateScalarCompareTimestampColumn(tdesc); + + } else if (tdesc[0].equals("TimestampColumnCompareTimestampColumn")) { + generateTimestampColumnCompareTimestampColumn(tdesc); } else if (tdesc[0].equals("TimestampColumnCompareTimestampScalar")) { - generateTimestampColumnCompareTimestampScalar(tdesc); + generateTimestampColumnCompareTimestampScalar(tdesc); + } else if (tdesc[0].equals("TimestampScalarCompareTimestampColumn")) { + generateTimestampScalarCompareTimestampColumn(tdesc); + + } else if (tdesc[0].equals("TimestampColumnCompareColumn")) { + generateTimestampColumnCompareColumn(tdesc); } else if (tdesc[0].equals("TimestampColumnCompareScalar")) { - generateTimestampColumnCompareScalar(tdesc); + generateTimestampColumnCompareScalar(tdesc); + } else if (tdesc[0].equals("TimestampScalarCompareColumn")) { + generateTimestampScalarCompareColumn(tdesc); + + } else if (tdesc[0].equals("ColumnCompareTimestampColumn")) { + generateColumnCompareTimestampColumn(tdesc); + } else if (tdesc[0].equals("ColumnCompareTimestampScalar")) { + generateColumnCompareTimestampScalar(tdesc); + } else if (tdesc[0].equals("ScalarCompareTimestampColumn")) { + generateScalarCompareTimestampColumn(tdesc); + } else if (tdesc[0].equals("FilterColumnCompareScalar")) { generateFilterColumnCompareScalar(tdesc); } else if (tdesc[0].equals("FilterScalarCompareColumn")) { generateFilterScalarCompareColumn(tdesc); + + } else if (tdesc[0].equals("FilterTimestampColumnCompareTimestampColumn")) { + generateFilterTimestampColumnCompareTimestampColumn(tdesc); + } else if (tdesc[0].equals("FilterTimestampColumnCompareColumn")) { + generateFilterTimestampColumnCompareColumn(tdesc); + } else if 
(tdesc[0].equals("FilterColumnCompareTimestampColumn")) { + generateFilterColumnCompareTimestampColumn(tdesc); } else if (tdesc[0].equals("FilterTimestampColumnCompareTimestampScalar")) { - generateFilterTimestampColumnCompareTimestampScalar(tdesc); + generateFilterTimestampColumnCompareTimestampScalar(tdesc); } else if (tdesc[0].equals("FilterTimestampColumnCompareScalar")) { - generateFilterTimestampColumnCompareScalar(tdesc); + generateFilterTimestampColumnCompareScalar(tdesc); + } else if (tdesc[0].equals("FilterColumnCompareTimestampScalar")) { + generateFilterColumnCompareTimestampScalar(tdesc); } else if (tdesc[0].equals("FilterTimestampScalarCompareTimestampColumn")) { - generateFilterTimestampScalarCompareTimestampColumn(tdesc); + generateFilterTimestampScalarCompareTimestampColumn(tdesc); + } else if (tdesc[0].equals("FilterTimestampScalarCompareColumn")) { + generateFilterTimestampScalarCompareColumn(tdesc); } else if (tdesc[0].equals("FilterScalarCompareTimestampColumn")) { - generateFilterScalarCompareTimestampColumn(tdesc); + generateFilterScalarCompareTimestampColumn(tdesc); + } else if (tdesc[0].equals("FilterColumnBetween")) { generateFilterColumnBetween(tdesc); } else if (tdesc[0].equals("ScalarArithmeticColumn") || tdesc[0].equals("ScalarDivideColumn")) { @@ -988,7 +1204,9 @@ private void generate() throws Exception { } else if (tdesc[0].equals("VectorUDAFMinMaxString")) { generateVectorUDAFMinMaxString(tdesc); } else if (tdesc[0].equals("VectorUDAFMinMaxDecimal")) { - generateVectorUDAFMinMaxDecimal(tdesc); + generateVectorUDAFMinMaxObject(tdesc); + } else if (tdesc[0].equals("VectorUDAFMinMaxTimestamp")) { + generateVectorUDAFMinMaxObject(tdesc); } else if (tdesc[0].equals("VectorUDAFSum")) { generateVectorUDAFSum(tdesc); } else if (tdesc[0].equals("VectorUDAFAvg")) { @@ -1009,7 +1227,9 @@ private void generate() throws Exception { generateFilterTruncStringColumnBetween(tdesc); } else if (tdesc[0].equals("FilterDecimalColumnBetween")) { generateFilterDecimalColumnBetween(tdesc); - } else if (tdesc[0].equals("StringGroupColumnCompareStringGroupScalarBase")) { + } else if (tdesc[0].equals("FilterTimestampColumnBetween")) { + generateFilterTimestampColumnBetween(tdesc); + } else if (tdesc[0].equals("StringGroupColumnCompareStringGroupScalarBase")) { generateStringGroupColumnCompareStringGroupScalarBase(tdesc); } else if (tdesc[0].equals("StringGroupColumnCompareStringScalar")) { generateStringGroupColumnCompareStringScalar(tdesc); @@ -1037,12 +1257,12 @@ private void generate() throws Exception { generateIfExprScalarColumn(tdesc); } else if (tdesc[0].equals("IfExprScalarScalar")) { generateIfExprScalarScalar(tdesc); - } else if (tdesc[0].equals("FilterDecimalColumnCompareScalar")) { - generateFilterDecimalColumnCompareScalar(tdesc); - } else if (tdesc[0].equals("FilterDecimalScalarCompareColumn")) { - generateFilterDecimalScalarCompareColumn(tdesc); - } else if (tdesc[0].equals("FilterDecimalColumnCompareColumn")) { - generateFilterDecimalColumnCompareColumn(tdesc); + } else if (tdesc[0].equals("FilterDecimalColumnCompareDecimalScalar")) { + generateFilterDecimalColumnCompareDecimalScalar(tdesc); + } else if (tdesc[0].equals("FilterDecimalScalarCompareDecimalColumn")) { + generateFilterDecimalScalarCompareDecimalColumn(tdesc); + } else if (tdesc[0].equals("FilterDecimalColumnCompareDecimalColumn")) { + generateFilterDecimalColumnCompareDecimalColumn(tdesc); } else if (tdesc[0].equals("FilterDTIScalarCompareColumn")) { generateFilterDTIScalarCompareColumn(tdesc); } else if 
(tdesc[0].equals("FilterDTIColumnCompareScalar")) { @@ -1057,24 +1277,66 @@ private void generate() throws Exception { generateScalarArithmeticColumn(tdesc); } else if (tdesc[0].equals("DTIColumnArithmeticDTIColumnNoConvert")) { generateColumnArithmeticColumn(tdesc); - } else if (tdesc[0].equals("ColumnArithmeticColumnWithConvert")) { - generateColumnArithmeticColumnWithConvert(tdesc); - } else if (tdesc[0].equals("ScalarArithmeticColumnWithConvert")) { - generateScalarArithmeticColumnWithConvert(tdesc); - } else if (tdesc[0].equals("ColumnArithmeticScalarWithConvert")) { - generateColumnArithmeticScalarWithConvert(tdesc); - } else if (tdesc[0].equals("DateTimeColumnArithmeticIntervalColumnWithConvert")) { - generateDateTimeColumnArithmeticIntervalColumnWithConvert(tdesc); - } else if (tdesc[0].equals("DateTimeScalarArithmeticIntervalColumnWithConvert")) { - generateDateTimeScalarArithmeticIntervalColumnWithConvert(tdesc); - } else if (tdesc[0].equals("DateTimeColumnArithmeticIntervalScalarWithConvert")) { - generateDateTimeColumnArithmeticIntervalScalarWithConvert(tdesc); - } else if (tdesc[0].equals("IntervalColumnArithmeticDateTimeColumnWithConvert")) { - generateDateTimeColumnArithmeticIntervalColumnWithConvert(tdesc); - } else if (tdesc[0].equals("IntervalScalarArithmeticDateTimeColumnWithConvert")) { - generateDateTimeScalarArithmeticIntervalColumnWithConvert(tdesc); - } else if (tdesc[0].equals("IntervalColumnArithmeticDateTimeScalarWithConvert")) { - generateDateTimeColumnArithmeticIntervalScalarWithConvert(tdesc); + } else if (tdesc[0].equals("DateColumnArithmeticIntervalYearMonthColumn")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("DateScalarArithmeticIntervalYearMonthColumn")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("DateColumnArithmeticIntervalYearMonthScalar")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("IntervalYearMonthColumnArithmeticDateColumn")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("IntervalYearMonthScalarArithmeticDateColumn")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("IntervalYearMonthColumnArithmeticDateScalar")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("TimestampColumnArithmeticIntervalYearMonthColumn")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("TimestampScalarArithmeticIntervalYearMonthColumn")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("TimestampColumnArithmeticIntervalYearMonthScalar")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("IntervalYearMonthColumnArithmeticTimestampColumn")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("IntervalYearMonthScalarArithmeticTimestampColumn")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("IntervalYearMonthColumnArithmeticTimestampScalar")) { + generateDateTimeArithmeticIntervalYearMonth(tdesc); + } else if (tdesc[0].equals("TimestampColumnArithmeticTimestampColumnBase")) { + generateTimestampArithmeticTimestampBase(tdesc); + } else if (tdesc[0].equals("TimestampScalarArithmeticTimestampColumnBase")) { + generateTimestampArithmeticTimestampBase(tdesc); + } else if (tdesc[0].equals("TimestampColumnArithmeticTimestampScalarBase")) { + 
generateTimestampArithmeticTimestampBase(tdesc);
+ } else if (tdesc[0].equals("TimestampColumnArithmeticTimestampColumn")) {
+ generateTimestampArithmeticTimestamp(tdesc);
+ } else if (tdesc[0].equals("TimestampScalarArithmeticTimestampColumn")) {
+ generateTimestampArithmeticTimestamp(tdesc);
+ } else if (tdesc[0].equals("TimestampColumnArithmeticTimestampScalar")) {
+ generateTimestampArithmeticTimestamp(tdesc);
+ } else if (tdesc[0].equals("DateColumnArithmeticTimestampColumnBase")) {
+ generateDateArithmeticTimestampBase(tdesc);
+ } else if (tdesc[0].equals("DateScalarArithmeticTimestampColumnBase")) {
+ generateDateArithmeticTimestampBase(tdesc);
+ } else if (tdesc[0].equals("DateColumnArithmeticTimestampScalarBase")) {
+ generateDateArithmeticTimestampBase(tdesc);
+ } else if (tdesc[0].equals("DateColumnArithmeticTimestampColumn")) {
+ generateDateArithmeticTimestamp(tdesc);
+ } else if (tdesc[0].equals("DateScalarArithmeticTimestampColumn")) {
+ generateDateArithmeticTimestamp(tdesc);
+ } else if (tdesc[0].equals("DateColumnArithmeticTimestampScalar")) {
+ generateDateArithmeticTimestamp(tdesc);
+ } else if (tdesc[0].equals("TimestampColumnArithmeticDateColumnBase")) {
+ generateTimestampArithmeticDateBase(tdesc);
+ } else if (tdesc[0].equals("TimestampScalarArithmeticDateColumnBase")) {
+ generateTimestampArithmeticDateBase(tdesc);
+ } else if (tdesc[0].equals("TimestampColumnArithmeticDateScalarBase")) {
+ generateTimestampArithmeticDateBase(tdesc);
+ } else if (tdesc[0].equals("TimestampColumnArithmeticDateColumn")) {
+ generateTimestampArithmeticDate(tdesc);
+ } else if (tdesc[0].equals("TimestampScalarArithmeticDateColumn")) {
+ generateTimestampArithmeticDate(tdesc);
+ } else if (tdesc[0].equals("TimestampColumnArithmeticDateScalar")) {
+ generateTimestampArithmeticDate(tdesc);
 } else {
 continue;
 }
@@ -1140,6 +1402,20 @@ private void generateFilterDecimalColumnBetween(String[] tdesc) throws IOExcepti
 className, templateString);
 }
+ private void generateFilterTimestampColumnBetween(String[] tdesc) throws IOException {
+ String optionalNot = tdesc[1];
+ String className = "FilterTimestampColumn" + (optionalNot.equals("!") ? "Not" : "")
+ + "Between";
+ // Read the template into a string, expand it, and write it.
+ File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+ String templateString = readFile(templateFile);
+ templateString = templateString.replaceAll("<ClassName>", className);
+ templateString = templateString.replaceAll("<OptionalNot>", optionalNot);
+
+ writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+ className, templateString);
+ }
+
 private void generateFilterColumnBetween(String[] tdesc) throws Exception {
 String operandType = tdesc[1];
 String optionalNot = tdesc[2];
@@ -1211,7 +1487,7 @@ private void generateVectorUDAFMinMaxString(String[] tdesc) throws Exception {
 className, templateString);
 }
- private void generateVectorUDAFMinMaxDecimal(String[] tdesc) throws Exception {
+ private void generateVectorUDAFMinMaxObject(String[] tdesc) throws Exception {
 String className = tdesc[1];
 String operatorSymbol = tdesc[2];
 String descName = tdesc[3];
@@ -1843,12 +2119,10 @@ private void generateColumnCompareOperatorColumn(String[] tdesc, boolean filter,
 String vectorExprArgType1 = operandType1;
 String vectorExprArgType2 = operandType2;
- // For column to column only, we toss in timestamp and date.
- // But {timestamp|date} and scalar must be handled separately.
+ // For column to column only, we toss in date and interval_year_month.
 if (operandType1.equals("long") && operandType2.equals("long")) {
- // Let comparisons occur for DATE and TIMESTAMP, too.
- vectorExprArgType1 = "int_datetime_interval_family";
- vectorExprArgType2 = "int_datetime_interval_family";
+ vectorExprArgType1 = "int_date_interval_year_month";
+ vectorExprArgType2 = "int_date_interval_year_month";
 }
 templateString = templateString.replaceAll("<VectorExprArgType1>", vectorExprArgType1);
 templateString = templateString.replaceAll("<VectorExprArgType2>", vectorExprArgType2);
@@ -1870,80 +2144,273 @@ private void generateColumnCompareOperatorColumn(String[] tdesc, boolean filter,
 }
 }
- private void generateTimestampScalarCompareTimestampColumn(String[] tdesc) throws Exception {
+ // -----------------------------------------------------------------------------------------------
+ //
+ // Filter timestamp against timestamp, long (seconds), and double (seconds with fractional
+ // nanoseconds).
+ //
+ // Filter TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn
+ // Filter TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Column
+ //* Filter {Long|Double}Col {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn
+ //
+ // Filter TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampScalar
+ // Filter TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Scalar
+ //* Filter {Long|Double}Col {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampScalar
+ //
+ // Filter TimestampScalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn
+ // Filter TimestampScalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Column
+ //* Filter {Long|Double}Scalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn
+ //
+ // -----------------------------------------------------------------------------------------------
+
+ private void generateFilterTimestampColumnCompareTimestampColumn(String[] tdesc) throws Exception {
 String operatorName = tdesc[1];
- String className = "TimestampScalar" + operatorName + "TimestampColumn";
- String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.LongScalar" + operatorName + "LongColumn";
+ String operatorSymbol = tdesc[2];
+ String className = "FilterTimestampCol" + operatorName + "TimestampColumn";
+
 //Read the template into a string;
 File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
 String templateString = readFile(templateFile);
 templateString = templateString.replaceAll("<ClassName>", className);
- templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
+ templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
 writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
 className, templateString);
 }
- private void generateTimestampColumnCompareTimestampScalar(String[] tdesc) throws Exception {
+ private void generateFilterTimestampColumnCompareColumn(String[] tdesc) throws Exception {
 String operatorName = tdesc[1];
- String className = "TimestampCol" + operatorName + "TimestampScalar";
- String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.LongCol" + operatorName + "LongScalar";
+ String operandType = tdesc[2];
+ String operatorSymbol = tdesc[3];
+ String inputColumnVectorType2 = this.getColumnVectorType(operandType);
+
+ String className = "FilterTimestampCol" + operatorName + 
getCamelCaseType(operandType) + "Column";
+
 //Read the template into a string;
 File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
 String templateString = readFile(templateFile);
 templateString = templateString.replaceAll("<ClassName>", className);
- templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
+ templateString = templateString.replaceAll("<OperandType>", operandType);
+ templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+ templateString = templateString.replaceAll("<InputColumnVectorType2>", inputColumnVectorType2);
+ templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
+ writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+ className, templateString);
+ }
+
+ private void generateFilterColumnCompareTimestampColumn(String[] tdesc) throws Exception {
+ String operatorName = tdesc[1];
+ String operandType = tdesc[2];
+ String operatorSymbol = tdesc[3];
+ String inputColumnVectorType1 = this.getColumnVectorType(operandType);
+
+ String className = "Filter" + getCamelCaseType(operandType) + "Col" + operatorName + "TimestampColumn";
+
+ //Read the template into a string;
+ File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+ String templateString = readFile(templateFile);
+ templateString = templateString.replaceAll("<ClassName>", className);
+ templateString = templateString.replaceAll("<OperandType>", operandType);
+ templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+ templateString = templateString.replaceAll("<InputColumnVectorType1>", inputColumnVectorType1);
+ templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
 writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
 className, templateString);
 }
 private void generateFilterTimestampColumnCompareTimestampScalar(String[] tdesc) throws Exception {
 String operatorName = tdesc[1];
+ String operatorSymbol = tdesc[2];
 String className = "FilterTimestampCol" + operatorName + "TimestampScalar";
- String baseClassName = "FilterLongCol" + operatorName + "LongScalar";
+
+ //Read the template into a string;
+ File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+ String templateString = readFile(templateFile);
+ templateString = templateString.replaceAll("<ClassName>", className);
+ templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+
+ writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+ className, templateString);
+ }
+
+ private void generateFilterTimestampColumnCompareScalar(String[] tdesc) throws Exception {
+ String operatorName = tdesc[1];
+ String operandType = tdesc[2];
+ String operatorSymbol = tdesc[3];
+ String className = "FilterTimestampCol" + operatorName + getCamelCaseType(operandType) + "Scalar";
+ //Read the template into a string;
+ File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+ String templateString = readFile(templateFile);
+ templateString = templateString.replaceAll("<ClassName>", className);
+ templateString = templateString.replaceAll("<OperandType>", operandType);
+ templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+ templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
+ writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+ className, templateString);
+ }
+
+ private void generateFilterColumnCompareTimestampScalar(String[] tdesc) throws Exception {
+ 
+    String operatorName = tdesc[1];
+    String operandType = tdesc[2];
+    String operatorSymbol = tdesc[3];
+    String camelCaseOperandType = getCamelCaseType(operandType);
+    String className = "Filter" + camelCaseOperandType + "Col" + operatorName + "TimestampScalar";
+    String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.gen."
+        + "Filter" + camelCaseOperandType + "Col" + operatorName + camelCaseOperandType + "Scalar";
+    //Read the template into a string;
     File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
     String templateString = readFile(templateFile);
     templateString = templateString.replaceAll("<ClassName>", className);
     templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
+    templateString = templateString.replaceAll("<OperandType>", operandType);
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);
   }

   private void generateFilterTimestampScalarCompareTimestampColumn(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
+    String operatorSymbol = tdesc[2];
     String className = "FilterTimestampScalar" + operatorName + "TimestampColumn";
-    String baseClassName = "FilterLongScalar" + operatorName + "LongColumn";
+
+    //Read the template into a string;
+    File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+        className, templateString);
+  }
+
+  private void generateFilterTimestampScalarCompareColumn(String[] tdesc) throws Exception {
+    String operatorName = tdesc[1];
+    String operandType = tdesc[2];
+    String operatorSymbol = tdesc[3];
+    String camelCaseOperandType = getCamelCaseType(operandType);
+    String className = "FilterTimestampScalar" + operatorName + getCamelCaseType(operandType) + "Column";
+    String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.gen."
+ + "Filter" + camelCaseOperandType + "Scalar" + operatorName + camelCaseOperandType + "Column"; + //Read the template into a string; File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); String templateString = readFile(templateFile); templateString = templateString.replaceAll("", className); templateString = templateString.replaceAll("", baseClassName); + templateString = templateString.replaceAll("", operandType); + templateString = templateString.replaceAll("", operatorSymbol); + templateString = templateString.replaceAll("", timestampLongDoubleMethod(operandType)); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } - private String timestampScalarConversion(String operandType) { + private void generateFilterScalarCompareTimestampColumn(String[] tdesc) throws Exception { + String operatorName = tdesc[1]; + String operandType = tdesc[2]; + String operatorSymbol = tdesc[3]; + String className = "Filter" + getCamelCaseType(operandType) + "Scalar" + operatorName + "TimestampColumn"; + //Read the template into a string; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("", className); + templateString = templateString.replaceAll("", operandType); + templateString = templateString.replaceAll("", operatorSymbol); + templateString = templateString.replaceAll("", timestampLongDoubleMethod(operandType)); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private String timestampLongDoubleMethod(String operandType) { if (operandType.equals("long")) { - return "secondsToNanoseconds"; + return "getEpochSeconds"; } else if (operandType.equals("double")) { - return "doubleToNanoseconds"; + return "getEpochSecondsWithFractionalNanos"; } else { return "unknown"; } } - private void generateScalarCompareTimestampColumn(String[] tdesc) throws Exception { + // ----------------------------------------------------------------------------------------------- + // + // Compare timestamp against timestamp, long (seconds), and double (seconds with fractional + // nanoseconds). 
-  private void generateScalarCompareTimestampColumn(String[] tdesc) throws Exception {
+  // -----------------------------------------------------------------------------------------------
+  //
+  // Compare timestamp against timestamp, long (seconds), and double (seconds with fractional
+  // nanoseconds).
+  //
+  // TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn
+  // TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Column
+  //* {Long|Double}Col {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn
+  //
+  // TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampScalar
+  // TimestampCol {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Scalar
+  //* {Long|Double}Col {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampScalar
+  //
+  // TimestampScalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn
+  // TimestampScalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} {Long|Double}Column
+  //* {Long|Double}Scalar {Equal|Greater|GreaterEqual|Less|LessEqual|NotEqual} TimestampColumn
+  //
+  // -----------------------------------------------------------------------------------------------
+
+  private void generateTimestampColumnCompareTimestampColumn(String[] tdesc) throws Exception {
+    String operatorName = tdesc[1];
+    String operatorSymbol = tdesc[2];
+    String className = "TimestampCol" + operatorName + "TimestampColumn";
+
+    //Read the template into a string;
+    File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+        className, templateString);
+  }
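+
+  // For example (illustrative; the actual entries live in the templateExpansions table): an
+  // entry of the form
+  //     {"TimestampColumnCompareTimestampColumn", "Less", "<"}
+  // would make the method above emit the class TimestampColLessTimestampColumn, whose
+  // evaluate() compares two TimestampColumnVectors and writes 1/0 into a LongColumnVector.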
+
+  private void generateTimestampColumnCompareColumn(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
     String operandType = tdesc[2];
-    String className = getCamelCaseType(operandType) + "Scalar" + operatorName + "TimestampColumn";
-    String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.LongScalar" + operatorName + "LongColumn";
+    String operatorSymbol = tdesc[3];
+    String inputColumnVectorType2 = this.getColumnVectorType(operandType);
+
+    String className = "TimestampCol" + operatorName + getCamelCaseType(operandType) + "Column";
+    //Read the template into a string;
+    File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<OperandType>", operandType);
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    templateString = templateString.replaceAll("<InputColumnVectorType2>", inputColumnVectorType2);
+    templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
+    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+        className, templateString);
+  }
+
+  private void generateColumnCompareTimestampColumn(String[] tdesc) throws Exception {
+    String operatorName = tdesc[1];
+    String operandType = tdesc[2];
+    String operatorSymbol = tdesc[3];
+    String inputColumnVectorType1 = this.getColumnVectorType(operandType);
+
+    String className = getCamelCaseType(operandType) + "Col" + operatorName + "TimestampColumn";
     //Read the template into a string;
     File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
     String templateString = readFile(templateFile);
     templateString = templateString.replaceAll("<ClassName>", className);
-    templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
     templateString = templateString.replaceAll("<OperandType>", operandType);
-    templateString = templateString.replaceAll("<TimestampScalarConversion>", timestampScalarConversion(operandType));
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    templateString = templateString.replaceAll("<InputColumnVectorType1>", inputColumnVectorType1);
+    templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
+    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+        className, templateString);
+  }
+
+  private void generateTimestampColumnCompareTimestampScalar(String[] tdesc) throws Exception {
+    String operatorName = tdesc[1];
+    String operatorSymbol = tdesc[2];
+    String className = "TimestampCol" + operatorName + "TimestampScalar";
+
+    //Read the template into a string;
+    File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);
   }

@@ -1951,6 +2418,7 @@ private void generateScalarCompareTimestampColumn(String[] tdesc) throws Excepti
   private void generateTimestampColumnCompareScalar(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
     String operandType = tdesc[2];
+    String operatorSymbol = tdesc[3];
     String className = "TimestampCol" + operatorName + getCamelCaseType(operandType) + "Scalar";
     String baseClassName = "org.apache.hadoop.hive.ql.exec.vector.expressions.LongCol" + operatorName + "LongScalar";
     //Read the template into a string;
@@ -1959,43 +2427,87 @@ private void generateTimestampColumnCompareScalar(String[] tdesc) throws Excepti
     templateString = templateString.replaceAll("<ClassName>", className);
     templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
     templateString = templateString.replaceAll("<OperandType>", operandType);
-    templateString = templateString.replaceAll("<TimestampScalarConversion>", timestampScalarConversion(operandType));
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);
   }

-  private void generateFilterTimestampColumnCompareScalar(String[] tdesc) throws Exception {
+  private void generateColumnCompareTimestampScalar(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
     String operandType = tdesc[2];
-    String className = "FilterTimestampCol" + operatorName + getCamelCaseType(operandType) + "Scalar";
-    String baseClassName = "FilterLongCol" + operatorName + "LongScalar";
+    String operatorSymbol = tdesc[3];
+    String inputColumnVectorType1 = this.getColumnVectorType(operandType);
+
+    String className = getCamelCaseType(operandType) + "Col" + operatorName + "TimestampScalar";
+    //Read the template into a string;
+    File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<OperandType>", operandType);
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    templateString = templateString.replaceAll("<InputColumnVectorType1>", inputColumnVectorType1);
+    templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
+    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+        className, templateString);
+  }
+
+  private void generateTimestampScalarCompareTimestampColumn(String[] tdesc) throws Exception {
+    String operatorName = tdesc[1];
+    String operatorSymbol = tdesc[2];
+    String className = "TimestampScalar" + operatorName + "TimestampColumn";
+
+    //Read the template into a string;
+    File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+        className, templateString);
+  }
+
+  private void generateTimestampScalarCompareColumn(String[] tdesc) throws Exception {
+    String operatorName = tdesc[1];
+    String operandType = tdesc[2];
+
+    String camelCaseOperandType = getCamelCaseType(operandType);
+    String className = "TimestampScalar" + operatorName + getCamelCaseType(operandType) + "Column";
+    String baseClassName = camelCaseOperandType + "Scalar" + operatorName + camelCaseOperandType + "Column";
+
     //Read the template into a string;
     File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
     String templateString = readFile(templateFile);
     templateString = templateString.replaceAll("<ClassName>", className);
     templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
     templateString = templateString.replaceAll("<OperandType>", operandType);
-    templateString = templateString.replaceAll("<TimestampScalarConversion>", timestampScalarConversion(operandType));
+    templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);
   }

-  private void generateFilterScalarCompareTimestampColumn(String[] tdesc) throws Exception {
+  private void generateScalarCompareTimestampColumn(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
     String operandType = tdesc[2];
-    String className = "Filter" + getCamelCaseType(operandType) + "Scalar" + operatorName + "TimestampColumn";
-    String baseClassName = "FilterLongScalar" + operatorName + "LongColumn";
+    String operatorSymbol = tdesc[3];
+    String camelCaseOperandType = getCamelCaseType(operandType);
+    String className = camelCaseOperandType + "Scalar" + operatorName + "TimestampColumn";
+    String baseClassName = camelCaseOperandType + "Scalar" + operatorName + camelCaseOperandType + "Column";
     //Read the template into a string;
     File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
     String templateString = readFile(templateFile);
     templateString = templateString.replaceAll("<ClassName>", className);
     templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
     templateString = templateString.replaceAll("<OperandType>", operandType);
-    templateString = templateString.replaceAll("<TimestampScalarConversion>", timestampScalarConversion(operandType));
+    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    templateString = templateString.replaceAll("<TimestampLongDoubleMethod>", timestampLongDoubleMethod(operandType));
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);
   }
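+
+  // For example (illustrative): an entry such as
+  //     {"FilterTimestampColumnCompareScalar", "Less", "double", "<"}
+  // feeds generateFilterTimestampColumnCompareScalar above and yields the class
+  // FilterTimestampColLessDoubleScalar, which keeps only rows whose timestamp, read as
+  // getEpochSecondsWithFractionalNanos(i), is less than the double scalar.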
+  // -----------------------------------------------------------------------------------------------
+  //
+  // -----------------------------------------------------------------------------------------------

   private void generateColumnArithmeticOperatorColumn(String[] tdesc, String returnType, String className) throws Exception {
@@ -2098,7 +2610,7 @@ private void generateColumnArithmeticOperatorScalar(String[] tdesc, String retur
         className, templateString);

     String testScalarType = operandType2;
-    if (isDateTimeIntervalType(testScalarType)) {
+    if (isDateIntervalType(testScalarType)) {
       testScalarType = "long";
     }

@@ -2180,7 +2692,7 @@ private void generateScalarArithmeticOperatorColumn(String[] tdesc, String retur
         className, templateString);

     String testScalarType = operandType1;
-    if (isDateTimeIntervalType(testScalarType)) {
+    if (isDateIntervalType(testScalarType)) {
       testScalarType = "long";
     }

@@ -2297,19 +2809,19 @@ private void generateScalarArithmeticColumn(String[] tdesc) throws Exception {
     generateScalarArithmeticOperatorColumn(tdesc, returnType, className);
   }

-  private void generateFilterDecimalColumnCompareScalar(String[] tdesc) throws IOException {
+  private void generateFilterDecimalColumnCompareDecimalScalar(String[] tdesc) throws IOException {
     String operatorName = tdesc[1];
     String className = "FilterDecimalCol" + operatorName + "DecimalScalar";
     generateDecimalColumnCompare(tdesc, className);
   }

-  private void generateFilterDecimalScalarCompareColumn(String[] tdesc) throws IOException {
+  private void generateFilterDecimalScalarCompareDecimalColumn(String[] tdesc) throws IOException {
     String operatorName = tdesc[1];
     String className = "FilterDecimalScalar" + operatorName + "DecimalColumn";
     generateDecimalColumnCompare(tdesc, className);
   }

-  private void generateFilterDecimalColumnCompareColumn(String[] tdesc) throws IOException {
+  private void generateFilterDecimalColumnCompareDecimalColumn(String[] tdesc) throws IOException {
     String operatorName = tdesc[1];
     String className = "FilterDecimalCol" + operatorName + "DecimalColumn";
     generateDecimalColumnCompare(tdesc, className);
@@ -2395,310 +2907,308 @@ private void generateFilterDTIColumnCompareScalar(String[] tdesc) throws Excepti
         className, templateString);
   }

-  private void generateColumnArithmeticColumnWithConvert(String[] tdesc) throws Exception {
+  private void generateDateTimeArithmeticIntervalYearMonth(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
-    String operandType1 = tdesc[2];
-    String operandType2 = tdesc[3];
-    String operatorSymbol = tdesc[4];
-    String typeConversion1 = tdesc[5];
-    String typeConversion2 = tdesc[6];
-    String className = getCamelCaseType(operandType1)
-        + "Col" + operatorName + getCamelCaseType(operandType2) + "Column";
-    String returnType = getArithmeticReturnType(operandType1, operandType2);
-    String outputColumnVectorType = this.getColumnVectorType(returnType);
-    String inputColumnVectorType1 = this.getColumnVectorType(operandType1);
-    String inputColumnVectorType2 = this.getColumnVectorType(operandType2);
-    // For date/timestamp/interval, this should be "long"
-    String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1);
-    String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2);
-    String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType);
+    String operatorSymbol = tdesc[2];
+    String operandType1 = tdesc[3];
+    String colOrScalar1 = tdesc[4];
+    String operandType2 = tdesc[5];
+    String colOrScalar2 = tdesc[6];
+    String className = getCamelCaseType(operandType1) + colOrScalar1 + operatorName
+        + getCamelCaseType(operandType2) + colOrScalar2;

     //Read the template into a string;
     File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
     String templateString = readFile(templateFile);
     templateString = templateString.replaceAll("<ClassName>", className);
-    templateString = templateString.replaceAll("<InputColumnVectorType1>", inputColumnVectorType1);
-    templateString = templateString.replaceAll("<InputColumnVectorType2>", inputColumnVectorType2);
-    templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType);
-    templateString = templateString.replaceAll("<OperatorName>", operatorName);
     templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
-    templateString = templateString.replaceAll("<OperandType1>", operandType1);
-    templateString = templateString.replaceAll("<OperandType2>", operandType2);
-    templateString = templateString.replaceAll("<ReturnType>", returnType);
-    templateString = templateString.replaceAll("<VectorOperandType1>", vectorOperandType1);
-    templateString = templateString.replaceAll("<VectorOperandType2>", vectorOperandType2);
-    templateString = templateString.replaceAll("<VectorReturnType>", vectorReturnType);
-    templateString = templateString.replaceAll("<TypeConversion1>", typeConversion1);
-    templateString = templateString.replaceAll("<TypeConversion2>", typeConversion2);
-    templateString = templateString.replaceAll("<CamelReturnType>", getCamelCaseType(vectorReturnType));
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);

-    testCodeGen.addColumnColumnOperationTestCases(
+    String inputColumnVectorType1 = this.getColumnVectorType(operandType1);
+    String inputColumnVectorType2 = this.getColumnVectorType(operandType2);
+
+    if (colOrScalar1.equals("Col") && colOrScalar2.equals("Column")) {
+      testCodeGen.addColumnColumnOperationTestCases(
+          className,
+          inputColumnVectorType1,
+          inputColumnVectorType2,
+          "long");
+    } else if (colOrScalar1.equals("Col") && colOrScalar2.equals("Scalar")) {
+      String testScalarType = operandType2;
+      if (isDateIntervalType(testScalarType)) {
+        testScalarType = "long";
+      }
+      testCodeGen.addColumnScalarOperationTestCases(
+          true,
          className,
          inputColumnVectorType1,
-         inputColumnVectorType2,
-         outputColumnVectorType);
+          "long",
+          testScalarType);
+    } else if (colOrScalar1.equals("Scalar") && colOrScalar2.equals("Column")) {
+      String testScalarType = operandType1;
+      if (isDateIntervalType(testScalarType)) {
+        testScalarType = "long";
+      }
+
+      testCodeGen.addColumnScalarOperationTestCases(
+          false,
+          className,
+          inputColumnVectorType2,
+          "long",
+          testScalarType);
+    }
+  }
+
+  private String getTimestampHiveType(String operandType) {
+    if (operandType.equals("timestamp")) {
+      return "Timestamp";
+    } else if (operandType.equals("interval_day_time")) {
+      return "HiveIntervalDayTime";
+    } else {
+      return "Unknown";
+    }
+  }
+
+  private String getPisaTimestampConversion(String operandType) {
+    if (operandType.equals("timestamp")) {
+      return "new PisaTimestamp";
+    } else if (operandType.equals("interval_day_time")) {
+      return "new PisaTimestamp().updateFromEpochNanoseconds";
+    } else {
+      return "Unknown";
+    }
+  }
+
+  private String getOptionalValueConvert(String operandType) {
+    if (operandType.equals("timestamp")) {
+      return ""; // Nothing.
+    } else if (operandType.equals("interval_day_time")) {
+      return "DateUtils.getIntervalDayTimeTotalNanos";
+    } else {
+      return "Unknown";
+    }
   }

-  private void generateScalarArithmeticColumnWithConvert(String[] tdesc) throws Exception {
+  private String replaceTimestampScalar(String templateString, int argNum, String operandType) {
+
+    if (!operandType.equals("timestamp") && !operandType.equals("interval_day_time")) {
+      return templateString;
+    }
+
+    System.out.println("replaceTimestampScalar operandType " + operandType);
+
+    String scalarHiveTimestampTypePattern = "<ScalarHiveTimestampType" + argNum + ">";
+    String pisaTimestampConversionPattern = "<PisaTimestampConversion" + argNum + ">";
+    String optionalValueConvertPattern = "<OptionalValueConvert" + argNum + ">";
+
+    System.out.println("replaceTimestampScalar scalarHiveTimestampTypePattern " + scalarHiveTimestampTypePattern + " value "
+        + getTimestampHiveType(operandType));
+    System.out.println("replaceTimestampScalar pisaTimestampConversionPattern " + pisaTimestampConversionPattern + " value "
+        + getPisaTimestampConversion(operandType));
+    System.out.println("replaceTimestampScalar optionalValueConvertPattern " + optionalValueConvertPattern + " value "
+        + getOptionalValueConvert(operandType));
+
+    templateString = templateString.replaceAll(scalarHiveTimestampTypePattern, getTimestampHiveType(operandType));
+    templateString = templateString.replaceAll(pisaTimestampConversionPattern, getPisaTimestampConversion(operandType));
+    templateString = templateString.replaceAll(optionalValueConvertPattern, getOptionalValueConvert(operandType));
+
+    return templateString;
+  }
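+
+  // Illustration (editor's sketch; token names reconstructed, not verbatim from the templates):
+  // for argNum 2 and operandType "interval_day_time", a template line like
+  //     this.value2 = <PisaTimestampConversion2>(<OptionalValueConvert2>(value2));
+  // becomes
+  //     this.value2 = new PisaTimestamp().updateFromEpochNanoseconds(
+  //         DateUtils.getIntervalDayTimeTotalNanos(value2));
+  // whereas a "timestamp" scalar is wrapped directly as new PisaTimestamp(value2).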
"long" : returnType); - String inputColumnVectorType = this.getColumnVectorType(operandType2); - String inputColumnVectorType1 = this.getColumnVectorType(operandType1); - String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - // For date/timestamp/interval, this should be "long" - String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1); - String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2); - String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType); + String colOrScalar1 = tdesc[2]; + String colOrScalar2 = tdesc[3]; + + String baseClassName = "Timestamp" + colOrScalar1 + operatorName + + "Timestamp" + colOrScalar2 + "Base"; //Read the template into a string; File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); String templateString = readFile(templateFile); - templateString = templateString.replaceAll("", className); - templateString = templateString.replaceAll("", inputColumnVectorType); - templateString = templateString.replaceAll("", outputColumnVectorType); - templateString = templateString.replaceAll("", operatorName); - templateString = templateString.replaceAll("", operatorSymbol); - templateString = templateString.replaceAll("", operandType1); - templateString = templateString.replaceAll("", operandType2); - templateString = templateString.replaceAll("", returnType); - templateString = templateString.replaceAll("", vectorOperandType1); - templateString = templateString.replaceAll("", vectorOperandType2); - templateString = templateString.replaceAll("", vectorReturnType); - templateString = templateString.replaceAll("", typeConversion1); - templateString = templateString.replaceAll("", typeConversion2); - templateString = templateString.replaceAll("", getCamelCaseType(vectorReturnType)); - writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, - className, templateString); - - String testScalarType = operandType1; - if (isDateTimeIntervalType(testScalarType)) { - testScalarType = "long"; - } + templateString = templateString.replaceAll("", baseClassName); + templateString = templateString.replaceAll("", operatorName.toLowerCase()); - testCodeGen.addColumnScalarOperationTestCases( - false, - className, - inputColumnVectorType, - outputColumnVectorType, - testScalarType); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + baseClassName, templateString); } - private void generateColumnArithmeticScalarWithConvert(String[] tdesc) throws Exception { + private void generateTimestampArithmeticTimestamp(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; - String operandType2 = tdesc[3]; - String operatorSymbol = tdesc[4]; - String typeConversion1 = tdesc[5]; - String typeConversion2 = tdesc[6]; - String className = getCamelCaseType(operandType1) - + "Col" + operatorName + getCamelCaseType(operandType2) + "Scalar"; - String returnType = getArithmeticReturnType(operandType1, operandType2); - String outputColumnVectorType = this.getColumnVectorType(returnType); - String inputColumnVectorType = this.getColumnVectorType(operandType1); - String inputColumnVectorType1 = this.getColumnVectorType(operandType1); - String inputColumnVectorType2 = this.getColumnVectorType(operandType2); - // For date/timestamp/interval, this should be "long" - String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1); - String 
-    String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2);
-    String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType);
+    String colOrScalar1 = tdesc[3];
+    String operandType2 = tdesc[4];
+    String colOrScalar2 = tdesc[5];
+
+    String className = getCamelCaseType(operandType1) + colOrScalar1 + operatorName
+        + getCamelCaseType(operandType2) + colOrScalar2;
+    String baseClassName = "Timestamp" + colOrScalar1 + operatorName
+        + "Timestamp" + colOrScalar2 + "Base";
+
+
+    System.out.println("generateTimestampArithmeticTimestamp enter... templateFile " + tdesc[0]);
+    System.out.println("generateTimestampArithmeticTimestamp operandType1 " + operandType1);
+    System.out.println("generateTimestampArithmeticTimestamp colOrScalar1 " + colOrScalar1);
+    System.out.println("generateTimestampArithmeticTimestamp operandType2 " + operandType2);
+    System.out.println("generateTimestampArithmeticTimestamp colOrScalar2 " + colOrScalar2);
+    System.out.println("generateTimestampArithmeticTimestamp className " + className);
+    System.out.println("generateTimestampArithmeticTimestamp baseClassName " + baseClassName);

     //Read the template into a string;
     File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
     String templateString = readFile(templateFile);
     templateString = templateString.replaceAll("<ClassName>", className);
-    templateString = templateString.replaceAll("<InputColumnVectorType>", inputColumnVectorType);
-    templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType);
-    templateString = templateString.replaceAll("<OperatorName>", operatorName);
-    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
     templateString = templateString.replaceAll("<OperandType1>", operandType1);
     templateString = templateString.replaceAll("<OperandType2>", operandType2);
-    templateString = templateString.replaceAll("<ReturnType>", returnType);
-    templateString = templateString.replaceAll("<VectorOperandType1>", vectorOperandType1);
-    templateString = templateString.replaceAll("<VectorOperandType2>", vectorOperandType2);
-    templateString = templateString.replaceAll("<VectorReturnType>", vectorReturnType);
-    templateString = templateString.replaceAll("<TypeConversion1>", typeConversion1);
-    templateString = templateString.replaceAll("<TypeConversion2>", typeConversion2);
+    if (colOrScalar1.equals("Scalar")) {
+      templateString = replaceTimestampScalar(templateString, 1, operandType1);
+    }
+    if (colOrScalar2.equals("Scalar")) {
+      templateString = replaceTimestampScalar(templateString, 2, operandType2);
+    }
+
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);

-    String testScalarType = operandType2;
-    if (isDateTimeIntervalType(testScalarType)) {
-      testScalarType = "long";
-    }
+    String inputColumnVectorType1 = this.getColumnVectorType(operandType1);
+    String inputColumnVectorType2 = this.getColumnVectorType(operandType2);

-    testCodeGen.addColumnScalarOperationTestCases(
-        true,
+    /* UNDONE: Col Col, vs Scalar Col vs Col Scalar
+    testCodeGen.addColumnColumnOperationTestCases(
         className,
-        inputColumnVectorType,
-        outputColumnVectorType,
-        testScalarType);
+        inputColumnVectorType1,
+        inputColumnVectorType2,
+        "long");
+    */
+  }
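+
+  // Illustration (editor's sketch; operator-method token as reconstructed above): an entry like
+  //     {"TimestampArithmeticTimestampBase", "Subtract", "Scalar", "Column"}
+  // produces the shared base TimestampScalarSubtractTimestampColumnBase, and a companion entry
+  // then generates the concrete TimestampScalarSubtractTimestampColumn extending that base via
+  // the method above.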
+
+  private void generateDateArithmeticTimestampBase(String[] tdesc) throws Exception {
+    String operatorName = tdesc[1];
+    String colOrScalar1 = tdesc[2];
+    String colOrScalar2 = tdesc[3];
+
+    String baseClassName = "Date" + colOrScalar1 + operatorName
+        + "Timestamp" + colOrScalar2 + "Base";
+
+    //Read the template into a string;
+    File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
+    templateString = templateString.replaceAll("<OperatorMethod>", operatorName.toLowerCase());
+
+    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+        baseClassName, templateString);
   }
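+
+  // Illustration (editor's sketch): an entry like
+  //     {"DateArithmeticTimestamp", "Subtract", "date", "Col", "timestamp", "Column"}
+  // generates DateColSubtractTimestampColumn, which extends the DateColSubtractTimestampColumnBase
+  // emitted by the base generator above; the concrete class comes from
+  // generateDateArithmeticTimestamp below.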
-  private void generateDateTimeColumnArithmeticIntervalColumnWithConvert(String[] tdesc) throws Exception {
+  private void generateDateArithmeticTimestamp(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
     String operandType1 = tdesc[2];
-    String operandType2 = tdesc[3];
-    String operatorSymbol = tdesc[4];
-    String typeConversion = tdesc[5];
-    String operatorFunction = tdesc[6];
-    String className = getCamelCaseType(operandType1)
-        + "Col" + operatorName + getCamelCaseType(operandType2) + "Column";
-    String returnType = getArithmeticReturnType(operandType1, operandType2);
-    String outputColumnVectorType = this.getColumnVectorType(returnType);
-    String inputColumnVectorType1 = this.getColumnVectorType(operandType1);
-    String inputColumnVectorType2 = this.getColumnVectorType(operandType2);
-    // For date/timestamp/interval, this should be "long"
-    String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1);
-    String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2);
-    String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType);
+    String colOrScalar1 = tdesc[3];
+    String operandType2 = tdesc[4];
+    String colOrScalar2 = tdesc[5];
+
+    String className = getCamelCaseType(operandType1) + colOrScalar1 + operatorName
+        + getCamelCaseType(operandType2) + colOrScalar2;
+    String baseClassName = "Date" + colOrScalar1 + operatorName
+        + "Timestamp" + colOrScalar2 + "Base";

     //Read the template into a string;
     File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
     String templateString = readFile(templateFile);
     templateString = templateString.replaceAll("<ClassName>", className);
-    templateString = templateString.replaceAll("<InputColumnVectorType1>", inputColumnVectorType1);
-    templateString = templateString.replaceAll("<InputColumnVectorType2>", inputColumnVectorType2);
-    templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType);
-    templateString = templateString.replaceAll("<OperatorName>", operatorName);
-    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
     templateString = templateString.replaceAll("<OperandType1>", operandType1);
     templateString = templateString.replaceAll("<OperandType2>", operandType2);
-    templateString = templateString.replaceAll("<ReturnType>", returnType);
-    templateString = templateString.replaceAll("<VectorOperandType1>", vectorOperandType1);
-    templateString = templateString.replaceAll("<VectorOperandType2>", vectorOperandType2);
-    templateString = templateString.replaceAll("<VectorReturnType>", vectorReturnType);
-    templateString = templateString.replaceAll("<TypeConversion>", typeConversion);
-    templateString = templateString.replaceAll("<OperatorFunction>", operatorFunction);
-    templateString = templateString.replaceAll("<CamelReturnType>", getCamelCaseType(vectorReturnType));
+    if (colOrScalar1.equals("Scalar")) {
+      templateString = replaceTimestampScalar(templateString, 1, operandType1);
+    }
+    if (colOrScalar2.equals("Scalar")) {
+      templateString = replaceTimestampScalar(templateString, 2, operandType2);
+    }
+
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);

+    String inputColumnVectorType1 = this.getColumnVectorType(operandType1);
+    String inputColumnVectorType2 = this.getColumnVectorType(operandType2);
+
+    /* UNDONE: Col Col, vs Scalar Col vs Col Scalar
     testCodeGen.addColumnColumnOperationTestCases(
         className,
         inputColumnVectorType1,
         inputColumnVectorType2,
-        outputColumnVectorType);
+        "long");
+    */
   }

-  private void generateDateTimeScalarArithmeticIntervalColumnWithConvert(String[] tdesc) throws Exception {
+  private void generateTimestampArithmeticDateBase(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
-    String operandType1 = tdesc[2];
-    String operandType2 = tdesc[3];
-    String operatorSymbol = tdesc[4];
-    String typeConversion = tdesc[5];
-    String operatorFunction = tdesc[6];
-    String className = getCamelCaseType(operandType1)
-        + "Scalar" + operatorName + getCamelCaseType(operandType2) + "Column";
-    String returnType = getArithmeticReturnType(operandType1, operandType2);
-    String outputColumnVectorType = this.getColumnVectorType(
-        returnType == null ? "long" : returnType);
-    String inputColumnVectorType = this.getColumnVectorType(operandType2);
-    String inputColumnVectorType1 = this.getColumnVectorType(operandType1);
-    String inputColumnVectorType2 = this.getColumnVectorType(operandType2);
-    // For date/timestamp/interval, this should be "long"
-    String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1);
-    String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2);
-    String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType);
+    String colOrScalar1 = tdesc[2];
+    String colOrScalar2 = tdesc[3];
+
+    String baseClassName = "Timestamp" + colOrScalar1 + operatorName
+        + "Date" + colOrScalar2 + "Base";

     //Read the template into a string;
     File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
     String templateString = readFile(templateFile);
-    templateString = templateString.replaceAll("<ClassName>", className);
-    templateString = templateString.replaceAll("<InputColumnVectorType>", inputColumnVectorType);
-    templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType);
-    templateString = templateString.replaceAll("<OperatorName>", operatorName);
-    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
-    templateString = templateString.replaceAll("<OperandType1>", operandType1);
-    templateString = templateString.replaceAll("<OperandType2>", operandType2);
-    templateString = templateString.replaceAll("<ReturnType>", returnType);
-    templateString = templateString.replaceAll("<VectorOperandType1>", vectorOperandType1);
-    templateString = templateString.replaceAll("<VectorOperandType2>", vectorOperandType2);
-    templateString = templateString.replaceAll("<VectorReturnType>", vectorReturnType);
-    templateString = templateString.replaceAll("<TypeConversion>", typeConversion);
-    templateString = templateString.replaceAll("<OperatorFunction>", operatorFunction);
-    templateString = templateString.replaceAll("<CamelReturnType>", getCamelCaseType(vectorReturnType));
-    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
-        className, templateString);
-
-    String testScalarType = operandType1;
-    if (isDateTimeIntervalType(testScalarType)) {
-      testScalarType = "long";
-    }
+    templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
+    templateString = templateString.replaceAll("<OperatorMethod>", operatorName.toLowerCase());

-    testCodeGen.addColumnScalarOperationTestCases(
-        false,
-        className,
-        inputColumnVectorType,
-        outputColumnVectorType,
-        testScalarType);
+    writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
+        baseClassName, templateString);
   }
-  private void generateDateTimeColumnArithmeticIntervalScalarWithConvert(String[] tdesc) throws Exception {
+  private void generateTimestampArithmeticDate(String[] tdesc) throws Exception {
     String operatorName = tdesc[1];
     String operandType1 = tdesc[2];
-    String operandType2 = tdesc[3];
-    String operatorSymbol = tdesc[4];
-    String typeConversion = tdesc[5];
-    String operatorFunction = tdesc[6];
-    String className = getCamelCaseType(operandType1)
-        + "Col" + operatorName + getCamelCaseType(operandType2) + "Scalar";
-    String returnType = getArithmeticReturnType(operandType1, operandType2);
-    String outputColumnVectorType = this.getColumnVectorType(returnType);
-    String inputColumnVectorType = this.getColumnVectorType(operandType1);
-    String inputColumnVectorType1 = this.getColumnVectorType(operandType1);
-    String inputColumnVectorType2 = this.getColumnVectorType(operandType2);
-    // For date/timestamp/interval, this should be "long"
-    String vectorOperandType1 = this.getVectorPrimitiveType(inputColumnVectorType1);
-    String vectorOperandType2 = this.getVectorPrimitiveType(inputColumnVectorType2);
-    String vectorReturnType = this.getVectorPrimitiveType(outputColumnVectorType);
+    String colOrScalar1 = tdesc[3];
+    String operandType2 = tdesc[4];
+    String colOrScalar2 = tdesc[5];
+
+    String className = getCamelCaseType(operandType1) + colOrScalar1 + operatorName
+        + getCamelCaseType(operandType2) + colOrScalar2;
+    String baseClassName = "Timestamp" + colOrScalar1 + operatorName
+        + "Date" + colOrScalar2 + "Base";

     //Read the template into a string;
     File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"));
     String templateString = readFile(templateFile);
     templateString = templateString.replaceAll("<ClassName>", className);
-    templateString = templateString.replaceAll("<InputColumnVectorType>", inputColumnVectorType);
-    templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType);
-    templateString = templateString.replaceAll("<OperatorName>", operatorName);
-    templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol);
+    templateString = templateString.replaceAll("<BaseClassName>", baseClassName);
     templateString = templateString.replaceAll("<OperandType1>", operandType1);
     templateString = templateString.replaceAll("<OperandType2>", operandType2);
-    templateString = templateString.replaceAll("<ReturnType>", returnType);
-    templateString = templateString.replaceAll("<VectorOperandType1>", vectorOperandType1);
-    templateString = templateString.replaceAll("<VectorOperandType2>", vectorOperandType2);
-    templateString = templateString.replaceAll("<VectorReturnType>", vectorReturnType);
-    templateString = templateString.replaceAll("<TypeConversion>", typeConversion);
-    templateString = templateString.replaceAll("<OperatorFunction>", operatorFunction);
+    if (colOrScalar1.equals("Scalar")) {
+      templateString = replaceTimestampScalar(templateString, 1, operandType1);
+    }
+    if (colOrScalar2.equals("Scalar")) {
+      templateString = replaceTimestampScalar(templateString, 2, operandType2);
+    }
+
     writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory,
         className, templateString);

-    String testScalarType = operandType2;
-    if (isDateTimeIntervalType(testScalarType)) {
-      testScalarType = "long";
-    }
+    String inputColumnVectorType1 = this.getColumnVectorType(operandType1);
+    String inputColumnVectorType2 = this.getColumnVectorType(operandType2);

-    testCodeGen.addColumnScalarOperationTestCases(
-        true,
+    /* UNDONE: Col Col, vs Scalar Col vs Col Scalar
+    testCodeGen.addColumnColumnOperationTestCases(
         className,
-        inputColumnVectorType,
-        outputColumnVectorType,
-        testScalarType);
+        inputColumnVectorType1,
+        inputColumnVectorType2,
+        "long");
+    */
   }

-  private static boolean isDateTimeIntervalType(String type) {
+  private static boolean isDateIntervalType(String type) {
     return (type.equals("date")
-        || type.equals("timestamp")
-        || type.equals("interval_year_month")
+        || type.equals("interval_year_month"));
+  }
+
+  private static boolean isTimestampIntervalType(String type) {
+    return (type.equals("timestamp")
         || type.equals("interval_day_time"));
   }

@@ -2821,12 +3331,14 @@ private String getArithmeticReturnType(String operandType1,
   private String getColumnVectorType(String primitiveType) throws Exception {
     if(primitiveType.equals("double")) {
       return "DoubleColumnVector";
-    } else if (primitiveType.equals("long") || isDateTimeIntervalType(primitiveType)) {
+    } else if (primitiveType.equals("long") || isDateIntervalType(primitiveType)) {
       return "LongColumnVector";
     } else if (primitiveType.equals("decimal")) {
       return "DecimalColumnVector";
     } else if (primitiveType.equals("string")) {
       return "BytesColumnVector";
+    } else if (isTimestampIntervalType(primitiveType)) {
+      return "TimestampColumnVector";
     }
     throw new Exception("Unimplemented primitive column vector type: " + primitiveType);
   }
diff --git orc/src/java/org/apache/orc/TypeDescription.java orc/src/java/org/apache/orc/TypeDescription.java
index f97a113..bd900ac 100644
--- orc/src/java/org/apache/orc/TypeDescription.java
+++ orc/src/java/org/apache/orc/TypeDescription.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -282,9 +283,10 @@ private ColumnVector createColumn(int maxSize) {
     case SHORT:
     case INT:
     case LONG:
-    case TIMESTAMP:
     case DATE:
       return new LongColumnVector();
+    case TIMESTAMP:
+      return new TimestampColumnVector();
     case FLOAT:
     case DOUBLE:
       return new DoubleColumnVector();
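
Illustrative sketch (editor's addition, simplified): with this change a timestamp column in a
row batch is materialized as a TimestampColumnVector instead of a LongColumnVector of
nanoseconds, e.g.

    TypeDescription schema = TypeDescription.fromString("struct<ts:timestamp>");
    VectorizedRowBatch batch = schema.createRowBatch();
    TimestampColumnVector tcv = (TimestampColumnVector) batch.cols[0];

and comparisons against long/double operands go through the epoch-seconds accessors named by
timestampLongDoubleMethod() above.
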
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareTimestampColumn.txt
new file mode 100644
index 0000000..218dd09
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareTimestampColumn.txt
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Generated from template ColumnCompareTimestampColumn.txt, which covers binary comparison
+ * expressions between a long/double column and a timestamp column.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+
+  public <ClassName>(int colNum1, int colNum2, int outputColumn) {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    <InputColumnVectorType1> inputColVector1 = (<InputColumnVectorType1>) batch.cols[colNum1];
+    TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    <OperandType>[] vector1 = inputColVector1.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating =
+        inputColVector1.isRepeating && inputColVector2.isRepeating
+        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
+        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
+
+    // Handle nulls first
+    NullUtil.propagateNullsColCol(
+        inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+    /* Disregard nulls for processing. In other words,
+     * the arithmetic operation is performed even if one or
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+      outputVector[0] = vector1[0] <OperatorSymbol> inputColVector2.<TimestampLongDoubleMethod>(0) ? 1 : 0;
+    } else if (inputColVector1.isRepeating) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = vector1[0] <OperatorSymbol> inputColVector2.<TimestampLongDoubleMethod>(i) ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = vector1[0] <OperatorSymbol> inputColVector2.<TimestampLongDoubleMethod>(i) ? 1 : 0;
+        }
+      }
+    } else if (inputColVector2.isRepeating) {
+      <OperandType> value2 = inputColVector2.<TimestampLongDoubleMethod>(0);
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = vector1[i] <OperatorSymbol> value2 ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = vector1[i] <OperatorSymbol> value2 ? 1 : 0;
+        }
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = vector1[i] <OperatorSymbol> inputColVector2.<TimestampLongDoubleMethod>(i) ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = vector1[i] <OperatorSymbol> inputColVector2.<TimestampLongDoubleMethod>(i) ? 1 : 0;
+        }
+      }
+    }
+
+    /* For the case when the output can have null values, follow
+     * the convention that the data values must be 1 for long and
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+     */
+    NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"),
+            VectorExpressionDescriptor.ArgumentType.getType("timestamp"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
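
For illustration (editor's addition): expanding the template above with <OperandType> = long and
<OperatorSymbol> = < gives an inner loop of the form

    for (int j = 0; j != n; j++) {
      int i = sel[j];
      outputVector[i] = vector1[i] < inputColVector2.getEpochSeconds(i) ? 1 : 0;
    }

i.e. the long column is interpreted as epoch seconds when compared against the timestamp column.
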
diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareTimestampScalar.txt
new file mode 100644
index 0000000..16f6cc1
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareTimestampScalar.txt
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Generated from template ColumnCompareTimestampScalar.txt, which covers binary comparison
+ * expressions between a column and a scalar. The boolean output is stored in a
+ * separate boolean column.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private <OperandType> value;
+  private int outputColumn;
+
+  public <ClassName>(int colNum, Timestamp value, int outputColumn) {
+    this.colNum = colNum;
+    this.value = new PisaTimestamp(value).<TimestampLongDoubleMethod>();
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    <InputColumnVectorType1> inputColVector1 = (<InputColumnVectorType1>) batch.cols[colNum];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] nullPos = inputColVector1.isNull;
+    boolean[] outNulls = outputColVector.isNull;
+    int n = batch.size;
+    <OperandType>[] vector1 = inputColVector1.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating = false;
+    outputColVector.noNulls = inputColVector1.noNulls;
+    if (inputColVector1.noNulls) {
+      if (inputColVector1.isRepeating) {
+        //All must be selected otherwise size would be zero
+        //Repeating property will not change.
+        outputVector[0] = vector1[0] <OperatorSymbol> value ? 1 : 0;
+        outputColVector.isRepeating = true;
+      } else if (batch.selectedInUse) {
+        for(int j=0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = vector1[i] <OperatorSymbol> value ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = vector1[i] <OperatorSymbol> value ? 1 : 0;
+        }
+      }
+    } else {
+      if (inputColVector1.isRepeating) {
+        //All must be selected otherwise size would be zero
+        //Repeating property will not change.
+        if (!nullPos[0]) {
+          outputVector[0] = vector1[0] <OperatorSymbol> value ? 1 : 0;
+          outNulls[0] = false;
+        } else {
+          outNulls[0] = true;
+        }
+        outputColVector.isRepeating = true;
+      } else if (batch.selectedInUse) {
+        for(int j=0; j != n; j++) {
+          int i = sel[j];
+          if (!nullPos[i]) {
+            outputVector[i] = vector1[i] <OperatorSymbol> value ? 1 : 0;
+            outNulls[i] = false;
+          } else {
+            //comparison with null is null
+            outNulls[i] = true;
+          }
+        }
+      } else {
+        System.arraycopy(nullPos, 0, outNulls, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!nullPos[i]) {
+            outputVector[i] = vector1[i] <OperatorSymbol> value ? 1 : 0;
+          }
+        }
+      }
+    }
+  }
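+
+  // Editor's note (illustrative): for a double operand the constructor above expands to
+  //     this.value = new PisaTimestamp(value).getEpochSecondsWithFractionalNanos();
+  // so the Timestamp scalar is converted once up front and the per-row comparison stays a
+  // plain primitive compare.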
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"),
+            VectorExpressionDescriptor.ArgumentType.getType("timestamp"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIColumnNoConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIColumnNoConvert.txt
index f2ec645..fe8f535 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIColumnNoConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIColumnNoConvert.txt
@@ -34,6 +34,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends LongColLongColumn {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(int colNum1, int colNum2, int outputColumn) {
     super(colNum1, colNum2, outputColumn);
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIScalarNoConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIScalarNoConvert.txt
index 1a360b8..293369f 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIScalarNoConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIColumnArithmeticDTIScalarNoConvert.txt
@@ -29,6 +29,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends LongColLongScalar {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(int colNum, long value, int outputColumn) {
     super(colNum, value, outputColumn);
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt
index 9d692cb..60884cd 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt
@@ -29,6 +29,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends <BaseClassName> {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(int colNum, long value, int outputColumn) {
     super(colNum, value, outputColumn);
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIScalarArithmeticDTIColumnNoConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DTIScalarArithmeticDTIColumnNoConvert.txt
index 753ea71..04607f6 100644
--- ql/src/gen/vectorization/ExpressionTemplates/DTIScalarArithmeticDTIColumnNoConvert.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIScalarArithmeticDTIColumnNoConvert.txt
@@ -34,6 +34,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends LongScalarLongColumn {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(long value, int colNum, int outputColumn) {
     super(value, colNum, outputColumn);
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt
index fdd453a..d518c44 100644
---
 ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt
+++ ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt
@@ -34,6 +34,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
  */
 public class <ClassName> extends <BaseClassName> {
 
+  private static final long serialVersionUID = 1L;
+
   public <ClassName>(long value, int colNum, int outputColumn) {
     super(value, colNum, outputColumn);
   }
diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt
new file mode 100644
index 0000000..a7c6d38
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+
+/**
+ * Generated from template DateColumnArithmeticIntervalYearMonthColumn.txt, which covers binary arithmetic
+ * expressions between columns.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public <ClassName>(int colNum1, int colNum2, int outputColumn) {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #1 is type date (epochDays).
+    LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1];
+
+    // Input #2 is type interval_year_month (months).
+    LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2];
+
+    // Output is type date.
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + long[] vector2 = inputColVector2.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputVector[0] = dtm.addMonthsToDays(vector1[0], (int) vector2[0]); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector1[0], (int) vector2[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector1[0], (int) vector2[i]); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector1[i], (int) vector2[0]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector1[i], (int) vector2[0]); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector1[i], (int) vector2[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector1[i], (int) vector2[i]); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt new file mode 100644 index 0000000..2ae7d0e --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateColumnArithmeticIntervalYearMonthScalar.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private long value; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum, long value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + + // Output is type date. 
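One subtlety of the column-column template just completed is worth spelling out: the arithmetic is executed for every slot, null or not, and NullUtil.setNullDataEntriesLong then overwrites the data in null slots with 1. The point of the convention is that a downstream expression consuming this column never sees an arbitrary leftover value in a null slot, so a later expression like col2 / (col1 - 1) cannot hit a stray zero. A self-contained illustration (names here are illustrative, not Hive's NullUtil):

public class NullDataConventionSketch {

  // Patch null slots to 1, per the "1 for long, NaN for double" convention
  // quoted in the template comments.
  static void setNullDataEntriesLong(long[] vector, boolean[] isNull, int n) {
    for (int i = 0; i < n; i++) {
      if (isNull[i]) {
        vector[i] = 1;
      }
    }
  }

  public static void main(String[] args) {
    long[] col1 = {5, 0, 7};                  // slot 1 was computed from a null
    boolean[] isNull = {false, true, false};
    setNullDataEntriesLong(col1, isNull, col1.length);

    // A following division now evaluates safely for every slot; slot 1 is
    // still marked null, so its data value is never observed by a user.
    long[] col2 = {10, 10, 14};
    for (int i = 0; i < col1.length; i++) {
      System.out.println(col2[i] / col1[i] + (isNull[i] ? " (null)" : ""));
    }
  }
}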
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; + int n = batch.size; + long[] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputVector[0] = dtm.addMonthsToDays(vector[0], (int) value); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt new file mode 100644 index 0000000..9530c04 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateColumnArithmeticTimestampColumn.txt, which covers binary arithmetic + * expressions between columns. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum1, int colNum2, int outputColumn) { + super(colNum1, colNum2, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumnBase.txt new file mode 100644 index 0000000..85fd8fd --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumnBase.txt @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template DateColumnArithmeticTimestampColumnBase.txt, which covers binary arithmetic + * expressions between columns. 
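The shape that recurs through the rest of this patch appears here for the first time: a thin generated leaf (DateColumnArithmeticTimestampColumn above) contributes only constructors and a descriptor, while the batch loop lives in a generated abstract Base class. A minimal sketch of the split, with hypothetical names standing in for the template expansions; note that in the real templates the operator is substituted into the expanded Base by GenVectorCode rather than dispatched through an abstract method as it is here:

// Base owns the per-batch loop; the subclass binds the concrete operation.
abstract class ColArithColBase {
  protected final int colNum1, colNum2, outputColumn;

  protected ColArithColBase(int colNum1, int colNum2, int outputColumn) {
    this.colNum1 = colNum1;
    this.colNum2 = colNum2;
    this.outputColumn = outputColumn;
  }

  protected abstract long op(long left, long right);

  // Stand-in for evaluate(VectorizedRowBatch): apply op to every row.
  void evaluate(long[] left, long[] right, long[] out, int n) {
    for (int i = 0; i < n; i++) {
      out[i] = op(left[i], right[i]);
    }
  }
}

// The generated leaf is nearly empty: constructor plumbing plus the operator.
final class ColAddColSketch extends ColArithColBase {
  ColAddColSketch(int colNum1, int colNum2, int outputColumn) {
    super(colNum1, colNum2, outputColumn);
  }

  @Override
  protected long op(long left, long right) {
    return left + right;
  }

  public static void main(String[] args) {
    long[] out = new long[3];
    new ColAddColSketch(0, 1, 2)
        .evaluate(new long[] {1, 2, 3}, new long[] {10, 20, 30}, out, 3);
    System.out.println(java.util.Arrays.toString(out)); // [11, 22, 33]
  }
}

The payoff is that the Add and Subtract expansions share one long evaluate body instead of duplicating it per operator and per scalar/column combination.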
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type Date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; + + // Input #2 is type timestamp/interval_day_time. + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + + // Output is type timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + inputColVector2.getPisaTimestamp(0), + 0); + } else if (inputColVector1.isRepeating) { + PisaTimestamp value1 = + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[0])); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value1, + inputColVector2.getPisaTimestamp(i), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value1, + inputColVector2.getPisaTimestamp(i), + i); + } + } + } else if (inputColVector2.isRepeating) { + PisaTimestamp value2 = inputColVector2.getPisaTimestamp(0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value2, + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value2, + i); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + inputColVector2.getPisaTimestamp(i), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + inputColVector2.getPisaTimestamp(i), + i); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 
for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. + */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt new file mode 100644 index 0000000..fc1d1b7 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hive.common.util.DateUtils; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateColumnArithmeticTimestampScalar.txt, which covers binary arithmetic + * expressions between a column and a scalar. 
+ */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum, value, int outputColumn) { + super(colNum, ((value)), outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalarBase.txt new file mode 100644 index 0000000..087f5cd --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalarBase.txt @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template DateColumnArithmeticTimestampScalarBase.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (int colNum, PisaTimestamp value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum]; + + // Output is type timestamp. 
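Before the body of this Base class continues, note the single scratchPisaTimestamp allocated in its constructor: each row's date is widened to a timestamp by refilling that one mutable object from DateWritable.daysToMillis, instead of allocating a fresh timestamp per row inside the loop. A standalone sketch of the mechanics, where ScratchTimestamp is a hypothetical stand-in for PisaTimestamp:

import java.util.concurrent.TimeUnit;

public class ScratchTimestampSketch {

  // A mutable (seconds, nanos) pair that is refilled rather than reallocated.
  static final class ScratchTimestamp {
    long epochSeconds;
    int nanos;

    ScratchTimestamp updateFromEpochMilliseconds(long millis) {
      epochSeconds = Math.floorDiv(millis, 1000L);
      nanos = (int) Math.floorMod(millis, 1000L) * 1_000_000;
      return this;  // returning this lets the call nest inside a loop body
    }
  }

  public static void main(String[] args) {
    long[] epochDays = {0, 10957};  // 1970-01-01 and 2000-01-01
    ScratchTimestamp scratch = new ScratchTimestamp();
    for (long days : epochDays) {
      // One object serves the whole batch; only its fields change per row.
      scratch.updateFromEpochMilliseconds(TimeUnit.DAYS.toMillis(days));
      System.out.println(scratch.epochSeconds + "s " + scratch.nanos + "ns");
    }
  }
}

Avoiding per-row allocation matters because evaluate runs on the query's hot path once per batch, typically 1024 rows.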
+ TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + value, + 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt new file mode 100644 index 0000000..c4481c0 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + + +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. 
Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateTimeScalarArithmeticIntervalYearMonthColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private long value; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (long value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type Interval_Year_Month (months). + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + + // Output is type Date. + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; + int n = batch.size; + long[] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputVector[0] = dtm.addMonthsToDays(value, (int) vector[0]); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt new file mode 100644 index 0000000..0272036 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateScalarArithmeticTimestampColumn.txt. 
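Every evaluate body in this patch, including the one just finished above, has the same three-way shape: a repeating input computes once into slot 0; a no-nulls input gets a tight loop; a nullable input gets the loop plus an isNull copy, each with a selected-vector and a dense variant. Distilled into standalone form (illustrative, not a Hive class):

import java.util.function.LongUnaryOperator;

public class EvaluateShapeSketch {

  static void apply(long[] in, boolean[] isNull, boolean noNulls, boolean isRepeating,
      int[] sel, boolean selectedInUse, int n,
      long[] out, boolean[] outIsNull, LongUnaryOperator op) {
    if (isRepeating) {
      out[0] = op.applyAsLong(in[0]);        // one result covers every row
      outIsNull[0] = isNull[0];
    } else if (noNulls) {
      if (selectedInUse) {
        for (int j = 0; j != n; j++) {
          int i = sel[j];
          out[i] = op.applyAsLong(in[i]);
        }
      } else {
        for (int i = 0; i != n; i++) {
          out[i] = op.applyAsLong(in[i]);
        }
      }
    } else {                                 // nulls present
      if (selectedInUse) {
        for (int j = 0; j != n; j++) {
          int i = sel[j];
          out[i] = op.applyAsLong(in[i]);
          outIsNull[i] = isNull[i];
        }
      } else {
        for (int i = 0; i != n; i++) {
          out[i] = op.applyAsLong(in[i]);
        }
        System.arraycopy(isNull, 0, outIsNull, 0, n);
      }
    }
  }

  public static void main(String[] args) {
    long[] in = {1, 2, 3, 4};
    long[] out = new long[4];
    apply(in, new boolean[4], true, false, new int[] {1, 3}, true, 2,
        out, new boolean[4], v -> v * 10);
    System.out.println(out[1] + " " + out[3]); // 20 40
  }
}

The duplication across branches is deliberate: keeping null checks and selection indirection out of the innermost loops is what makes these expressions amenable to JIT loop optimization.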
+ * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (long value, int colNum, int outputColumn) { + super(value, colNum, outputColumn); + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumnBase.txt new file mode 100644 index 0000000..70d0dc6 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumnBase.txt @@ -0,0 +1,147 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template DateTimeScalarArithmeticTimestampColumnBase.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. 
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + + public (long value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp().updateFromEpochMilliseconds(DateWritable.daysToMillis((int) value)); + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type timestamp/interval_day_time. + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector2.noNulls; + outputColVector.isRepeating = inputColVector2.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector2.isRepeating) { + outputColVector.( + value, + inputColVector2.getPisaTimestamp(0), + 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector2.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, + inputColVector2.getPisaTimestamp(i), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, + inputColVector2.getPisaTimestamp(i), + i); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, + inputColVector2.getPisaTimestamp(i), + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, + inputColVector2.getPisaTimestamp(i), + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalColumnWithConvert.txt deleted file mode 100644 index cd7a1e7..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalColumnWithConvert.txt +++ /dev/null @@ -1,175 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template DateTimeColumnArithmeticIntervalColumnWithConvert.txt, which covers binary arithmetic - * expressions between columns. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum1; - private int colNum2; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public (int colNum1, int colNum2, int outputColumn) { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector1 = () batch.cols[colNum1]; - inputColVector2 = () batch.cols[colNum2]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - int n = batch.size; - [] vector1 = inputColVector1.vector; - [] vector2 = inputColVector2.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first - NullUtil.propagateNullsColCol( - inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - - /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or - * more inputs are null. This is to improve speed by avoiding - * conditional checks in the inner loop. 
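The *WithConvert templates deleted here, together with their Scalar/Column companions below, implemented date arithmetic by converting operands at expansion time into flat long or double vectors, with datetimes reduced to a single long. The new templates above route the same operations through TimestampColumnVector and PisaTimestamp instead. One limit of a single-long-of-nanoseconds representation is easy to check with plain arithmetic (runnable as-is):

public class NanosRangeSketch {
  public static void main(String[] args) {
    // A signed 64-bit nanosecond count spans roughly +/-292 years around
    // the epoch, which bounds what one long of nanoseconds can represent.
    double years = Long.MAX_VALUE / 1e9 / 86400.0 / 365.25;
    System.out.printf("single-long nanosecond range: about +/-%.0f years%n", years);
  }
}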
- */ - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputVector[0] = ((vector1[0]), (int) vector2[0]); - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector1[0]), (int) vector2[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector1[0]), (int) vector2[i]); - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector1[i]), (int) vector2[0]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector1[i]), (int) vector2[0]); - } - } - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector1[i]), (int) vector2[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector1[i]), (int) vector2[i]); - } - } - } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and - * NaN for double. This is to prevent possible later zero-divide errors - * in complex arithmetic expressions like col2 / (col1 - 1) - * in the case when some col1 entries are null. - */ - NullUtil.setNullDataEntries(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} - diff --git ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt deleted file mode 100644 index abee249..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/DateTimeColumnArithmeticIntervalScalarWithConvert.txt +++ /dev/null @@ -1,152 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template ColumnArithmeticScalarWithConvert.txt, which covers binary arithmetic - * expressions between a column and a scalar. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private value; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public (int colNum, value, int outputColumn) { - this.colNum = colNum; - this.value = value; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector = () batch.cols[colNum]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; - [] vector = inputColVector.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.isRepeating) { - outputVector[0] = ((vector[0]), (int) value); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector[i]), (int) value); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector[i]), (int) value); - } - } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector[i]), (int) value); - outputIsNull[i] = inputIsNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector[i]), (int) value); - } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); - } - } - - NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public getValue() { - return value; - } - - public void setValue( value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt deleted file mode 100644 index 93a441a..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/DateTimeScalarArithmeticIntervalColumnWithConvert.txt +++ /dev/null @@ -1,165 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.*; - - -/* - * Because of the templatized nature of the code, either or both - * of these ColumnVector imports may be needed. 
Listing both of them - * rather than using ....vectorization.*; - */ -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template DateTimeScalarArithmeticIntervalColumnWithConvert.txt. - * Implements a vectorized arithmetic operator with a scalar on the left and a - * column vector on the right. The result is output to an output column vector. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private value; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public ( value, int colNum, int outputColumn) { - this.colNum = colNum; - this.value = (value); - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - /** - * Method to evaluate scalar-column operation in vectorized fashion. - * - * @batch a package of rows with each column stored in a vector - */ - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector = () batch.cols[colNum]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; - [] vector = inputColVector.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.isRepeating) { - outputVector[0] = (value, (int) vector[0]); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (value, (int) vector[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (value, (int) vector[i]); - } - } - } else { /* there are nulls */ - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (value, (int) vector[i]); - outputIsNull[i] = inputIsNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (value, (int) vector[i]); - } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); - } - } - - NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public getValue() { - return value; - } - - public void setValue( value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareTimestampColumn.txt new file mode 100644 index 0000000..e8d52c8 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareTimestampColumn.txt @@ -0,0 +1,185 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterColumnCompareColumn.txt, which covers binary comparison + * expressions between two columns, however output is not produced in a separate column. 
+ * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + inputColVector1 = () batch.cols[colNum1]; + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + [] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // filter rows with NULL on left input + int newSize; + newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // filter rows with NULL on right input + newSize = NullUtil.filterNulls(batch.cols[colNum2], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // All rows with nulls have been filtered out, so just do normal filter for non-null case + if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + if (!(vector1[0] inputColVector2.(0))) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + value1 = vector1[0]; + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (value1 inputColVector2.(i)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (value1 inputColVector2.(i)) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + value2 = inputColVector2.(0); + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] value2) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i] value2) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] inputColVector2.(i)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i] inputColVector2.(i)) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + 
VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareTimestampScalar.txt new file mode 100644 index 0000000..8abc97e --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareTimestampScalar.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterColumnCompareScalar.txt, which covers binary comparison + * expressions between a column and a scalar, however output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
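+ *
+ * A minimal usage sketch; the concrete class name below is an illustrative stand-in for one of
+ * the names the code generator substitutes in, not something defined in this file:
+ * <pre>
+ *   // Filter column 0 against a timestamp scalar; the scalar is converted once via
+ *   // PisaTimestamp and the comparison is delegated to the long/double base class.
+ *   VectorExpression expr = new FilterLongColGreaterTimestampScalar(0, new Timestamp(0L));
+ *   expr.evaluate(batch);   // batch.selected / batch.size now describe the surviving rows
+ * </pre>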
+ */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum, Timestamp value) { + super(colNum, new PisaTimestamp(value).()); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDTIColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDTIColumnCompareScalar.txt index 55193ac..2351230 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDTIColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDTIColumnCompareScalar.txt @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; - import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareColumn.txt deleted file mode 100644 index 353e849..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareColumn.txt +++ /dev/null @@ -1,445 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; - -/** - * Generated from template FilterDecimalColumnCompareColumn.txt, which covers binary comparison - * filter expressions between two columns. Output is not produced in a separate column. - * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
- */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum1; - private int colNum2; - - public (int colNum1, int colNum2) { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1]; - DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; - int[] sel = batch.selected; - boolean[] nullPos1 = inputColVector1.isNull; - boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - HiveDecimalWritable[] vector1 = inputColVector1.vector; - HiveDecimalWritable[] vector2 = inputColVector2.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - // handle case where neither input has nulls - if (inputColVector1.noNulls && inputColVector2.noNulls) { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - - /* Either all must remain selected or all will be eliminated. - * Repeating property will not change. - */ - if (!(vector1[0].compareTo(vector2[0]) 0)) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - - // handle case where only input 2 has nulls - } else if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (nullPos2[0] || - !(vector1[0].compareTo(vector2[0]) 0)) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - - // no need to check for nulls in input 1 - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (nullPos2[0]) { - - // no values will qualify because every 
comparison will be with NULL - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - - // handle case where only input 1 has nulls - } else if (inputColVector2.noNulls) { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (nullPos1[0] || - !(vector1[0].compareTo(vector2[0]) 0)) { - batch.size = 0; - return; - } - } else if (inputColVector1.isRepeating) { - if (nullPos1[0]) { - - // if repeating value is null then every comparison will fail so nothing qualifies - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - - // handle case where both inputs have nulls - } else { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (nullPos1[0] || nullPos2[0] || - !(vector1[0].compareTo(vector2[0]) 0)) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - if (nullPos1[0]) { - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if 
(vector1[0].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (nullPos2[0]) { - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if (vector1[i].compareTo(vector2[0]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i] && !nullPos2[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i] && !nullPos2[i]) { - if (vector1[i].compareTo(vector2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - } - - @Override - public String getOutputType() { - return "boolean"; - } - - @Override - public int getOutputColumn() { - return -1; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("decimal"), - VectorExpressionDescriptor.ArgumentType.getType("decimal")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt new file mode 100644 index 0000000..353e849 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt @@ -0,0 +1,445 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +/** + * Generated from template FilterDecimalColumnCompareColumn.txt, which covers binary comparison + * filter expressions between two columns. Output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1]; + DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; + HiveDecimalWritable[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. 
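+         * A single compareTo of the two repeated values decides the whole batch.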
+ */ + if (!(vector1[0].compareTo(vector2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(vector1[0].compareTo(vector2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + 
!(vector1[0].compareTo(vector2[0]) 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(vector1[0].compareTo(vector2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (vector1[0].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (vector1[i].compareTo(vector2[0]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (vector1[i].compareTo(vector2[i]) 0) { + sel[newSize++] = i; + } + } + } + 
if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("decimal"), + VectorExpressionDescriptor.ArgumentType.getType("decimal")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt new file mode 100644 index 0000000..bdd39b9 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.common.type.HiveDecimal; + +/** + * This is a generated class to evaluate a comparison on a vector of decimal + * values. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private HiveDecimal value; + + public (int colNum, HiveDecimal value) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + HiveDecimalWritable[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!(DecimalUtil.compare(vector[0], value) 0)) { + + // Entire batch is filtered out. 
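+          // (the single repeated decimal value failed the comparison against the scalar)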
+ batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (DecimalUtil.compare(vector[i], value) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (DecimalUtil.compare(vector[i], value) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!nullPos[0]) { + if (!(DecimalUtil.compare(vector[0], value) 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (DecimalUtil.compare(vector[i], value) 0) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (DecimalUtil.compare(vector[i], value) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("decimal"), + VectorExpressionDescriptor.ArgumentType.getType("decimal")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareScalar.txt deleted file mode 100644 index bdd39b9..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareScalar.txt +++ /dev/null @@ -1,160 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.common.type.HiveDecimal; - -/** - * This is a generated class to evaluate a comparison on a vector of decimal - * values. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private HiveDecimal value; - - public (int colNum, HiveDecimal value) { - this.colNum = colNum; - this.value = value; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); - } - DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; - int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!(DecimalUtil.compare(vector[0], value) 0)) { - - // Entire batch is filtered out. - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (DecimalUtil.compare(vector[i], value) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (DecimalUtil.compare(vector[i], value) 0) { - sel[newSize++] = i; - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { - if (!(DecimalUtil.compare(vector[0], value) 0)) { - - // Entire batch is filtered out. 
- batch.size = 0; - } - } else { - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos[i]) { - if (DecimalUtil.compare(vector[i], value) 0) { - sel[newSize++] = i; - } - } - } - - // Change the selected vector - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos[i]) { - if (DecimalUtil.compare(vector[i], value) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - - @Override - public int getOutputColumn() { - return -1; - } - - @Override - public String getOutputType() { - return "boolean"; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("decimal"), - VectorExpressionDescriptor.ArgumentType.getType("decimal")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareColumn.txt deleted file mode 100644 index 0608016..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareColumn.txt +++ /dev/null @@ -1,160 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.common.type.HiveDecimal; - -/** - * This is a generated class to evaluate a comparison on a vector of decimal - * values. 
- */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private HiveDecimal value; - - public (HiveDecimal value, int colNum) { - this.colNum = colNum; - this.value = value; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); - } - DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; - int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!(DecimalUtil.compare(value, vector[0]) 0)) { - - // Entire batch is filtered out. - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (DecimalUtil.compare(value, vector[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (DecimalUtil.compare(value, vector[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { - if (!(DecimalUtil.compare(value, vector[0]) 0)) { - - // Entire batch is filtered out. - batch.size = 0; - } - } else { - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos[i]) { - if (DecimalUtil.compare(value, vector[i]) 0) { - sel[newSize++] = i; - } - } - } - - // Change the selected vector - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos[i]) { - if (DecimalUtil.compare(value, vector[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - - @Override - public int getOutputColumn() { - return -1; - } - - @Override - public String getOutputType() { - return "boolean"; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("decimal"), - VectorExpressionDescriptor.ArgumentType.getType("decimal")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt new file mode 100644 index 0000000..0608016 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.common.type.HiveDecimal; + +/** + * This is a generated class to evaluate a comparison on a vector of decimal + * values. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private HiveDecimal value; + + public (HiveDecimal value, int colNum) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + HiveDecimalWritable[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!(DecimalUtil.compare(value, vector[0]) 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (DecimalUtil.compare(value, vector[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (DecimalUtil.compare(value, vector[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!nullPos[0]) { + if (!(DecimalUtil.compare(value, vector[0]) 0)) { + + // Entire batch is filtered out. 
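+          // (the scalar failed the comparison against the repeated, non-NULL column value)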
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (DecimalUtil.compare(value, vector[i]) 0) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (DecimalUtil.compare(value, vector[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("decimal"), + VectorExpressionDescriptor.ArgumentType.getType("decimal")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareTimestampColumn.txt index e0e5022..39cf597 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareTimestampColumn.txt @@ -18,25 +18,135 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; - +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** * Generated from template FilterScalarCompareTimestampColumn.txt, which covers comparison - * expressions between a long or double scalar and a column, however output is not produced in a separate column. - * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. - * Note: For timestamp and long or double we implicitly interpret the long as the number - * of seconds or double as seconds and fraction since the epoch. + * expressions between a long/double scalar and a timestamp column, however output is not produced + * in a separate column. The selected vector of the input {@link VectorizedRowBatch} is updated + * for in-place filtering. 
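+ *
+ * A minimal usage sketch (the class name is an illustrative stand-in for a generated one):
+ * <pre>
+ *   // keep only rows where the double scalar compares true against timestamp column 1
+ *   VectorExpression expr = new FilterDoubleScalarLessTimestampColumn(0.5d, 1);
+ *   expr.evaluate(batch);   // batch.selected / batch.size are updated in place
+ * </pre>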
*/ -public class extends { +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private value; public ( value, int colNum) { - super(TimestampUtils.(value), colNum); + this.colNum = colNum; + this.value = value; } public () { - super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!(value inputColVector.(0))) { + //Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (value inputColVector.(i)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (value inputColVector.(i)) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + if (!(value inputColVector.(0))) { + //Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (value inputColVector.(i)) { + sel[newSize++] = i; + } + } + } + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (value inputColVector.(i)) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt new file mode 100644 index 0000000..328dcc3 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt @@ -0,0 +1,172 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterTimestampColumnBetween.txt, which covers [NOT] BETWEEN filter + * expressions where a column is [NOT] between one scalar and another. + * Output is not produced in a separate column. The selected vector of the input + * {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + + // The comparison is of the form "column BETWEEN leftValue AND rightValue" + private PisaTimestamp leftValue; + private PisaTimestamp rightValue; + private PisaTimestamp scratchValue; + + public (int colNum, Timestamp leftValue, Timestamp rightValue) { + this.colNum = colNum; + this.leftValue = new PisaTimestamp(leftValue); + this.rightValue = new PisaTimestamp(rightValue); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. + // Repeating property will not change. + if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. + // Repeating property will not change. + if (!nullPos[0]) { + if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { + + // Entire batch is filtered out. 
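+            // (the repeated timestamp lies outside the [leftValue, rightValue] range)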
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareColumn.txt new file mode 100644 index 0000000..f3cc1d8 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareColumn.txt @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterColumnCompareColumn.txt, which covers binary comparison + * expressions between two columns, however output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
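+ *
+ * Here the left operand is a {@link TimestampColumnVector} and the right operand is a
+ * long/double column; NULL rows on either side are removed first with NullUtil.filterNulls,
+ * so the comparison loops below need no per-row null handling.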
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + inputColVector2 = () batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + [] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // filter rows with NULL on left input + int newSize; + newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // filter rows with NULL on right input + newSize = NullUtil.filterNulls(batch.cols[colNum2], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // All rows with nulls have been filtered out, so just do normal filter for non-null case + if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + if (!(inputColVector1.(0) vector2[0])) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + value1 = inputColVector1.(0); + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (value1 vector2[i]) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (value1 vector2[i]) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + value2 = vector2[0]; + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.(i) value2) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.(i) value2) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.(i) vector2[i]) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.(i) vector2[i]) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff 
--git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareScalar.txt index 0c37b4d..0f0741d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareScalar.txt @@ -18,26 +18,135 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; - +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** * Generated from template FilterTimestampColumnCompareScalar.txt, which covers comparison - * expressions between a timestamp column and a long or double scalar, however output is not - * produced in a separate column. - * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. - * Note: For timestamp and long or double we implicitly interpret the long as the number - * of seconds or double as seconds and fraction since the epoch. + * expressions between a timestamp column and a long/double scalar, however output is not produced + * in a separate column. The selected vector of the input {@link VectorizedRowBatch} is updated + * for in-place filtering. */ -public class extends { +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private value; public (int colNum, value) { - super(colNum, TimestampUtils.(value)); + this.colNum = colNum; + this.value = value; } public () { - super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!(inputColVector.(0) value)) { + //Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (inputColVector.(i) value) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector.(i) value) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + if (!(inputColVector.(0) value)) { + //Entire batch is filtered out. 
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (inputColVector.(i) value) { + sel[newSize++] = i; + } + } + } + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (inputColVector.(i) value) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; } @Override @@ -53,4 +162,4 @@ public class extends { VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); } -} \ No newline at end of file +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt new file mode 100644 index 0000000..3085954 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt @@ -0,0 +1,443 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +/** + * Generated from template FilterDecimalColumnCompareColumn.txt, which covers binary comparison + * filter expressions between two columns. Output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
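All of the filter templates in this patch implement the contract this javadoc describes: no output column is produced; instead the batch's selected array is compacted in place so that it lists only the qualifying row indices. A minimal standalone sketch of that mechanism in plain Java (SimpleBatch and filterGreaterThan are illustrative names, not Hive classes):

    // Minimal model of VectorizedRowBatch's selection machinery.
    final class SimpleBatch {
      int size;               // number of logical rows in the batch
      int[] selected;         // qualifying row indices, meaningful when selectedInUse
      boolean selectedInUse;  // false means rows 0..size-1 are all live
      long[] col;             // a single flattened column, enough for the demo
    }

    final class FilterSketch {
      // Keep only rows where col[i] > value, rewriting batch.selected in place.
      static void filterGreaterThan(SimpleBatch batch, long value) {
        int n = batch.size;
        int newSize = 0;
        if (batch.selectedInUse) {
          for (int j = 0; j != n; j++) {
            int i = batch.selected[j];
            if (batch.col[i] > value) {
              batch.selected[newSize++] = i;
            }
          }
          batch.size = newSize;
        } else {
          for (int i = 0; i != n; i++) {
            if (batch.col[i] > value) {
              batch.selected[newSize++] = i;
            }
          }
          if (newSize < n) {
            // Only switch to selected mode if some row was filtered out.
            batch.size = newSize;
            batch.selectedInUse = true;
          }
        }
      }
    }

The generated classes repeat this loop many times rather than factoring it out, trading source size for the absence of per-row virtual calls in the hot path.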
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. + */ + if (!(inputColVector1.compareTo(0, inputColVector2, 0) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(inputColVector1.compareTo(0, inputColVector2, 0) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + 
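The branch opened just above is the null short-circuit that recurs throughout this file: in a repeating vector, entry 0 stands for every row, so a single null test replaces a whole loop. A one-method sketch of that reasoning, with hypothetical names:

    // A repeating operand holds one logical value for all rows. If that value
    // is NULL, a SQL comparison can never evaluate to true, so the surviving
    // row count drops to zero without examining any individual row.
    static int rowsSurvivingRepeatingOperand(boolean repeatingIsNull, int batchSize) {
      return repeatingIsNull ? 0 : batchSize;
    }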
+ // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(inputColVector1.compareTo(0, inputColVector2, 0) 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(inputColVector1.compareTo(0, inputColVector2, 0) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i 
= sel[j]; + if (!nullPos2[i]) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (inputColVector1.compareTo(0, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (inputColVector1.compareTo(i, inputColVector2, 0) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (inputColVector1.compareTo(i, inputColVector2, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt index d13fecf..d502d89 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt @@ -18,24 +18,129 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** - * Generated from template FilterTimestampColumnCompareTimestampScalar.txt, which covers comparison - * expressions between a timestamp column and a timestamp scalar, however output is not - * produced in a separate column. 
+ * Generated from template FilterColumnCompareScalar.txt, which covers binary comparison + * expressions between a column and a scalar, however output is not produced in a separate column. * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. */ -public class extends { +public class extends VectorExpression { - public (int colNum, long value) { - super(colNum, value); + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + + public (int colNum, Timestamp value) { + this.colNum = colNum; + this.value = new PisaTimestamp(value); } public () { - super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!(inputColVector.compareTo(0, value) 0)) { + //Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (inputColVector.compareTo(i, value) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector.compareTo(i, value) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + if (!(inputColVector.compareTo(0, value) 0)) { + //Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (inputColVector.compareTo(i, value) 0) { + sel[newSize++] = i; + } + } + } + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (inputColVector.compareTo(i, value) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; } @Override @@ -51,4 +156,4 @@ public class extends { VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); } -} \ No newline at end of file +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareColumn.txt new file mode 100644 index 0000000..874389c --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareColumn.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template FilterScalarCompareColumn.txt, which covers binary comparison + * expressions between a scalar and a column, however output is not produced in a separate column. + * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (Timestamp value, int colNum) { + super(new PisaTimestamp(value).(), colNum); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt index a37db3d..0df212d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt @@ -15,26 +15,134 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.common.type.HiveDecimal; /** - * Generated from template FilterTimestampScalarCompareTimestampColumn.txt, which covers comparison - * expressions between a timestamp scalar and a column, however output is not produced in a separate column. - * The selected vector of the input {@link VectorizedRowBatch} is updated for in-place filtering. 
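The scalar constructors above convert the java.sql.Timestamp once, outside the per-row loop, and the generated loops then compare each row against that converted constant. Under the assumption that a timestamp is held as whole epoch seconds plus a nanosecond remainder (TsPair is an illustrative stand-in, not PisaTimestamp's actual layout), the comparison reduces to:

    // Two-long timestamp: seconds since the epoch plus nanos in [0, 1e9).
    // Compare the coarse field first and fall back to the fine one on a tie.
    final class TsPair implements Comparable<TsPair> {
      final long seconds;
      final long nanos;

      TsPair(long seconds, long nanos) {
        this.seconds = seconds;
        this.nanos = nanos;
      }

      @Override
      public int compareTo(TsPair other) {
        int c = Long.compare(seconds, other.seconds);
        return c != 0 ? c : Long.compare(nanos, other.nanos);
      }
    }

Once the template's operator placeholder is substituted, each generated test takes the shape entry.compareTo(scalar) < 0 (or >, ==, and so on), which is exactly what the compareTo calls in these loops expand to.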
+ * This is a generated class to evaluate a comparison on a vector of timestamp + * values. */ -public class extends { +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; - public (long value, int colNum) { - super(value, colNum); + private int colNum; + private PisaTimestamp value; + + public (Timestamp value, int colNum) { + this.colNum = colNum; + this.value = new PisaTimestamp(value); } public () { - super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!(inputColVector.compareTo(value, 0) 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (inputColVector.compareTo(value, i) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (inputColVector.compareTo(value, i) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!nullPos[0]) { + if (!(inputColVector.compareTo(value, 0) 0)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (inputColVector.compareTo(value, i) 0) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (inputColVector.compareTo(value, i) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeColumnWithConvert.txt deleted file mode 100644 index c182557..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeColumnWithConvert.txt +++ /dev/null @@ -1,177 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template IntervalColumnArithmeticDateTimeColumnWithConvert.txt, which covers binary arithmetic - * expressions between columns. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum1; - private int colNum2; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public (int colNum1, int colNum2, int outputColumn) { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector1 = () batch.cols[colNum1]; - inputColVector2 = () batch.cols[colNum2]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - int n = batch.size; - [] vector1 = inputColVector1.vector; - [] vector2 = inputColVector2.vector; - [] outputVector = outputColVector.vector; - - // arg1 is interval type, arg2 is datetime type - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = - inputColVector1.isRepeating && inputColVector2.isRepeating - || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] - || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first - NullUtil.propagateNullsColCol( - inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - - /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or - * more inputs are null. This is to improve speed by avoiding - * conditional checks in the inner loop. 
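The deleted WithConvert templates leveled mixed date/datetime operands by converting one or both sides up front so the loops could share plain long arithmetic. A plausible such conversion, widening epoch days to nanoseconds, looks like the sketch below (the method name and body are assumptions, not the template's substituted code):

    // Widen a date stored as days since the epoch to nanoseconds since the
    // epoch. multiplyExact makes an out-of-range date fail loudly instead of
    // silently wrapping.
    static long daysToNanoseconds(long epochDays) {
      final long NANOS_PER_DAY = 24L * 60L * 60L * 1_000_000_000L;
      return Math.multiplyExact(epochDays, NANOS_PER_DAY);
    }

This conversion-based approach is what the new TimestampColumnVector templates replace: the timestamp keeps two longs of its own and the up-front widening disappears.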
- */ - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputVector[0] = ((vector2[0]), (int) vector1[0]); - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector2[0]), (int) vector1[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector2[0]), (int) vector1[i]); - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector2[i]), (int) vector1[0]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector2[i]), (int) vector1[0]); - } - } - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector2[i]), (int) vector1[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector2[i]), (int) vector1[i]); - } - } - } - - /* For the case when the output can have null values, follow - * the convention that the data values must be 1 for long and - * NaN for double. This is to prevent possible later zero-divide errors - * in complex arithmetic expressions like col2 / (col1 - 1) - * in the case when some col1 entries are null. - */ - NullUtil.setNullDataEntries(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} - diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt deleted file mode 100644 index 8fa3563..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalColumnArithmeticDateTimeScalarWithConvert.txt +++ /dev/null @@ -1,154 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.*; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template IntervalColumnArithmeticDateTimeScalarWithConvert.txt, which covers binary arithmetic - * expressions between a column and a scalar. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private value; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public (int colNum, value, int outputColumn) { - this.colNum = colNum; - this.value = (value); - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector = () batch.cols[colNum]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; - [] vector = inputColVector.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - // arg1 is interval, arg2 is datetime - - if (inputColVector.isRepeating) { - outputVector[0] = (value, (int) vector[0]); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
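The scalar-column skeleton being deleted here survives nearly unchanged in the new templates later in this patch: a repeating fast path, a clean noNulls loop, and a null loop that copies the isNull flags through. A condensed standalone sketch (illustrative names; the selectedInUse variants are omitted for brevity):

    // Column-plus-scalar projection skeleton shared by these templates.
    static void addScalar(long[] in, boolean[] inIsNull, boolean isRepeating,
        boolean noNulls, int n, long scalar, long[] out, boolean[] outIsNull) {
      if (isRepeating) {
        out[0] = in[0] + scalar;     // entry 0 stands for the whole batch
        outIsNull[0] = inIsNull[0];  // copied even when noNulls; simplifies the code
      } else if (noNulls) {
        for (int i = 0; i != n; i++) {
          out[i] = in[i] + scalar;
        }
      } else {
        for (int i = 0; i != n; i++) {
          out[i] = in[i] + scalar;   // compute every lane; null lanes are fixed up later
        }
        System.arraycopy(inIsNull, 0, outIsNull, 0, n);
      }
    }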
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (value, (int) vector[i]); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (value, (int) vector[i]); - } - } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (value, (int) vector[i]); - outputIsNull[i] = inputIsNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (value, (int) vector[i]); - } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); - } - } - - NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public getValue() { - return value; - } - - public void setValue( value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt deleted file mode 100644 index 0464a5e..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalScalarArithmeticDateTimeColumnWithConvert.txt +++ /dev/null @@ -1,167 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.*; - - -/* - * Because of the templatized nature of the code, either or both - * of these ColumnVector imports may be needed. 
Listing both of them - * rather than using ....vectorization.*; - */ -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.util.DateTimeMath; - -/** - * Generated from template IntervalScalarArithmeticDateTimeColumnWithConvert.txt. - * Implements a vectorized arithmetic operator with a scalar on the left and a - * column vector on the right. The result is output to an output column vector. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private value; - private int outputColumn; - private DateTimeMath dtm = new DateTimeMath(); - - public ( value, int colNum, int outputColumn) { - this.colNum = colNum; - this.value = value; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - /** - * Method to evaluate scalar-column operation in vectorized fashion. - * - * @batch a package of rows with each column stored in a vector - */ - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - inputColVector = () batch.cols[colNum]; - outputColVector = () batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] inputIsNull = inputColVector.isNull; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; - [] vector = inputColVector.vector; - [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - // arg1 is interval, arg2 is datetime - - if (inputColVector.isRepeating) { - outputVector[0] = ((vector[0]), (int) value); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector[i]), (int) value); - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector[i]), (int) value); - } - } - } else { /* there are nulls */ - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ((vector[i]), (int) value); - outputIsNull[i] = inputIsNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ((vector[i]), (int) value); - } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); - } - } - - NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return ""; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public getValue() { - return value; - } - - public void setValue( value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType(""), - VectorExpressionDescriptor.ArgumentType.getType("")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt new file mode 100644 index 0000000..0306561 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateColumnArithmeticIntervalYearMonthColumn.txt, which covers binary arithmetic + * expressions between columns. 
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type interval_year_month (months). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; + + // Input #2 is type date (epochDays). + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + + // Output is type date. + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + long[] vector2 = inputColVector2.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputVector[0] = dtm.addMonthsToDays(vector2[0], (int) vector1[0]); + } else if (inputColVector1.isRepeating) { + long value1 = vector1[0]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector2[i], (int) value1); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector2[i], (int) value1); + } + } + } else if (inputColVector2.isRepeating) { + long value2 = vector2[0]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(value2, (int) vector1[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(value2, (int) vector1[i]); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector2[i], (int) vector1[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector2[i], (int) vector1[i]); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
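The comment above states the convention; a sketch of the fix-up pass performed by the setNullDataEntriesLong call just below (the real NullUtil may differ in detail, and the selected-vector variant is omitted):

    // After computing every lane unconditionally, overwrite the null lanes
    // with a harmless value so that a later expression such as
    // col2 / (col1 - 1) cannot hit a zero-divide on a lane no one will read.
    static void setNullDataEntriesLong(long[] vector, boolean[] isNull, int n) {
      for (int i = 0; i != n; i++) {
        if (isNull[i]) {
          vector[i] = 1L;  // a double vector would use Double.NaN instead
        }
      }
    }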
+ */
+    NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "long";
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(2)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"),
+            VectorExpressionDescriptor.ArgumentType.getType("date"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
+
diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt
new file mode 100644
index 0000000..21e169a
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+
+/**
+ * Generated from template DateColumnArithmeticIntervalYearMonthScalar.txt, which covers binary arithmetic
+ * expressions between a column and a scalar.
+ */
+public class extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private long value;
+  private int outputColumn;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public (int colNum, long value, int outputColumn) {
+    this.colNum = colNum;
+    this.value = value;
+    this.outputColumn = outputColumn;
+  }
+
+  public () {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #1 is type interval_year_month (epochMonths).
+    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
+
+    // Output is type date.
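The dtm.addMonthsToDays calls used throughout these date templates cannot be a fixed multiplication, because months have varying lengths. A calendar-aware sketch of the same operation using java.time (an assumption; Hive's DateTimeMath may resolve end-of-month and time-zone details differently):

    // Add a month interval to a date held as days since the epoch.
    // java.time clamps the day-of-month, so Jan 31 plus one month lands on
    // Feb 28 (or Feb 29 in a leap year).
    static long addMonthsToDays(long epochDays, int months) {
      return java.time.LocalDate.ofEpochDay(epochDays)
          .plusMonths(months)
          .toEpochDay();
    }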
+ LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; + int n = batch.size; + long[] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputVector[0] = dtm.addMonthsToDays(value, (int) vector[0]); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(value, (int) vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt new file mode 100644 index 0000000..e9ff178 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticIntervalYearMonthColumn.txt, which covers binary arithmetic + * expressions between columns. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type Interval_Year_Month (months). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; + + // Input #2 is type Timestamp (PisaTimestamp). + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + + long[] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. 
+ */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.set(0, + dtm.addMonthsToPisaTimestamp(inputColVector2.getPisaTimestamp(0), (int) vector1[0], + scratchPisaTimestamp)); + } else if (inputColVector1.isRepeating) { + long value1 = vector1[0]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector2.getPisaTimestamp(i), (int) value1, + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector2.getPisaTimestamp(i), (int) value1, + scratchPisaTimestamp)); + } + } + } else if (inputColVector2.isRepeating) { + PisaTimestamp value2 = inputColVector2.getPisaTimestamp(0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value2, (int) vector1[i], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value2, (int) vector1[i], + scratchPisaTimestamp)); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector2.getPisaTimestamp(i), (int) vector1[i], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector2.getPisaTimestamp(i), (int) vector1[i], + scratchPisaTimestamp)); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. + */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt new file mode 100644 index 0000000..55abd8f --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticIntervalYearMonthScalar.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum, Timestamp value, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp(value); + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type interval_year_month (epochMonths). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + + long[] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.set(0, + dtm.addMonthsToPisaTimestamp(value, (int) vector1[0], + scratchPisaTimestamp)); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
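Together with computing only entry 0, copying entry 0's null flag is the whole cost of a repeating input (the outputIsNull[0] = inputIsNull[0]; line that resumes below is exactly that copy). The fast path, condensed into a self-contained sketch:

    public class RepeatingFastPathSketch {
      static long op(long a, long b) { return a + b; }   // hypothetical row kernel

      static void evaluateRepeating(long[] out, boolean[] outIsNull,
          long inputValue0, boolean inputIsNull0, long scalar) {
        out[0] = op(inputValue0, scalar);  // one computation covers the whole batch
        outIsNull[0] = inputIsNull0;       // copied even when noNulls; cheaper than branching
        // the caller marks the output vector isRepeating, so readers consult entry 0 only
      }
    }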
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector1[i], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector1[i], + scratchPisaTimestamp)); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector1[i], + scratchPisaTimestamp)); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector1[i], + scratchPisaTimestamp)); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt new file mode 100644 index 0000000..2dc89b7 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + + +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. 
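That import comment is a reminder that these .txt files are never compiled directly: GenVectorCode stamps each template into many concrete classes by textual substitution of angle-bracket markers such as <ClassName>, so the imports have to cover every expansion. A toy illustration of the mechanism (file names and the marker set shown are illustrative):

    import java.nio.file.Files;
    import java.nio.file.Paths;

    public class TemplateExpansionSketch {
      public static void main(String[] args) throws Exception {
        // Read one template, substitute markers, emit one generated source file.
        String template = Files.readString(Paths.get("ColumnArithmeticScalar.txt"));
        String source = template
            .replace("<ClassName>", "LongColAddLongScalar")
            .replace("<OperatorSymbol>", "+");
        Files.writeString(Paths.get("LongColAddLongScalar.java"), source);
      }
    }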
Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template DateTimeScalarArithmeticIntervalYearMonthColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private long value; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (long value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type date. + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + + // Output is type Date. + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; + int n = batch.size; + long[] vector = inputColVector.vector; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputVector[0] = dtm.addMonthsToDays(vector[0], (int) value); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
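Month arithmetic has no fixed scale factor (months differ in length and leap years shift things), so addMonthsToDays has to detour through a calendar rather than multiply. A plausible sketch of the helper, assuming UTC; the real DateTimeMath may treat time zones differently:

    import java.util.Calendar;
    import java.util.TimeZone;
    import java.util.concurrent.TimeUnit;

    public class AddMonthsSketch {
      // Assumed shape of the helper: days since epoch in, days since epoch out.
      static long addMonthsToDays(long epochDays, int months) {
        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
        cal.setTimeInMillis(TimeUnit.DAYS.toMillis(epochDays));
        cal.add(Calendar.MONTH, months);  // clamps day-of-month (Jan 31 + 1 month -> Feb 28/29)
        return TimeUnit.MILLISECONDS.toDays(cal.getTimeInMillis());
      }
    }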
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = dtm.addMonthsToDays(vector[i], (int) value); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt new file mode 100644 index 0000000..ecb5721 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt @@ -0,0 +1,159 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. 
Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampScalarArithmeticIntervalYearMonthColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private long value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (long value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type timestamp. + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputColVector.set(0, + dtm.addMonthsToPisaTimestamp(inputColVector.getPisaTimestamp(0), (int) value, + scratchPisaTimestamp)); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
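Every call site above narrows the stored month count with (int) value before handing it to the Calendar-based helper: interval_year_month travels in a LongColumnVector, but month arithmetic consumes an int. The patch narrows silently; a defensive variant (not in the patch) would look like:

    public class MonthNarrowingSketch {
      // Hypothetical guard; the generated code casts without checking.
      static int monthsToInt(long months) {
        if (months != (int) months) {
          throw new ArithmeticException("interval_year_month overflows int months: " + months);
        }
        return (int) months;
      }
    }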
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector.getPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector.getPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector.getPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector.getPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareTimestampColumn.txt index 7867610..90eedce 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareTimestampColumn.txt @@ -18,33 +18,113 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import java.sql.Timestamp; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.io.LongWritable; - import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** - * Generated from template ScalarCompareTimestampColumn.txt, which covers comparison - * expressions between a timestamp column and a long or double scalar. The boolean output - * is stored in a separate boolean column. - * Note: For timestamp and long or double we implicitly interpret the long as the number - * of seconds or double as seconds and fraction since the epoch. + * Generated from template ScalarCompareTimestamp.txt, which covers comparison + * expressions between a long/double scalar and a column. The boolean output is stored in a + * separate boolean column. 
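The comparison body that follows encodes SQL's three-valued logic by hand: true and false become 1 and 0 in a LongColumnVector, while a null operand makes the result null rather than false (the "comparison with null is null" comment further down says as much). Reduced to its essentials:

    public class ThreeValuedCompareSketch {
      static void lessThan(long scalar, long[] col, boolean[] colIsNull,
          long[] out, boolean[] outIsNull, int n) {
        for (int i = 0; i < n; i++) {
          if (!colIsNull[i]) {
            out[i] = scalar < col[i] ? 1 : 0;  // boolean encoded as long
            outIsNull[i] = false;
          } else {
            outIsNull[i] = true;               // NULL < x is NULL, not false
          }
        }
      }
    }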
*/ -public class extends { +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private value; + private int outputColumn; public ( value, int colNum, int outputColumn) { - super(TimestampUtils.(value), colNum, outputColumn); + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; } public () { - super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = value inputColVector.(0) ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + outputVector[0] = value inputColVector.(0) ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = value inputColVector.(i) ? 1 : 0; + outNulls[i] = false; + } else { + //comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt new file mode 100644 index 0000000..6bbc44c --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticDateColumn.txt, which covers binary arithmetic + * expressions between columns. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum1, int colNum2, int outputColumn) { + super(colNum1, colNum2, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumnBase.txt new file mode 100644 index 0000000..45fd3fa --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumnBase.txt @@ -0,0 +1,172 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template TimestampColumnArithmeticDateColumnBase.txt, which covers binary arithmetic + * expressions between columns. 
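This file and the *Base.txt companion right after it show the layout the patch uses for each timestamp arithmetic family: the Base template owns evaluate() with its four repeating/non-repeating branches, while each thin generated leaf contributes only constructors and a descriptor. A toy model of the split (the real templates bake the operator into the base by text substitution instead of a virtual call, which keeps the inner loop monomorphic):

    public class BaseLeafSplitSketch {
      abstract static class ArithBase {
        final int c1, c2, out;
        ArithBase(int c1, int c2, int out) { this.c1 = c1; this.c2 = c2; this.out = out; }
        abstract long op(long a, long b);       // the only per-leaf hook in this toy
        void evaluate(long[][] cols, int n) {   // shared batch loop lives in the base
          for (int i = 0; i < n; i++) {
            cols[out][i] = op(cols[c1][i], cols[c2][i]);
          }
        }
      }

      static final class SubtractLeaf extends ArithBase {
        SubtractLeaf(int c1, int c2, int out) { super(c1, c2, out); }
        @Override long op(long a, long b) { return a - b; }
      }
    }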
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type timestamp (PisaTimestamp). + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + + // Input #2 is type date. + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + + // Output is type timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.( + inputColVector1.getPisaTimestamp(0), + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + } else if (inputColVector1.isRepeating) { + PisaTimestamp value1 = inputColVector1.getPisaTimestamp(0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value1, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value1, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else if (inputColVector2.isRepeating) { + PisaTimestamp value2 = + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[0])); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.getPisaTimestamp(i), + value2, + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.getPisaTimestamp(i), + value2, + i); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.getPisaTimestamp(i), + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.getPisaTimestamp(i), + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and 
+ * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. + */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt new file mode 100644 index 0000000..456e58e --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticDateScalar.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum, long value, int outputColumn) { + super(colNum, value, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalarBase.txt new file mode 100644 index 0000000..39538df --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalarBase.txt @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
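In the date column-column base a few hunks up, the date operand is widened on every row: epoch days become milliseconds via DateWritable.daysToMillis, and the milliseconds fill the scratch timestamp. The day-to-millisecond step itself is plain scaling at 86,400,000 ms per day, though the real DateWritable may also fold in a timezone adjustment:

    import java.util.concurrent.TimeUnit;

    public class DaysToMillisSketch {
      // Simplified stand-in for DateWritable.daysToMillis.
      static long daysToMillis(int epochDays) {
        return TimeUnit.DAYS.toMillis(epochDays);  // epochDays * 86_400_000L
      }
    }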
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template TimestampColumnArithmeticDateScalarBase.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (int colNum, long value, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp().updateFromEpochMilliseconds(DateWritable.daysToMillis((int) value)); + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type Timestamp (PisaTimestamp). + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.( + inputColVector1.getPisaTimestamp(0), value, 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
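The constructor above converts the date scalar exactly once (days, to milliseconds, to a timestamp value), so the per-row loop never re-converts. Hoisting loop-invariant conversion is the recurring trick in these scalar templates; in miniature:

    import java.util.concurrent.TimeUnit;

    public class HoistedConversionSketch {
      static void subtractDateScalar(long[] rowMillis, long[] out, int n, int scalarEpochDays) {
        long scalarMillis = TimeUnit.DAYS.toMillis(scalarEpochDays);  // converted once, not per row
        for (int i = 0; i < n; i++) {
          out[i] = rowMillis[i] - scalarMillis;
        }
      }
    }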
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.getPisaTimestamp(i), value, i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.getPisaTimestamp(i), value, i); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.getPisaTimestamp(i), value, i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.getPisaTimestamp(i), value, i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt new file mode 100644 index 0000000..7d2d7df --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt @@ -0,0 +1,177 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticIntervalYearMonthColumn.txt, which covers binary arithmetic + * expressions between columns. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type Timestamp (PisaTimestamp). 
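PisaTimestamp, named in the comment above, is why TimestampColumnVector exists at all: a timestamp with nanosecond precision over a wide range overflows a single long, so a two-long decomposition like the following stand-in is the natural shape (the field names and split are assumed, not taken from the patch):

    public class WideTimestampSketch {
      static final long NANOS_PER_DAY = 86_400_000_000_000L;

      long epochDay;   // whole days since 1970-01-01
      long nanoOfDay;  // 0 <= nanoOfDay < NANOS_PER_DAY

      long toEpochSeconds() {
        return epochDay * 86_400L + nanoOfDay / 1_000_000_000L;
      }
    }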
+ TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + + // Input #2 is type Interval_Year_Month (months). + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.set(0, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(0), (int) vector2[0], + scratchPisaTimestamp)); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(0), (int) vector2[i], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(0), (int) vector2[i], + scratchPisaTimestamp)); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(i), (int) vector2[0], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(i), (int) vector2[0], + scratchPisaTimestamp)); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(i), (int) vector2[i], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(i), (int) vector2[i], + scratchPisaTimestamp)); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt new file mode 100644 index 0000000..8ef2dd9 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt @@ -0,0 +1,146 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticIntervalYearMonthScalar.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private long value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum, long value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type Timestamp (PisaTimestamp). + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. 
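Every loop in this patch is written twice, once through sel[] and once dense, because batch.selected lets upstream filters shrink a batch without compacting it: when selectedInUse is true, only the row indices in sel[0..n) are live. The shape, isolated (the per-row work here is hypothetical):

    public class SelectionVectorSketch {
      static void process(long[] col, int[] sel, boolean selectedInUse, int n) {
        if (selectedInUse) {
          for (int j = 0; j != n; j++) {
            int i = sel[j];          // gather: touch only surviving rows
            col[i] = col[i] + 1;     // hypothetical per-row work
          }
        } else {
          for (int i = 0; i != n; i++) {
            col[i] = col[i] + 1;     // dense: rows are 0..n-1
          }
        }
      }
    }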
+ TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.set(0, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(0), (int) value, + scratchPisaTimestamp)); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(inputColVector1.getPisaTimestamp(i), (int) value, + scratchPisaTimestamp)); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt new file mode 100644 index 0000000..84a0e97 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticTimestampColumn.txt, which covers binary arithmetic + * expressions between columns. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum1, int colNum2, int outputColumn) { + super(colNum1, colNum2, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumnBase.txt new file mode 100644 index 0000000..2eec5e1 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumnBase.txt @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticTimestampColumnBase.txt, which covers binary arithmetic + * expressions between columns. 
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private DateTimeMath dtm = new DateTimeMath(); + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type timestamp/interval_day_time (PisaTimestamp). + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + + // Input #2 is type timestamp/interval_day_time (PisaTimestamp). + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + + // Output is type timestamp/interval_day_time (PisaTimestamp). + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.( + inputColVector1.getPisaTimestamp(0), inputColVector2.getPisaTimestamp(0), 0); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.getPisaTimestamp(0), inputColVector2.getPisaTimestamp(i), i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.getPisaTimestamp(0), inputColVector2.getPisaTimestamp(i), i); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.getPisaTimestamp(i), inputColVector2.getPisaTimestamp(0), i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.getPisaTimestamp(i), inputColVector2.getPisaTimestamp(0), i); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + inputColVector1.getPisaTimestamp(i), inputColVector2.getPisaTimestamp(i), i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + inputColVector1.getPisaTimestamp(i), inputColVector2.getPisaTimestamp(i), i); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
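As its comments note, this base covers both timestamp and interval_day_time operands, so it also serves timestamp minus timestamp, whose interval_day_time result lives in the same two-long vector type. Arithmetic on a split representation needs a carry between the nanosecond and day halves; a self-contained sketch under the assumed layout:

    public class WideSubtractSketch {
      static final long NANOS_PER_DAY = 86_400_000_000_000L;

      // out[0] = days, out[1] = nanos-of-day; borrows a day when nanos go negative.
      static void subtract(long dayA, long nanoA, long dayB, long nanoB, long[] out) {
        long day = dayA - dayB;
        long nano = nanoA - nanoB;
        if (nano < 0) {
          nano += NANOS_PER_DAY;
          day -= 1;
        }
        out[0] = day;
        out[1] = nano;
      }
    }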
+ */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} + diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt new file mode 100644 index 0000000..38b0025 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hive.common.util.DateUtils; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampColumnArithmeticTimestampScalar.txt, which covers binary arithmetic + * expressions between a column and a scalar. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (int colNum, value, int outputColumn) { + super(colNum, ((value)), outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalarBase.txt new file mode 100644 index 0000000..22af302 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalarBase.txt @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.util.DateTimeMath;
+
+/**
+ * Generated from template TimestampColumnArithmeticTimestampScalarBase.txt, which covers
+ * binary arithmetic expressions between a column and a scalar.
+ */
+public abstract class <BaseClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum;
+  private PisaTimestamp value;
+  private int outputColumn;
+  private DateTimeMath dtm = new DateTimeMath();
+
+  public <BaseClassName>(int colNum, PisaTimestamp value, int outputColumn) {
+    this.colNum = colNum;
+    this.value = value;
+    this.outputColumn = outputColumn;
+  }
+
+  public <BaseClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    // Input #1 is type timestamp/interval_day_time (PisaTimestamp).
+    TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum];
+
+    // Output is type timestamp/interval_day_time.
+    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn];
+
+    int[] sel = batch.selected;
+    boolean[] inputIsNull = inputColVector1.isNull;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = inputColVector1.noNulls;
+    outputColVector.isRepeating = inputColVector1.isRepeating;
+    int n = batch.size;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (inputColVector1.isRepeating) {
+      outputColVector.<OperatorMethod>(
+          inputColVector1.getPisaTimestamp(0), value, 0);
+
+      // Even if there are no nulls, we always copy over entry 0. Simplifies code.
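+      // (noNulls was copied from the input above, so the isNull flag of the single
+      // repeated entry stays consistent for downstream null handling.)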
+      outputIsNull[0] = inputIsNull[0];
+    } else if (inputColVector1.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.<OperatorMethod>(
+              inputColVector1.getPisaTimestamp(i), value, i);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.<OperatorMethod>(
+              inputColVector1.getPisaTimestamp(i), value, i);
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputColVector.<OperatorMethod>(
+              inputColVector1.getPisaTimestamp(i), value, i);
+          outputIsNull[i] = inputIsNull[i];
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputColVector.<OperatorMethod>(
+              inputColVector1.getPisaTimestamp(i), value, i);
+        }
+        System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
+      }
+    }
+
+    NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "timestamp";
+  }
+}
diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareColumn.txt
new file mode 100644
index 0000000..4446f16
--- /dev/null
+++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareColumn.txt
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.*;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Generated from template TimestampColumnCompareColumn.txt, which covers binary comparison
+ * expressions between a timestamp column and a long/double column. The boolean output is
+ * stored in a separate boolean column.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int colNum1;
+  private int colNum2;
+  private int outputColumn;
+
+  public <ClassName>(int colNum1, int colNum2, int outputColumn) {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1];
+    <InputColumnVectorType> inputColVector2 = (<InputColumnVectorType>) batch.cols[colNum2];
+    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    <OperandType>[] vector2 = inputColVector2.vector;
+    long[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    outputColVector.isRepeating =
+        inputColVector1.isRepeating && inputColVector2.isRepeating
+        || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0]
+        || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0];
+
+    // Handle nulls first
+    NullUtil.propagateNullsColCol(
+        inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
+
+    /* Disregard nulls for processing. In other words,
+     * the comparison is performed even if one or
+     * more inputs are null. This is to improve speed by avoiding
+     * conditional checks in the inner loop.
+     */
+    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
+      outputVector[0] = inputColVector1.<GetTimestampLongDoubleMethod>(0) <OperatorSymbol> vector2[0] ? 1 : 0;
+    } else if (inputColVector1.isRepeating) {
+      <OperandType> value1 = inputColVector1.<GetTimestampLongDoubleMethod>(0);
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = value1 <OperatorSymbol> vector2[i] ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = value1 <OperatorSymbol> vector2[i] ? 1 : 0;
+        }
+      }
+    } else if (inputColVector2.isRepeating) {
+      <OperandType> value2 = vector2[0];
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = inputColVector1.<GetTimestampLongDoubleMethod>(i) <OperatorSymbol> value2 ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = inputColVector1.<GetTimestampLongDoubleMethod>(i) <OperatorSymbol> value2 ? 1 : 0;
+        }
+      }
+    } else {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = inputColVector1.<GetTimestampLongDoubleMethod>(i) <OperatorSymbol> vector2[i] ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = inputColVector1.<GetTimestampLongDoubleMethod>(i) <OperatorSymbol> vector2[i] ? 1 : 0;
+        }
+      }
+    }
+
+    /* For the case when the output can have null values, follow
+     * the convention that the data values must be 1 for long and
+     * NaN for double. This is to prevent possible later zero-divide errors
+     * in complex arithmetic expressions like col2 / (col1 - 1)
+     * in the case when some col1 entries are null.
+ */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareScalar.txt index da33281..2340c61 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareScalar.txt @@ -18,26 +18,113 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; - +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - /** * Generated from template TimestampColumnCompareScalar.txt, which covers comparison - * expressions between a timestamp column and a long or double scalar. The boolean output - * is stored in a separate boolean column. - * Note: For timestamp and long or double we implicitly interpret the long as the number - * of seconds or double as seconds and fraction since the epoch. + * expressions between a Timestamp column and a long/double scalar. The boolean output is stored + * in a separate boolean column. */ -public class extends { +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private value; + private int outputColumn; public (int colNum, value, int outputColumn) { - super(colNum, TimestampUtils.(value), outputColumn); + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; } public () { - super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector.(0) value ? 
1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } + } else { + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + outputVector[0] = inputColVector.(0) value ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + outNulls[i] = false; + } else { + //comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt new file mode 100644 index 0000000..382240c --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template ColumnArithmeticColumn.txt, which covers binary arithmetic + * expressions between columns. 
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum1]; + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputVector[0] = inputColVector1.compareTo(0, inputColVector2.getPisaTimestamp(0)) 0 ? 1 : 0; + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(0, inputColVector2.getPisaTimestamp(i)) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(0, inputColVector2.getPisaTimestamp(i)) 0 ? 1 : 0; + } + } + } else if (inputColVector2.isRepeating) { + PisaTimestamp value2 = inputColVector2.getPisaTimestamp(0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, value2) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value2) 0 ? 1 : 0; + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, inputColVector2.getPisaTimestamp(i)) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, inputColVector2.getPisaTimestamp(i)) 0 ? 1 : 0; + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt index 46534b4..f203687 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt @@ -18,24 +18,115 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - /** - * Generated from template TimestampColumnCompareTimestampScalar.txt, which covers comparison - * expressions between a timestamp column and a timestamp scalar. The boolean output - * is stored in a separate boolean column. + * Generated from template TimestampColumnCompareTimestampScalar.txt, which covers binary comparison + * expressions between a column and a scalar. The boolean output is stored in a + * separate boolean column. */ -public class extends { +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; - public (int colNum, long value, int outputColumn) { - super(colNum, value, outputColumn); + private int colNum; + private PisaTimestamp value; + private int outputColumn; + + public (int colNum, Timestamp value, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp(value); + this.outputColumn = outputColumn; } public () { - super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector1 = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector1.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector1.noNulls; + if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 
1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } + } else { + if (inputColVector1.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + outNulls[i] = false; + } else { + //comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt new file mode 100644 index 0000000..3e4c263 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hive.common.util.DateUtils; + +/** + * Generated from template TimestampScalarArithmeticDateColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. 
The result is output to an output column vector. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public ( value, int colNum, int outputColumn) { + super(((value)), colNum, outputColumn); + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumnBase.txt new file mode 100644 index 0000000..313cd85 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumnBase.txt @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +/** + * Generated from template TimestampScalarArithmeticDateColumnBase.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + + public (PisaTimestamp value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. 
+ * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type date. + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector2.noNulls; + outputColVector.isRepeating = inputColVector2.isRepeating; + int n = batch.size; + + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector2.isRepeating) { + outputColVector.( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector2.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt new file mode 100644 index 0000000..b6a16a2 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampScalarArithmeticIntervalYearMonthColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (PisaTimestamp value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type Interval_Year_Month (months). + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + + // Output is type Timestamp. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; + int n = batch.size; + + long[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputColVector.set(0, + dtm.addMonthsToPisaTimestamp(value, (int) vector[0], + scratchPisaTimestamp)); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
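+      // dtm.addMonthsToPisaTimestamp writes its result into scratchPisaTimestamp, which is
+      // reused for every row to avoid allocating a new PisaTimestamp per call; set() copies
+      // the value into the output vector.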
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector[i], + scratchPisaTimestamp)); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector[i], + scratchPisaTimestamp)); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector[i], + scratchPisaTimestamp)); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.set(i, + dtm.addMonthsToPisaTimestamp(value, (int) vector[i], + scratchPisaTimestamp)); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("interval_year_month")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt new file mode 100644 index 0000000..31a6c08 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hive.common.util.DateUtils; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. 
Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampScalarArithmeticTimestampColumn.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public ( value, int colNum, int outputColumn) { + super(((value)), colNum, outputColumn); + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumnBase.txt new file mode 100644 index 0000000..f0b3a46 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumnBase.txt @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.*; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +/* + * Because of the templatized nature of the code, either or both + * of these ColumnVector imports may be needed. Listing both of them + * rather than using ....vectorization.*; + */ +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; + +/** + * Generated from template TimestampScalarArithmeticTimestampColumnBase.txt. + * Implements a vectorized arithmetic operator with a scalar on the left and a + * column vector on the right. The result is output to an output column vector. 
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public (PisaTimestamp value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public () { + } + + @Override + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @batch a package of rows with each column stored in a vector + */ + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type timestamp/interval_day_time. + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum]; + + // Output is type timestamp/interval_day_time. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector2.noNulls; + outputColVector.isRepeating = inputColVector2.isRepeating; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector2.isRepeating) { + outputColVector.( + value, inputColVector2.getPisaTimestamp(0), 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector2.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, inputColVector2.getPisaTimestamp(i), i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, inputColVector2.getPisaTimestamp(i), i); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.( + value, inputColVector2.getPisaTimestamp(i), i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.( + value, inputColVector2.getPisaTimestamp(i), i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareColumn.txt new file mode 100644 index 0000000..8890fad --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareColumn.txt @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.PisaTimestamp; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Generated from template ColumnCompareScalar.txt, which covers binary comparison + * expressions between a column and a scalar. The boolean output is stored in a + * separate boolean column. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (Timestamp value, int colNum, int outputColumn) { + super(new PisaTimestamp(value).(), colNum, outputColumn); + } + + public () { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp"), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt index 9468a66..223dc1e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt @@ -18,31 +18,116 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; import java.sql.Timestamp; -import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** - * Generated from template TimestampScalarCompareTimestampColumn.txt, which covers comparison - * expressions between a timestamp column and a timestamp scalar. The boolean output - * is stored in a separate boolean column. + * Generated from template ScalarCompareTimestamp.txt, which covers comparison + * expressions between a long/double scalar and a column. The boolean output is stored in a + * separate boolean column. 
*/ -public class extends { +public class extends VectorExpression { - public (long value, int colNum, int outputColumn) { - super(value, colNum, outputColumn); + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + + public (Timestamp value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp(value); + this.outputColumn = outputColumn; } public () { - super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector2.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector2.noNulls; + if (inputColVector2.noNulls) { + if (inputColVector2.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } + } + } else { + if (inputColVector2.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + outNulls[i] = false; + } else { + //comparison with null is null + outNulls[i] = true; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; } @Override diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt new file mode 100644 index 0000000..08ed121 --- /dev/null +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +/** +* . Vectorized implementation for MIN/MAX aggregates. +*/ +@Description(name = "", + value = "") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** + * class for storing the current aggregate value. + */ + static private final class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private final PisaTimestamp value; + + /** + * Value is explicitly (re)initialized in reset() + */ + transient private boolean isNull = true; + + public Aggregation() { + value = new PisaTimestamp(); + } + + public void checkValue(TimestampColumnVector colVector, int index) { + if (isNull) { + isNull = false; + colVector.pisaTimestampUpdate(this.value, index); + } else if (colVector.compareTo(this.value, index) 0) { + colVector.pisaTimestampUpdate(this.value, index); + } + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset () { + isNull = true; + this.value.reset(); + } + } + + private VectorExpression inputExpression; + private transient VectorExpressionWriter resultWriter; + + public (VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public () { + super(); + } + + @Override + public void init(AggregationDesc desc) throws HiveException { + resultWriter = VectorExpressionWriterFactory.genVectorExpressionWritable( + desc.getParameters().get(0)); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregrateIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + 
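+    // Evaluate the child expression first so that its output column in the batch is
+    // populated before the aggregation below reads it.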
inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize); + } + } + } else { + if (inputColVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize, inputColVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColVector, batchSize, inputColVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + // Repeating use index 0. + myagg.checkValue(inputColVector, 0); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColVector, selection[i]); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColVector, i); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[selection[i]]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + // Repeating use index 0. 
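+        // The repeated value is identical for every row, but each row can map to a
+        // different aggregation buffer (one per grouping key), so we still loop per row.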
+ myagg.checkValue(inputColVector, 0); + } + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + // Repeating use index 0. + myagg.checkValue(inputColVector, 0); + } + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + j); + myagg.checkValue(inputColVector, i); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColVector, i); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls && + (myagg.isNull || (inputColVector.compareTo(myagg.value, 0) <OperatorSymbol> 0))) { + myagg.isNull = false; + inputColVector.pisaTimestampUpdate(myagg.value, 0); + } + return; + } + + if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, + batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, + batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + if (myagg.isNull) { + myagg.isNull = false; + inputColVector.pisaTimestampUpdate(myagg.value, i); + } + else if (inputColVector.compareTo(myagg.value, i) <OperatorSymbol> 0) { + inputColVector.pisaTimestampUpdate(myagg.value, i); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + inputColVector.pisaTimestampUpdate(myagg.value, selected[0]); + myagg.isNull = false; + } + + for (int i=0; i< batchSize; ++i) { + int sel = selected[i]; + if (inputColVector.compareTo(myagg.value, sel) <OperatorSymbol> 0) { + inputColVector.pisaTimestampUpdate(myagg.value, sel); + } + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector,
+ int batchSize, + boolean[] isNull) { + + for(int i=0;i<batchSize;++i) { + if (!isNull[i]) { + if (myagg.isNull) { + inputColVector.pisaTimestampUpdate(myagg.value, i); + myagg.isNull = false; + } + else if (inputColVector.compareTo(myagg.value, i) <OperatorSymbol> 0) { + inputColVector.pisaTimestampUpdate(myagg.value, i); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize) { + if (myagg.isNull) { + inputColVector.pisaTimestampUpdate(myagg.value, 0); + myagg.isNull = false; + } + + for (int i=0;i<batchSize;++i) { + if (inputColVector.compareTo(myagg.value, i) <OperatorSymbol> 0) { + inputColVector.pisaTimestampUpdate(myagg.value, i); + } + } + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new Aggregation(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + Aggregation myAgg = (Aggregation) agg; + myAgg.reset(); + } + + @Override + public Object evaluateOutput( + AggregationBuffer agg) throws HiveException { + Aggregation myagg = (Aggregation) agg; + if (myagg.isNull) { + return null; + } + else { + return resultWriter.writeValue(myagg.value); + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { + return resultWriter.getObjectInspector(); + } + + @Override + public int getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object() + + model.primitive2(), + model.memoryAlign()); + } + + public VectorExpression getInputExpression() { + return inputExpression; + } + + public void setInputExpression(VectorExpression inputExpression) { + this.inputExpression = inputExpression; + } +} + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java index 95dbf8d..ef63f94 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampUtils.java @@ -18,50 +18,24 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.sql.Timestamp; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; public final class TimestampUtils { - /** - * Store the given timestamp in nanoseconds into the timestamp object. - * @param timeInNanoSec Given timestamp in nanoseconds - * @param t The timestamp object - */ - public static void assignTimeInNanoSec(long timeInNanoSec, Timestamp t) { - /* - * java.sql.Timestamp consists of a long variable to store milliseconds and an integer variable for nanoseconds. - * The long variable is used to store only the full seconds converted to millis. For example for 1234 milliseconds, - * 1000 is stored in the long variable, and 234000000 (234 converted to nanoseconds) is stored as nanoseconds. - * The negative timestamps are also supported, but nanoseconds must be positive therefore millisecond part is - * reduced by one second. - */ - long integralSecInMillis = (timeInNanoSec / 1000000000) * 1000; // Full seconds converted to millis. - long nanos = timeInNanoSec % 1000000000; // The nanoseconds. - if (nanos < 0) { - nanos = 1000000000 + nanos; // The positive nano-part that will be added to milliseconds. - integralSecInMillis = ((timeInNanoSec / 1000000000) - 1) * 1000; // Reduce by one second.
- } - t.setTime(integralSecInMillis); - t.setNanos((int) nanos); - } - - public static long getTimeNanoSec(Timestamp t) { - long time = t.getTime(); - int nanos = t.getNanos(); - return (time * 1000000) + (nanos % 1000000); - } + static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1); + static final long NANOSECONDS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1); - public static long secondsToNanoseconds(long seconds) { - return seconds * 1000000000; - } - - public static long doubleToNanoseconds(double d) { - return (long) (d * 1000000000); + public static long daysToNanoseconds(long daysSinceEpoch) { + return DateWritable.daysToMillis((int) daysSinceEpoch) * NANOSECONDS_PER_MILLISECOND; } - public static long daysToNanoseconds(long daysSinceEpoch) { - return DateWritable.daysToMillis((int) daysSinceEpoch) * 1000000; + public static TimestampWritable timestampColumnVectorWritable( + TimestampColumnVector timestampColVector, int elementNum, + TimestampWritable timestampWritable) { + timestampColVector.timestampUpdate(timestampWritable.getTimestamp(), elementNum); + return timestampWritable; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index 92b4a07..d4e927a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.sql.Timestamp; import java.util.List; import org.slf4j.Logger; @@ -228,22 +227,32 @@ void assign(int batchIndex, Object object) { } } - private class TimestampAssigner extends AbstractLongAssigner { + private class TimestampAssigner extends Assigner { + + protected TimestampColumnVector colVector; TimestampAssigner(int columnIndex) { super(columnIndex); } @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (TimestampColumnVector) batch.cols[columnIndex]; + } + + @Override void assign(int batchIndex, Object object) { if (object == null) { VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); } else { - TimestampWritable tw = (TimestampWritable) object; - Timestamp t = tw.getTimestamp(); - vector[batchIndex] = TimestampUtils.getTimeNanoSec(t); + colVector.set(batchIndex, ((TimestampWritable) object).getTimestamp()); } } + + @Override + void forgetColumnVector() { + colVector = null; + } } private class IntervalYearMonthAssigner extends AbstractLongAssigner { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java index befe2fc..463c8a6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java @@ -165,6 +165,17 @@ protected void assignDecimal(HiveDecimalWritable hdw, int index) { } } + private static abstract class VectorTimestampColumnAssign + extends VectorColumnAssignVectorBase<TimestampColumnVector> { + + protected void assignTimestamp(Timestamp value, int index) { + outCol.set(index, value); + } + protected void assignTimestamp(TimestampWritable tw, int index) { + outCol.set(index, tw.getTimestamp()); + } + } + public static VectorColumnAssign[] buildAssigners(VectorizedRowBatch outputBatch) throws HiveException { @@ -313,19 +324,17 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException {
}.init(outputBatch, (LongColumnVector) destCol); break; case TIMESTAMP: - outVCA = new VectorLongColumnAssign() { + outVCA = new VectorTimestampColumnAssign() { @Override public void assignObjectValue(Object val, int destIndex) throws HiveException { if (val == null) { assignNull(destIndex); } else { - TimestampWritable bw = (TimestampWritable) val; - Timestamp t = bw.getTimestamp(); - assignLong(TimestampUtils.getTimeNanoSec(t), destIndex); + assignTimestamp((TimestampWritable) val, destIndex); } } - }.init(outputBatch, (LongColumnVector) destCol); + }.init(outputBatch, (TimestampColumnVector) destCol); break; case DATE: outVCA = new VectorLongColumnAssign() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java index 6673509..6727b59 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java @@ -22,8 +22,6 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -57,6 +55,11 @@ protected int[] decimalIndices; /** + * indices of TIMESTAMP primitive keys. + */ + protected int[] timestampIndices; + + /** * Helper class for looking up a key value based on key index. */ public class KeyLookupHelper { @@ -64,11 +67,13 @@ public int doubleIndex; public int stringIndex; public int decimalIndex; + public int timestampIndex; private static final int INDEX_UNUSED = -1; private void resetIndices() { - this.longIndex = this.doubleIndex = this.stringIndex = this.decimalIndex = INDEX_UNUSED; + this.longIndex = this.doubleIndex = this.stringIndex = this.decimalIndex = + timestampIndex = INDEX_UNUSED; } public void setLong(int index) { resetIndices(); @@ -89,6 +94,11 @@ public void setDecimal(int index) { resetIndices(); this.decimalIndex = index; } + + public void setTimestamp(int index) { + resetIndices(); + this.timestampIndex = index; + } } /** @@ -103,6 +113,7 @@ public void setDecimal(int index) { protected int doubleIndicesIndex; protected int stringIndicesIndex; protected int decimalIndicesIndex; + protected int timestampIndicesIndex; protected VectorColumnSetInfo(int keyCount) { this.keyCount = keyCount; @@ -117,6 +128,8 @@ protected VectorColumnSetInfo(int keyCount) { stringIndicesIndex = 0; decimalIndices = new int[this.keyCount]; decimalIndicesIndex = 0; + timestampIndices = new int[this.keyCount]; + timestampIndicesIndex = 0; indexLookup = new KeyLookupHelper[this.keyCount]; } @@ -153,6 +166,12 @@ protected void addKey(String outputType) throws HiveException { ++decimalIndicesIndex; break; + case TIMESTAMP: + timestampIndices[timestampIndicesIndex] = addIndex; + indexLookup[addIndex].setTimestamp(timestampIndicesIndex); + ++timestampIndicesIndex; + break; + default: throw new HiveException("Unexpected column vector type " + columnVectorType); } @@ -165,5 +184,6 @@ protected void finishAdding() { doubleIndices = Arrays.copyOf(doubleIndices, doubleIndicesIndex); stringIndices = Arrays.copyOf(stringIndices, stringIndicesIndex); decimalIndices = Arrays.copyOf(decimalIndices, decimalIndicesIndex); + timestampIndices =
Arrays.copyOf(timestampIndices, timestampIndicesIndex); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index c56903e..97542df 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -22,8 +22,6 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -189,6 +187,32 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBa } } + private class TimestampCopyRow extends CopyRow { + + TimestampCopyRow(int inColumnIndex, int outColumnIndex) { + super(inColumnIndex, outColumnIndex); + } + + @Override + void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch, int outBatchIndex) { + TimestampColumnVector inColVector = (TimestampColumnVector) inBatch.cols[inColumnIndex]; + TimestampColumnVector outColVector = (TimestampColumnVector) outBatch.cols[outColumnIndex]; + + if (inColVector.isRepeating) { + if (inColVector.noNulls || !inColVector.isNull[0]) { + outColVector.setElement(outBatchIndex, 0, inColVector); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } else { + if (inColVector.noNulls || !inColVector.isNull[inBatchIndex]) { + outColVector.setElement(outBatchIndex, inBatchIndex, inColVector); + } else { + VectorizedBatchUtil.setNullColIsNullValue(outColVector, outBatchIndex); + } + } + } + } private CopyRow[] subRowToBatchCopiersByValue; private CopyRow[] subRowToBatchCopiersByReference; @@ -212,6 +236,10 @@ public void init(VectorColumnMapping columnMapping) throws HiveException { copyRowByValue = new LongCopyRow(inputColumn, outputColumn); break; + case TIMESTAMP: + copyRowByValue = new TimestampCopyRow(inputColumn, outputColumn); + break; + case DOUBLE: copyRowByValue = new DoubleCopyRow(inputColumn, outputColumn); break; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 4d86db6..1b4d8c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -20,7 +20,6 @@ import java.io.EOFException; import java.io.IOException; -import java.sql.Timestamp; import java.util.List; import org.slf4j.Logger; @@ -204,7 +203,7 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } - private class TimestampReader extends AbstractLongReader { + private class TimestampReader extends Reader { DeserializeRead.ReadTimestampResults readTimestampResults; @@ -215,16 +214,16 @@ void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { @Override void apply(VectorizedRowBatch batch, int batchIndex) throws IOException { - LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex]; if (deserializeRead.readCheckNull()) { 
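+ // A null was read: mark this batch slot null in the output vector instead of assigning a timestamp value.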
VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); } else { deserializeRead.readTimestamp(readTimestampResults); - Timestamp t = readTimestampResults.getTimestamp(); - colVector.vector[batchIndex] = TimestampUtils.getTimeNanoSec(t); + colVector.set(batchIndex, readTimestampResults.getTimestamp()); } } + } private class IntervalYearMonthReader extends AbstractLongReader { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index e221362..bcbb74a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -43,7 +43,7 @@ // LongColumnVector --> // INT_FAMILY // DATE - // TIMESTAMP + // INTERVAL_FAMILY // // DoubleColumnVector --> // FLOAT_FAMILY @@ -56,6 +56,9 @@ // CHAR // VARCHAR // + // TimestampColumnVector --> + // TIMESTAMP + // public enum ArgumentType { NONE (0x000), INT_FAMILY (0x001), @@ -71,8 +74,9 @@ INTERVAL_DAY_TIME (0x200), DATETIME_FAMILY (DATE.value | TIMESTAMP.value), INTERVAL_FAMILY (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value), - INT_TIMESTAMP_FAMILY (INT_FAMILY.value | TIMESTAMP.value), INT_INTERVAL_FAMILY (INT_FAMILY.value | INTERVAL_FAMILY.value), + INT_DATE_INTERVAL_FAMILY (INT_FAMILY.value | DATE.value | INTERVAL_FAMILY.value), + INT_DATE_INTERVAL_YEAR_MONTH (INT_FAMILY.value | DATE.value | INTERVAL_YEAR_MONTH.value), INT_DATETIME_INTERVAL_FAMILY (INT_FAMILY.value | DATETIME_FAMILY.value | INTERVAL_FAMILY.value), STRING_DATETIME_FAMILY (STRING_FAMILY.value | DATETIME_FAMILY.value), ALL_FAMILY (0xFFF); @@ -146,10 +150,12 @@ public boolean isSameTypeOrFamily(ArgumentType other) { public static String getVectorColumnSimpleName(ArgumentType argType) { if (argType == INT_FAMILY || argType == DATE || - argType == TIMESTAMP || - argType == INTERVAL_YEAR_MONTH || - argType == INTERVAL_DAY_TIME) { + argType == INTERVAL_YEAR_MONTH + ) { return "Long"; + } else if (argType == TIMESTAMP || + argType == INTERVAL_DAY_TIME) { + return "Timestamp"; } else if (argType == FLOAT_FAMILY) { return "Double"; } else if (argType == DECIMAL) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index 4100bc5..1d38464 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -22,6 +22,7 @@ import java.sql.Date; import java.sql.Timestamp; import java.util.List; + import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; @@ -255,7 +256,9 @@ Object extract(int batchIndex) { } } - private class TimestampExtractor extends AbstractLongExtractor { + private class TimestampExtractor extends Extractor { + + protected TimestampColumnVector colVector; private Timestamp timestamp; @@ -266,23 +269,32 @@ Object extract(int batchIndex) { } @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (TimestampColumnVector) batch.cols[columnIndex]; + } + + @Override Object extract(int batchIndex) { int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - long value = vector[adjustedIndex]; - TimestampUtils.assignTimeInNanoSec(value, timestamp); + colVector.timestampUpdate(timestamp, adjustedIndex); PrimitiveObjectInspectorFactory.writableTimestampObjectInspector.set(object, timestamp); return object; } else { return null; } } + + @Override + void forgetColumnVector() { + colVector = null; + } } private class IntervalYearMonthExtractor extends AbstractLongExtractor { private HiveIntervalYearMonth hiveIntervalYearMonth; - + IntervalYearMonthExtractor(int columnIndex) { super(columnIndex); object = PrimitiveObjectInspectorFactory.writableHiveIntervalYearMonthObjectInspector.create(new HiveIntervalYearMonth(0)); @@ -303,10 +315,12 @@ Object extract(int batchIndex) { } } - private class IntervalDayTimeExtractor extends AbstractLongExtractor { + private class IntervalDayTimeExtractor extends Extractor { + + protected TimestampColumnVector colVector; private HiveIntervalDayTime hiveIntervalDayTime; - + IntervalDayTimeExtractor(int columnIndex) { super(columnIndex); object = PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.create(new HiveIntervalDayTime(0, 0)); @@ -314,10 +328,22 @@ Object extract(int batchIndex) { } @Override + void setColumnVector(VectorizedRowBatch batch) { + colVector = (TimestampColumnVector) batch.cols[columnIndex]; + } + + @Override + void forgetColumnVector() { + colVector = null; + } + + @Override Object extract(int batchIndex) { int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex); if (colVector.noNulls || !colVector.isNull[adjustedIndex]) { - long value = vector[adjustedIndex]; + long value = colVector.getTotalNanoseconds(adjustedIndex); DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, value); PrimitiveObjectInspectorFactory.writableHiveIntervalDayTimeObjectInspector.set(object, hiveIntervalDayTime); return object; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java index fabac38..9f0ac11 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java @@ -19,8 +19,6 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.io.IOException; -import java.util.Arrays; - import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.DataOutputBuffer; @@ -121,5 +119,17 @@ public void copyGroupKey(VectorizedRowBatch inputBatch, VectorizedRowBatch outpu outputColumnVector.isNull[outputBatch.size] = true; } } + for(int i=0;i<timestampIndices.length; ++i) { + int keyIndex = timestampIndices[i]; + TimestampColumnVector inputColumnVector = (TimestampColumnVector) inputBatch.cols[keyIndex]; + TimestampColumnVector outputColumnVector = (TimestampColumnVector) outputBatch.cols[keyIndex]; + if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) { + outputColumnVector.setElement(outputBatch.size, 0, inputColumnVector); + } else { + outputColumnVector.noNulls = false; + outputColumnVector.isNull[outputBatch.size] = true; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java @@ ... @@ +import org.apache.hadoop.hive.common.type.PisaTimestamp; @@ ... @@ + private static final PisaTimestamp[] EMPTY_TIMESTAMP_ARRAY = new PisaTimestamp[0]; @@ ... @@ + private PisaTimestamp[] timestampValues; @@ ... @@ public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, - int byteValuesCount, int decimalValuesCount) { + int byteValuesCount, int decimalValuesCount, + int timestampValuesCount) { longValues = longValuesCount > 0 ? new long[longValuesCount] : EMPTY_LONG_ARRAY; doubleValues = doubleValuesCount > 0 ? new double[doubleValuesCount] : EMPTY_DOUBLE_ARRAY; decimalValues = decimalValuesCount > 0 ? new HiveDecimalWritable[decimalValuesCount] : EMPTY_DECIMAL_ARRAY; + timestampValues = timestampValuesCount > 0 ?
new PisaTimestamp[timestampValuesCount] : EMPTY_TIMESTAMP_ARRAY; for(int i = 0; i < decimalValuesCount; ++i) { decimalValues[i] = new HiveDecimalWritable(HiveDecimal.ZERO); } @@ -72,7 +78,11 @@ public VectorHashKeyWrapper(int longValuesCount, int doubleValuesCount, byteStarts = EMPTY_INT_ARRAY; byteLengths = EMPTY_INT_ARRAY; } - isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount + decimalValuesCount]; + for(int i = 0; i < timestampValuesCount; ++i) { + timestampValues[i] = new PisaTimestamp(); + } + isNull = new boolean[longValuesCount + doubleValuesCount + byteValuesCount + + decimalValuesCount + timestampValuesCount]; hashcode = 0; } @@ -94,6 +104,10 @@ public void setHashKey() { hashcode ^= decimalValues[i].getHiveDecimal().hashCode(); } + for (int i = 0; i < timestampValues.length; i++) { + hashcode ^= timestampValues[i].hashCode(); + } + // This code, with branches and all, is not executed if there are no string keys for (int i = 0; i < byteValues.length; ++i) { /* @@ -131,6 +145,7 @@ public boolean equals(Object that) { Arrays.equals(longValues, keyThat.longValues) && Arrays.equals(doubleValues, keyThat.doubleValues) && Arrays.equals(decimalValues, keyThat.decimalValues) && + Arrays.equals(timestampValues, keyThat.timestampValues) && Arrays.equals(isNull, keyThat.isNull) && byteValues.length == keyThat.byteValues.length && (0 == byteValues.length || bytesEquals(keyThat)); @@ -196,6 +211,16 @@ public void duplicateTo(VectorHashKeyWrapper clone) { clone.byteStarts = EMPTY_INT_ARRAY; clone.byteLengths = EMPTY_INT_ARRAY; } + if (timestampValues.length > 0) { + clone.timestampValues = new PisaTimestamp[timestampValues.length]; + for(int i = 0; i < timestampValues.length; ++i) { + clone.timestampValues[i] = new PisaTimestamp(); + clone.timestampValues[i].update(timestampValues[i]); + } + } else { + clone.timestampValues = EMPTY_TIMESTAMP_ARRAY; + } + clone.hashcode = hashcode; assert clone.equals(this); } @@ -256,14 +281,32 @@ public void assignNullDecimal(int index) { isNull[longValues.length + doubleValues.length + byteValues.length + index] = true; } + public void assignTimestamp(int index, PisaTimestamp value) { + timestampValues[index].update(value); + isNull[longValues.length + doubleValues.length + byteValues.length + + decimalValues.length + index] = false; + } + + public void assignTimestamp(int index, TimestampColumnVector colVector, int elementNum) { + colVector.pisaTimestampUpdate(timestampValues[index], elementNum); + isNull[longValues.length + doubleValues.length + byteValues.length + + decimalValues.length + index] = false; + } + + public void assignNullTimestamp(int index) { + isNull[longValues.length + doubleValues.length + byteValues.length + + decimalValues.length + index] = true; + } + @Override public String toString() { - return String.format("%d[%s] %d[%s] %d[%s] %d[%s]", + return String.format("%d[%s] %d[%s] %d[%s] %d[%s] %d[%s]", longValues.length, Arrays.toString(longValues), doubleValues.length, Arrays.toString(doubleValues), byteValues.length, Arrays.toString(byteValues), - decimalValues.length, Arrays.toString(decimalValues)); + decimalValues.length, Arrays.toString(decimalValues), + timestampValues.length, Arrays.toString(timestampValues)); } public boolean getIsLongNull(int i) { @@ -315,5 +358,15 @@ public boolean getIsDecimalNull(int i) { public HiveDecimalWritable getDecimal(int i) { return decimalValues[i]; } + + public boolean getIsTimestampNull(int i) { + return isNull[longValues.length + doubleValues.length + byteValues.length + 
+ decimalValues.length + i]; + } + + public PisaTimestamp getTimestamp(int i) { + return timestampValues[i]; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java index 6333222..f292fbd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java @@ -18,13 +18,11 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.util.Arrays; - import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; /** * Class for handling vectorized hash map key wrappers. It evaluates the key columns in a @@ -157,27 +155,49 @@ public void evaluateBatch(VectorizedRowBatch batch) throws HiveException { } } + for(int i=0;i<timestampIndices.length; ++i) { + int keyIndex = timestampIndices[i]; + int columnIndex = keyExpressions[keyIndex].getOutputColumn(); + TimestampColumnVector columnVector = (TimestampColumnVector) batch.cols[columnIndex]; + if (columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { + assignTimestampNoNullsNoRepeatingNoSelection(i, batch.size, columnVector); + } else if (columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { + assignTimestampNoNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); + } else if (columnVector.noNulls && columnVector.isRepeating) { + assignTimestampNoNullsRepeating(i, batch.size, columnVector); + } else if (!columnVector.noNulls && !columnVector.isRepeating && !batch.selectedInUse) { + assignTimestampNullsNoRepeatingNoSelection(i, batch.size, columnVector); + } else if (!columnVector.noNulls && columnVector.isRepeating) { + assignTimestampNullsRepeating(i, batch.size, columnVector); + } else if (!columnVector.noNulls && !columnVector.isRepeating && batch.selectedInUse) { + assignTimestampNullsNoRepeatingSelection(i, batch.size, columnVector, batch.selected); + } + } @@ ... @@ } else if (klh.decimalIndex >= 0) { return kw.getIsDecimalNull(klh.decimalIndex)? null : keyOutputWriter.writeValue( kw.getDecimal(klh.decimalIndex)); + } else if (klh.timestampIndex >= 0) { + return kw.getIsTimestampNull(klh.timestampIndex)? null : + keyOutputWriter.writeValue( + kw.getTimestamp(klh.timestampIndex)); } else { throw new HiveException(String.format( - "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d %d", - i, klh.longIndex, klh.doubleIndex, klh.stringIndex, klh.decimalIndex)); + "Internal inconsistent KeyLookupHelper at index [%d]:%d %d %d %d %d", + i, klh.longIndex, klh.doubleIndex, klh.stringIndex, klh.decimalIndex, + klh.timestampIndex)); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java index c98c260..59adb48 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java @@ -254,7 +254,7 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } - private class TimestampWriter extends AbstractLongWriter { + private class TimestampWriter extends Writer { Timestamp scratchTimestamp; @@ -265,11 +265,11 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { @Override boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { - LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex]; if (colVector.isRepeating) { if (colVector.noNulls || !colVector.isNull[0]) { - TimestampUtils.assignTimeInNanoSec(colVector.vector[0], scratchTimestamp); + colVector.timestampUpdate(scratchTimestamp, 0); serializeWrite.writeTimestamp(scratchTimestamp); return true; } else { @@ -278,7 +278,7 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } else { if (colVector.noNulls || !colVector.isNull[batchIndex]) { - TimestampUtils.assignTimeInNanoSec(colVector.vector[batchIndex], scratchTimestamp); + colVector.timestampUpdate(scratchTimestamp, batchIndex); serializeWrite.writeTimestamp(scratchTimestamp); return true; } else { @@ -319,7 +319,7 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } - private class IntervalDayTimeWriter extends AbstractLongWriter { + private class IntervalDayTimeWriter extends Writer { IntervalDayTimeWriter(int columnIndex) { super(columnIndex);
@@ -327,11 +327,12 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { @Override boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { - LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex]; + TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex]; if (colVector.isRepeating) { if (colVector.noNulls || !colVector.isNull[0]) { - serializeWrite.writeHiveIntervalDayTime(colVector.vector[0]); + serializeWrite.writeHiveIntervalDayTime( + colVector.getPisaTimestamp(0).getTotalNanoseconds()); return true; } else { serializeWrite.writeNull(); @@ -339,7 +340,8 @@ boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException { } } else { if (colVector.noNulls || !colVector.isNull[batchIndex]) { - serializeWrite.writeHiveIntervalDayTime(colVector.vector[batchIndex]); + serializeWrite.writeHiveIntervalDayTime( + colVector.getPisaTimestamp(batchIndex).getTotalNanoseconds()); return true; } else { serializeWrite.writeNull(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 7e95244..9c84025 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -50,22 +50,30 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode; import org.apache.hadoop.hive.ql.exec.vector.expressions.*; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastTimestampToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountMerge; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFStdPopTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFStdSampTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFVarPopTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFVarSampTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDecimal; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopLong; @@ -929,20 +937,16 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI case DATE: return new ConstantVectorExpression(outCol, DateWritable.dateToDays((Date) constantValue)); case TIMESTAMP: - return new ConstantVectorExpression(outCol, TimestampUtils.getTimeNanoSec((Timestamp) constantValue)); + return new ConstantVectorExpression(outCol, (Timestamp) constantValue); case INTERVAL_YEAR_MONTH: return new ConstantVectorExpression(outCol, ((HiveIntervalYearMonth) constantValue).getTotalMonths()); case INTERVAL_DAY_TIME: - return new ConstantVectorExpression(outCol, - DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) constantValue)); + return new ConstantVectorExpression(outCol, (HiveIntervalDayTime) constantValue); case FLOAT_FAMILY: return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue()); case DECIMAL: - VectorExpression ve = new ConstantVectorExpression(outCol, (HiveDecimal) constantValue); - // Set type name with decimal precision, scale, etc. - ve.setOutputType(typeName); - return ve; + return new ConstantVectorExpression(outCol, (HiveDecimal) constantValue, typeName); case STRING: return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes()); case CHAR: @@ -1240,8 +1244,8 @@ private VectorExpression getCastToTimestamp(GenericUDFTimestamp udf, VectorExpression ve = getVectorExpressionForUdf(udf, udf.getClass(), childExpr, mode, returnType); // Replace with the milliseconds conversion - if (!udf.isIntToTimestampInSeconds() && ve instanceof CastLongToTimestampViaLongToLong) { - ve = createVectorExpression(CastMillisecondsLongToTimestampViaLongToLong.class, + if (!udf.isIntToTimestampInSeconds() && ve instanceof CastLongToTimestamp) { + ve = createVectorExpression(CastMillisecondsLongToTimestamp.class, childExpr, Mode.PROJECTION, returnType); } @@ -1526,13 +1530,13 @@ private VectorExpression getInExpression(List<ExprNodeDesc> childExpr, Mode mode expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType); ((ILongInExpr) expr).setInListValues(inVals); } else if (isTimestampFamily(colType)) { - cl = (mode == Mode.FILTER ? FilterLongColumnInList.class : LongColumnInList.class); - long[] inVals = new long[childrenForInList.size()]; + cl = (mode == Mode.FILTER ? FilterTimestampColumnInList.class : TimestampColumnInList.class); + Timestamp[] inVals = new Timestamp[childrenForInList.size()]; for (int i = 0; i != inVals.length; i++) { inVals[i] = getTimestampScalar(childrenForInList.get(i)); } expr = createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION, returnType); - ((ILongInExpr) expr).setInListValues(inVals); + ((ITimestampInExpr) expr).setInListValues(inVals); } else if (isStringFamily(colType)) { cl = (mode == Mode.FILTER ?
FilterStringColumnInList.class : StringColumnInList.class); byte[][] inVals = new byte[childrenForInList.size()][]; @@ -1834,7 +1838,7 @@ private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr cl = FilterCharColumnBetween.class; } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) { cl = FilterCharColumnNotBetween.class; - } else if (colType.equals("timestamp")) { - - // Get timestamp boundary values as longs instead of the expected strings - long left = getTimestampScalar(childExpr.get(2)); - long right = getTimestampScalar(childExpr.get(3)); - childrenAfterNot = new ArrayList<ExprNodeDesc>(); - childrenAfterNot.add(colExpr); - childrenAfterNot.add(new ExprNodeConstantDesc(left)); - childrenAfterNot.add(new ExprNodeConstantDesc(right)); - if (notKeywordPresent) { - cl = FilterLongColumnNotBetween.class; - } else { - cl = FilterLongColumnBetween.class; - } + } else if (colType.equals("timestamp") && !notKeywordPresent) { + cl = FilterTimestampColumnBetween.class; + } else if (colType.equals("timestamp") && notKeywordPresent) { + cl = FilterTimestampColumnNotBetween.class; } else if (isDecimalFamily(colType) && !notKeywordPresent) { cl = FilterDecimalColumnBetween.class; } else if (isDecimalFamily(colType) && notKeywordPresent) { @@ -2056,6 +2050,7 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr) // Make vectorized operator String normalizedName = getNormalizedName(resultTypeName); + VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, normalizedName, argDescs); // Set child expressions @@ -2173,21 +2168,17 @@ private Object getVectorTypeScalarValue(ExprNodeConstantDesc constDesc) throws H VectorExpression.Type type = VectorExpression.Type.getValue(t); Object scalarValue = getScalarValue(constDesc); switch (type) { - case TIMESTAMP: - return TimestampUtils.getTimeNanoSec((Timestamp) scalarValue); case DATE: return DateWritable.dateToDays((Date) scalarValue); case INTERVAL_YEAR_MONTH: return ((HiveIntervalYearMonth) scalarValue).getTotalMonths(); - case INTERVAL_DAY_TIME: - return DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) scalarValue); default: return scalarValue; } } - // Get a timestamp as a long in number of nanos, from a string constant or cast - private long getTimestampScalar(ExprNodeDesc expr) throws HiveException { + // Get a timestamp from a string constant or cast + private Timestamp getTimestampScalar(ExprNodeDesc expr) throws HiveException { if (expr instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) expr).getGenericUDF() instanceof GenericUDFTimestamp) { return evaluateCastToTimestamp(expr); @@ -2215,7 +2206,7 @@ private long getTimestampScalar(ExprNodeDesc expr) throws HiveException { + "Expecting string."); } - private long evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException { + private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException { ExprNodeGenericFuncDesc expr2 = (ExprNodeGenericFuncDesc) expr; ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(expr2); ObjectInspector output = evaluator.initialize(null); @@ -2226,7 +2217,7 @@ private long evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException { throw new HiveException("Udf: failed to convert to timestamp"); } Timestamp ts = (Timestamp) java; - return TimestampUtils.getTimeNanoSec(ts); + return ts; } private Constructor<?> getConstructor(Class<?> cl) throws HiveException { @@ -2315,7 +2306,7 @@ public static String mapTypeNameSynonyms(String typeName) { } } - public static
ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) throws HiveException { + public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) { switch (typeInfo.getCategory()) { case STRUCT: return Type.STRUCT; @@ -2336,11 +2327,13 @@ public static String mapTypeNameSynonyms(String typeName) { case INT: case LONG: case DATE: - case TIMESTAMP: case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: return ColumnVector.Type.LONG; + case INTERVAL_DAY_TIME: + case TIMESTAMP: + return ColumnVector.Type.TIMESTAMP; + case FLOAT: case DOUBLE: return ColumnVector.Type.DOUBLE; @@ -2369,47 +2362,58 @@ public static String mapTypeNameSynonyms(String typeName) { // TODO: And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used.. Right now they are conservatively // marked map-side (HASH). static ArrayList<AggregateDefinition> aggregatesDefinition = new ArrayList<AggregateDefinition>() {{ - add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY, null, VectorUDAFMinLong.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_DATE_INTERVAL_FAMILY, null, VectorUDAFMinLong.class)); add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class)); add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class)); add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class)); - add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY, null, VectorUDAFMaxLong.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, null, VectorUDAFMinTimestamp.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_DATE_INTERVAL_FAMILY, null, VectorUDAFMaxLong.class)); add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class)); add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class)); add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, null, VectorUDAFMaxTimestamp.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_INTERVAL_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_DATE_INTERVAL_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("count",
VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class)); add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class)); add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class)); - add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class)); add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class)); add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class)); - add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); - add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFAvgTimestamp.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); - add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFVarPopTimestamp.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFVarPopTimestamp.class)); + add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class)); add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class)); add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class)); - add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); - add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, 
VectorUDAFStdPopLong.class)); - add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFVarSampTimestamp.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); - add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFStdPopTimestamp.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFStdPopTimestamp.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFStdPopTimestamp.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class)); add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class)); add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.TIMESTAMP, GroupByDesc.Mode.HASH, VectorUDAFStdSampTimestamp.class)); }}; public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduceMergePartial) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 9b90f37..a68d0cc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -141,11 +141,12 @@ public static ColumnVector createColumnVector(TypeInfo typeInfo) { case SHORT: case INT: case LONG: - case TIMESTAMP: case DATE: case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: return new 
LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case INTERVAL_DAY_TIME: + case TIMESTAMP: + return new TimestampColumnVector(VectorizedRowBatch.DEFAULT_SIZE); case FLOAT: case DOUBLE: return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); @@ -393,13 +394,12 @@ private static void setVector(Object row, } break; case TIMESTAMP: { - LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex]; + TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[offset + colIndex]; if (writableCol != null) { - Timestamp t = ((TimestampWritable) writableCol).getTimestamp(); - lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t); + lcv.set(rowIndex, ((TimestampWritable) writableCol).getTimestamp()); lcv.isNull[rowIndex] = false; } else { - lcv.vector[rowIndex] = 1; + lcv.setNullValue(rowIndex); setNullColIsNullValue(lcv, rowIndex); } } @@ -583,6 +583,8 @@ static ColumnVector cloneColumnVector(ColumnVector source return new DecimalColumnVector(decColVector.vector.length, decColVector.precision, decColVector.scale); + } else if (source instanceof TimestampColumnVector) { + return new TimestampColumnVector(((TimestampColumnVector) source).getLength()); } else if (source instanceof ListColumnVector) { ListColumnVector src = (ListColumnVector) source; ColumnVector child = cloneColumnVector(src.child); @@ -682,6 +684,10 @@ public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, Strin } } else if (colVector instanceof DecimalColumnVector) { sb.append(((DecimalColumnVector) colVector).vector[index].toString()); + } else if (colVector instanceof TimestampColumnVector) { + Timestamp timestamp = new Timestamp(0); + ((TimestampColumnVector) colVector).timestampUpdate(timestamp, index); + sb.append(timestamp.toString()); } else { sb.append("Unknown"); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 0ec91b8..ceea4c9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -375,13 +375,13 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch, Object[] partition break; case TIMESTAMP: { - LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex]; + TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[colIndex]; if (value == null) { lcv.noNulls = false; lcv.isNull[0] = true; lcv.isRepeating = true; - } else { - lcv.fill(TimestampUtils.getTimeNanoSec((Timestamp) value)); + } else { + lcv.fill((Timestamp) value); lcv.isNull[0] = false; } } @@ -400,13 +400,13 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch, Object[] partition } case INTERVAL_DAY_TIME: { - LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex]; + TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[colIndex]; if (value == null) { lcv.noNulls = false; lcv.isNull[0] = true; lcv.isRepeating = true; } else { - lcv.fill(DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) value)); + lcv.fillTotalNanoseconds(DateUtils.getIntervalDayTimeTotalNanos((HiveIntervalDayTime) value)); lcv.isNull[0] = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java index a52cf19..2b0068d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java 
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java @@ -18,20 +18,23 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.sql.Timestamp; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; /** * Type cast decimal to timestamp. The decimal value is interpreted * as NNNN.DDDDDDDDD where NNNN is a number of seconds and DDDDDDDDD * is a number of nano-seconds. */ -public class CastDecimalToTimestamp extends FuncDecimalToLong { +public class CastDecimalToTimestamp extends FuncDecimalToTimestamp { private static final long serialVersionUID = 1L; - private static transient HiveDecimal tenE9 = HiveDecimal.create(1000000000); - public CastDecimalToTimestamp(int inputColumn, int outputColumn) { super(inputColumn, outputColumn); } @@ -40,13 +43,8 @@ public CastDecimalToTimestamp() { } @Override - protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { - HiveDecimal result = inV.vector[i].getHiveDecimal().multiply(tenE9); - if (result == null) { - outV.noNulls = false; - outV.isNull[i] = true; - } else { - outV.vector[i] = result.longValue(); - } + protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i) { + Timestamp timestamp = TimestampWritable.decimalToTimestamp(inV.vector[i].getHiveDecimal()); + outV.set(i, timestamp); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java new file mode 100644 index 0000000..bfbedfc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastDoubleToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastDoubleToTimestamp(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastDoubleToTimestamp() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + double[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputColVector.setEpochSeconds(0, vector[0]); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setEpochSeconds(i, vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setEpochSeconds(i, vector[i]); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setEpochSeconds(i, vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setEpochSeconds(i, vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("double")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java index 32cefea..ceefd61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToDate.java @@ -63,62 +63,6 @@ public void 
evaluate(VectorizedRowBatch batch) { } switch (inputTypes[0]) { - case TIMESTAMP: - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; - date.setTime(inV.vector[0] / 1000000); - outV.vector[0] = DateWritable.dateToDays(date); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - outV.isRepeating = false; - } else { - for(int i = 0; i != n; i++) { - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - outV.isRepeating = false; - } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - date.setTime(inV.vector[0] / 1000000); - outV.vector[0] = DateWritable.dateToDays(date); - } - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - } - outV.isRepeating = false; - } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { - date.setTime(inV.vector[i] / 1000000); - outV.vector[i] = DateWritable.dateToDays(date); - } - } - outV.isRepeating = false; - } - } - break; - case DATE: inV.copySelected(batch.selectedInUse, batch.selected, batch.size, outV); break; @@ -155,7 +99,7 @@ public String getOutputType() { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DATETIME_FAMILY) + VectorExpressionDescriptor.ArgumentType.DATE) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java new file mode 100644 index 0000000..373adea --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastLongToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastLongToTimestamp(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastLongToTimestamp() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputColVector.setEpochSeconds(0, vector[0]); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setEpochSeconds(i, vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setEpochSeconds(i, vector[i]); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setEpochSeconds(i, vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setEpochSeconds(i, vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java new file mode 100644 index 0000000..dc2b5e1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastMillisecondsLongToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastMillisecondsLongToTimestamp(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastMillisecondsLongToTimestamp() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputColVector.setEpochMilliseconds(0, vector[0]); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
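+ // (Editor's note, a worked example rather than patch content: the input long
+ // 1456790400000L is read as epoch milliseconds, i.e. 2016-03-01 00:00:00.000 UTC;
+ // the companion CastLongToTimestamp reads the same long as epoch seconds.)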
+ outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setEpochMilliseconds(i, vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setEpochMilliseconds(i, vector[i]); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.setEpochMilliseconds(i, vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.setEpochMilliseconds(i, vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java index 518d5d5..143ca47 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde.serdeConstants; @@ -55,7 +56,7 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector inV = (BytesColumnVector) batch.cols[inputColumn]; int[] sel = batch.selected; int n = batch.size; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; + TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumn]; if (n == 0) { @@ -112,13 +113,13 @@ public void evaluate(VectorizedRowBatch batch) { } } - private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) { + private void evaluate(TimestampColumnVector outV, BytesColumnVector inV, int i) { try { HiveIntervalDayTime interval = HiveIntervalDayTime.valueOf( new String(inV.vector[i], inV.start[i], inV.length[i], "UTF-8")); - outV.vector[i] = DateUtils.getIntervalDayTimeTotalNanos(interval); + outV.setEpochNanoseconds(i, DateUtils.getIntervalDayTimeTotalNanos(interval)); } catch (Exception e) { - outV.vector[i] = 1; + outV.setNullValue(i); outV.isNull[i] = true; outV.noNulls = false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java new file mode 100644 index 0000000..4729340 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastTimestampToBoolean extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastTimestampToBoolean(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastTimestampToBoolean() { + super(); + } + + private int toBool(TimestampColumnVector timestampColVector, int index) { + return (timestampColVector.getEpochDay(index) != 0 || + timestampColVector.getNanoOfDay(index) != 0) ? 1 : 0; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = toBool(inputColVector, 0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
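+ // (Editor's note: per toBool() above, only the exact epoch instant
+ // 1970-01-01 00:00:00.000000000 UTC casts to false (0); any other instant,
+ // including pre-epoch ones, casts to true (1).)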
+ outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = toBool(inputColVector, i); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = toBool(inputColVector, i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = toBool(inputColVector, i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = toBool(inputColVector, i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java index 0aedddc..86e35ee 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDecimal.java @@ -20,12 +20,12 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; /** * To be used to cast timestamp to decimal. */ -public class CastTimestampToDecimal extends FuncLongToDecimal { +public class CastTimestampToDecimal extends FuncTimestampToDecimal { private static final long serialVersionUID = 1L; @@ -38,12 +38,12 @@ public CastTimestampToDecimal(int inputColumn, int outputColumn) { } @Override - protected void func(DecimalColumnVector outV, LongColumnVector inV, int i) { + protected void func(DecimalColumnVector outV, TimestampColumnVector inV, int i) { - // The resulting decimal value is 10e-9 * the input long value (i.e. seconds). - // - HiveDecimal result = HiveDecimal.create(inV.vector[i]); - result = result.scaleByPowerOfTen(-9); + // The BigDecimal class recommends not converting directly from double to BigDecimal, + // so we convert like the non-vectorized case and go through a string...
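+ // (Editor's worked example, values illustrative: epoch seconds 1234 with
+ // 500,000,000 fractional nanoseconds reads back as the double 1234.5 below,
+ // which becomes HiveDecimal "1234.5".)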
+ Double timestampDouble = inV.getEpochSecondsWithFractionalNanos(i); + HiveDecimal result = HiveDecimal.create(timestampDouble.toString()); outV.set(i, result); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java new file mode 100644 index 0000000..43a1d5e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastTimestampToDouble extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastTimestampToDouble(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastTimestampToDouble() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + double[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector.getEpochSecondsWithFractionalNanos(0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
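+ // (Editor's note: e.g. 2016-03-01 00:00:00.5 UTC becomes 1456790400.5. A double
+ // carries only ~15-17 significant digits, so full nanosecond precision cannot
+ // always survive this cast for large epoch values.)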
+ outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getEpochSecondsWithFractionalNanos(i); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getEpochSecondsWithFractionalNanos(i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getEpochSecondsWithFractionalNanos(i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getEpochSecondsWithFractionalNanos(i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "double"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java new file mode 100644 index 0000000..6951a4f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +public class CastTimestampToLong extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public CastTimestampToLong(int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public CastTimestampToLong() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would be zero + //Repeating property will not change. + outputVector[0] = inputColVector.getEpochSeconds(0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getEpochSeconds(i); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getEpochSeconds(i); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.getEpochSeconds(i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.getEpochSeconds(i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "long"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("timestamp")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index 8d75cf3..7efd2a7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -18,10 +18,19 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.sql.Timestamp; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hive.common.util.DateUtils; /** * Constant is represented as a vector with repeating values. @@ -30,21 +39,15 @@ private static final long serialVersionUID = 1L; - private static enum Type { - LONG, - DOUBLE, - BYTES, - DECIMAL - } - private int outputColumn; protected long longValue = 0; private double doubleValue = 0; private byte[] bytesValue = null; private HiveDecimal decimalValue = null; + private PisaTimestamp timestampValue = null; private boolean isNullValue = false; - private Type type; + private ColumnVector.Type type; private int bytesValueLength = 0; public ConstantVectorExpression() { @@ -82,11 +85,22 @@ public ConstantVectorExpression(int outputColumn, HiveVarchar value) { setBytesValue(value.getValue().getBytes()); } - public ConstantVectorExpression(int outputColumn, HiveDecimal value) { - this(outputColumn, "decimal"); + // Include type name for precision/scale. + public ConstantVectorExpression(int outputColumn, HiveDecimal value, String typeName) { + this(outputColumn, typeName); setDecimalValue(value); } + public ConstantVectorExpression(int outputColumn, Timestamp value) { + this(outputColumn, "timestamp"); + setTimestampValue(value); + } + + public ConstantVectorExpression(int outputColumn, HiveIntervalDayTime value) { + this(outputColumn, "timestamp"); + setIntervalDayTimeValue(value); + } + /* * Support for null constant object */ @@ -140,6 +154,17 @@ private void evaluateDecimal(VectorizedRowBatch vrg) { } } + private void evaluateTimestamp(VectorizedRowBatch vrg) { + TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumn]; + dcv.isRepeating = true; + dcv.noNulls = !isNullValue; + if (!isNullValue) { + dcv.set(0, timestampValue); + } else { + dcv.isNull[0] = true; + } + } + @Override public void evaluate(VectorizedRowBatch vrg) { switch (type) { @@ -155,6 +180,9 @@ public void evaluate(VectorizedRowBatch vrg) { case DECIMAL: evaluateDecimal(vrg); break; + case TIMESTAMP: + evaluateTimestamp(vrg); + break; } } @@ -192,39 +220,38 @@ public void setDecimalValue(HiveDecimal decimalValue) { this.decimalValue = decimalValue; } - public String getTypeString() { - return getOutputType(); + public HiveDecimal getDecimalValue() { + return decimalValue; } - public void setTypeString(String typeString) { - this.outputType = typeString; - if (VectorizationContext.isStringFamily(typeString)) { - this.type = Type.BYTES; - } else if (VectorizationContext.isFloatFamily(typeString)) { - this.type = Type.DOUBLE; - } else if (VectorizationContext.isDecimalFamily(typeString)){ - this.type = Type.DECIMAL; - } else { - // everything else that does not belong to string, double, decimal is treated as long. 
- this.type = Type.LONG; - } + public void setTimestampValue(Timestamp timestampValue) { + this.timestampValue = new PisaTimestamp(timestampValue); } - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; + public void setIntervalDayTimeValue(HiveIntervalDayTime intervalDayTimeValue) { + this.timestampValue = new PisaTimestamp().updateFromEpochNanoseconds( + DateUtils.getIntervalDayTimeTotalNanos(intervalDayTimeValue)); + } + + + public PisaTimestamp getTimestampValue() { + return timestampValue; } - public Type getType() { - return type; + public String getTypeString() { + return getOutputType(); } - public void setType(Type type) { - this.type = type; + private void setTypeString(String typeString) { + this.outputType = typeString; + + String typeName = VectorizationContext.mapTypeNameSynonyms(outputType); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + this.type = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); } - @Override - public void setOutputType(String type) { - setTypeString(type); + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java new file mode 100644 index 0000000..86b90d8 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateColumn.java @@ -0,0 +1,187 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +// A type date (LongColumnVector storing epoch days) minus a type date produces a +// type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). 
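+//
+// Editor's sketch, not from the original patch (uses only the PisaTimestamp and
+// TimestampColumnVector calls introduced by it; epoch-day values illustrative).
+// For one pair of rows the loops below reduce to:
+//
+//   PisaTimestamp t1 = new PisaTimestamp()
+//       .updateFromEpochMilliseconds(DateWritable.daysToMillis(16861));
+//   PisaTimestamp t2 = new PisaTimestamp()
+//       .updateFromEpochMilliseconds(DateWritable.daysToMillis(16860));
+//   outputColVector.subtract(t1, t2, 0);
+//
+// so a one-epoch-day difference is stored as an interval of 86,400,000,000,000 ns.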
+public class DateColSubtractDateColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp1; + private PisaTimestamp scratchPisaTimestamp2; + private DateTimeMath dtm = new DateTimeMath(); + + public DateColSubtractDateColumn(int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + scratchPisaTimestamp1 = new PisaTimestamp(); + scratchPisaTimestamp2 = new PisaTimestamp(); + } + + public DateColSubtractDateColumn() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1]; + + // Input #2 is type date (epochDays). + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2]; + + // Output is type interval_day_time. + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = + inputColVector1.isRepeating && inputColVector2.isRepeating + || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] + || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. 
+ */ + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.subtract( + scratchPisaTimestamp1.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + scratchPisaTimestamp2.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + scratchPisaTimestamp1.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[0])); + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp1, + scratchPisaTimestamp2.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + scratchPisaTimestamp1.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[0])); + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp1, + scratchPisaTimestamp2.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + scratchPisaTimestamp2.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[0])); + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp1.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2, + i); + } + } else { + scratchPisaTimestamp2.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[0])); + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp1.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2, + i); + } + } + } else { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp1.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp1.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + scratchPisaTimestamp2.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
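+ *
+ * (Editor's note: e.g. if row 1 of a three-row batch is null, the subtract calls
+ * above still wrote a meaningless difference into row 1; the call below then
+ * overwrites such slots with a well-defined placeholder value so downstream
+ * expressions cannot misbehave on them.)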
+ */ + NullUtil.setNullDataEntriesTimestamp(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java new file mode 100644 index 0000000..6f415e9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +// A type date (LongColumnVector storing epoch days) minus a type date produces a +// type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). 
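+//
+// Editor's note (illustrative wiring, not part of the original patch): the scalar
+// operand is converted from epoch days to a PisaTimestamp once in the constructor,
+// leaving one conversion per row for the column side:
+//
+//   // col 0 (date) minus the date scalar at epoch day 16861, result into col 2
+//   VectorExpression expr = new DateColSubtractDateScalar(0, 16861L, 2);
+//   expr.evaluate(batch);  // batch.cols[2] is a TimestampColumnVector of intervals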
+public class DateColSubtractDateScalar extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public DateColSubtractDateScalar(int colNum, long value, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp().updateFromEpochMilliseconds(DateWritable.daysToMillis((int) value)); + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public DateColSubtractDateScalar() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #1 is type date (epochDays). + LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum]; + + // Output is type interval_day_time (stored in a TimestampColumnVector). + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector1.noNulls; + outputColVector.isRepeating = inputColVector1.isRepeating; + int n = batch.size; + long[] vector1 = inputColVector1.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector1.isRepeating) { + outputColVector.subtract( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[0])), + value, + 0); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector1.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector1[i])), + value, + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java new file mode 100644 index 0000000..d236405 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; + +// A type date (LongColumnVector storing epoch days) minus a type date produces a +// type interval_day_time (TimestampColumnVector storing nanosecond interval in 2 longs). +public class DateScalarSubtractDateColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private PisaTimestamp value; + private int outputColumn; + private PisaTimestamp scratchPisaTimestamp; + private DateTimeMath dtm = new DateTimeMath(); + + public DateScalarSubtractDateColumn(long value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = new PisaTimestamp().updateFromEpochMilliseconds(DateWritable.daysToMillis((int) value)); + this.outputColumn = outputColumn; + scratchPisaTimestamp = new PisaTimestamp(); + } + + public DateScalarSubtractDateColumn() { + } + + /** + * Method to evaluate scalar-column operation in vectorized fashion. + * + * @param batch a package of rows with each column stored in a vector + */ + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + // Input #2 is type date (epochDays). + LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum]; + + // Output is type interval_day_time (stored in a TimestampColumnVector). + TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumn]; + + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector2.noNulls; + outputColVector.isRepeating = inputColVector2.isRepeating; + int n = batch.size; + + long[] vector2 = inputColVector2.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector2.isRepeating) { + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[0])), + 0); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code.
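+ // (Editor's note: operand order matters here; the scalar is the minuend, so e.g.
+ // scalar 2016-03-01 minus a column value of 2016-02-29 yields a +1 day interval.)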
+ outputIsNull[0] = inputIsNull[0]; + } else if (inputColVector2.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + } + } else { /* there are nulls */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.subtract( + value, + scratchPisaTimestamp.updateFromEpochMilliseconds(DateWritable.daysToMillis((int) vector2[i])), + i); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("date"), + VectorExpressionDescriptor.ArgumentType.getType("date")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java new file mode 100644 index 0000000..42e4984 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterTimestampColumnInList.java @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; +import java.util.HashSet; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Evaluate IN filter on a batch for a vector of timestamps. 
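+ *
+ * Editor's sketch of intended use (hypothetical values; API as declared below):
+ *
+ *   FilterTimestampColumnInList filter = new FilterTimestampColumnInList(0);
+ *   filter.setInListValues(new Timestamp[] {
+ *       Timestamp.valueOf("2016-03-01 00:00:00") });
+ *   filter.evaluate(batch);  // batch.selected / batch.size keep only matching rows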
+ */ +public class FilterTimestampColumnInList extends VectorExpression implements ITimestampInExpr { + private static final long serialVersionUID = 1L; + private int inputCol; + private Timestamp[] inListValues; + + // The set object containing the IN list. + private transient HashSet<PisaTimestamp> inSet; + + public FilterTimestampColumnInList() { + super(); + inSet = null; + } + + /** + * After construction you must call setInListValues() to add the values to the IN set. + */ + public FilterTimestampColumnInList(int colNum) { + this.inputCol = colNum; + inSet = null; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + if (inSet == null) { + inSet = new HashSet<PisaTimestamp>(inListValues.length); + for (Timestamp val : inListValues) { + inSet.add(new PisaTimestamp(val)); + } + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + PisaTimestamp scratchTimestamp = new PisaTimestamp(); + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + + inputColVector.pisaTimestampUpdate(scratchTimestamp, 0); + if (!(inSet.contains(scratchTimestamp))) { + //Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, 0); + if (!inSet.contains(scratchTimestamp)) { + + //Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + } + + // Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + if (inSet.contains(scratchTimestamp)) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor.
+ return null; + } + + public void setInListValues(Timestamp[] a) { + this.inListValues = a; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java new file mode 100644 index 0000000..561c152 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary decimal functions and expressions returning timestamps that + * operate directly on the input and set the output. + */ +public abstract class FuncDecimalToTimestamp extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncDecimalToTimestamp(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + } + + public FuncDecimalToTimestamp() { + super(); + } + + abstract protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. 
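+ // The null flags are copied below and func() is only applied to the non-null entries.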
+ outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "timestamp"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.DECIMAL) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java new file mode 100644 index 0000000..774551c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * This is a superclass for unary timestamp functions and expressions returning decimals that + * operate directly on the input and set the output. 
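+ * Subclasses only implement func(), which converts a single element; the repeating, selected and null cases are all handled once in evaluate() here.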
+ */ +public abstract class FuncTimestampToDecimal extends VectorExpression { + private static final long serialVersionUID = 1L; + int inputColumn; + int outputColumn; + + public FuncTimestampToDecimal(int inputColumn, int outputColumn) { + this.inputColumn = inputColumn; + this.outputColumn = outputColumn; + this.outputType = "decimal"; + } + + public FuncTimestampToDecimal() { + super(); + this.outputType = "decimal"; + } + + abstract protected void func(DecimalColumnVector outV, TimestampColumnVector inV, int i); + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + TimestampColumnVector inV = (TimestampColumnVector) batch.cols[inputColumn]; + int[] sel = batch.selected; + int n = batch.size; + DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumn]; + + if (n == 0) { + + // Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + + // Handle case with nulls. Don't do function if the value is null, + // because the data may be undefined for a null value. + outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getInputColumn() { + return inputColumn; + } + + public void setInputColumn(int inputColumn) { + this.inputColumn = inputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.TIMESTAMP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ITimestampInExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ITimestampInExpr.java new file mode 100644 index 0000000..f6cc971 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ITimestampInExpr.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; + +public interface ITimestampInExpr { + void setInListValues(Timestamp[] inVals); +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java index 2eb48fb..3c6824d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; /** * Utility functions to handle null propagation. @@ -81,6 +82,31 @@ public static void setNullDataEntriesBytes( } } + /** + * Set the data value for all NULL entries to the designated NULL_VALUE. + */ + public static void setNullDataEntriesTimestamp( + TimestampColumnVector v, boolean selectedInUse, int[] sel, int n) { + if (v.noNulls) { + return; + } else if (v.isRepeating && v.isNull[0]) { + v.setNullValue(0); + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if(v.isNull[i]) { + v.setNullValue(i); + } + } + } else { + for (int i = 0; i != n; i++) { + if(v.isNull[i]) { + v.setNullValue(i); + } + } + } + } + // for use by Column-Scalar and Scalar-Column arithmetic for null propagation public static void setNullOutputEntriesColScalar( ColumnVector v, boolean selectedInUse, int[] sel, int n) { @@ -89,8 +115,11 @@ public static void setNullOutputEntriesColScalar( // No need to set null data entries because the input NaN values // will automatically propagate to the output. return; + } else if (v instanceof LongColumnVector) { + setNullDataEntriesLong((LongColumnVector) v, selectedInUse, sel, n); + } else if (v instanceof TimestampColumnVector){ + setNullDataEntriesTimestamp((TimestampColumnVector) v, selectedInUse, sel, n); } - setNullDataEntriesLong((LongColumnVector) v, selectedInUse, sel, n); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java new file mode 100644 index 0000000..2d7d0c2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.sql.Timestamp; +import java.util.HashSet; + +import org.apache.hadoop.hive.common.type.PisaTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Output a boolean value indicating if a column is IN a list of constants. + */ +public class TimestampColumnInList extends VectorExpression implements ITimestampInExpr { + private static final long serialVersionUID = 1L; + private int inputCol; + private Timestamp[] inListValues; + private int outputColumn; + + private transient PisaTimestamp scratchTimestamp; + + + // The set object containing the IN list. + private transient HashSet<PisaTimestamp> inSet; + + public TimestampColumnInList() { + super(); + inSet = null; + } + + /** + * After construction you must call setInListValues() to add the values to the IN set. + */ + public TimestampColumnInList(int colNum, int outputColumn) { + this.inputCol = colNum; + this.outputColumn = outputColumn; + inSet = null; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + if (inSet == null) { + inSet = new HashSet<PisaTimestamp>(inListValues.length); + for (Timestamp val : inListValues) { + inSet.add(new PisaTimestamp(val)); + } + scratchTimestamp = new PisaTimestamp(); + } + + TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNulls = outputColVector.isNull; + int n = batch.size; + long[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + inputColVector.pisaTimestampUpdate(scratchTimestamp, 0); + outputVector[0] = inSet.contains(scratchTimestamp) ? 1 : 0; + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + outputVector[i] = inSet.contains(scratchTimestamp) ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + outputVector[i] = inSet.contains(scratchTimestamp) ? 1 : 0; + } + } + } else { + if (inputColVector.isRepeating) { + + //All must be selected otherwise size would be zero + //Repeating property will not change. + if (!nullPos[0]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, 0); + outputVector[0] = inSet.contains(scratchTimestamp) ?
1 : 0; + outNulls[0] = false; + } else { + outNulls[0] = true; + } + outputColVector.isRepeating = true; + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNulls[i] = nullPos[i]; + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + outputVector[i] = inSet.contains(scratchTimestamp) ? 1 : 0; + } + } + } else { + System.arraycopy(nullPos, 0, outNulls, 0, n); + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + inputColVector.pisaTimestampUpdate(scratchTimestamp, i); + outputVector[i] = inSet.contains(scratchTimestamp) ? 1 : 0; + } + } + } + } + } + + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public Descriptor getDescriptor() { + + // This VectorExpression (IN) is a special case, so don't return a descriptor. + return null; + } + + public void setInListValues(Timestamp[] a) { + this.inListValues = a; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index c0e4cf0..8fca8a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -144,6 +144,8 @@ public String toString() { b.append(this.getClass().getSimpleName()); b.append("["); b.append(this.getOutputColumn()); + b.append(":"); + b.append(this.getOutputType()); b.append("]"); if (childExpressions != null) { b.append("("); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java index d91b880..326bfb9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriter.java @@ -18,12 +18,15 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.sql.Timestamp; + import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.io.Writable; /** * Interface used to create Writable objects from vector expression primitives. 
@@ -37,6 +40,9 @@ Object writeValue(byte[] value, int start, int length) throws HiveException; Object writeValue(HiveDecimalWritable value) throws HiveException; Object writeValue(HiveDecimal value) throws HiveException; + Object writeValue(TimestampWritable value) throws HiveException; + Object writeValue(Timestamp value) throws HiveException; + Object writeValue(PisaTimestamp value) throws HiveException; Object setValue(Object row, ColumnVector column, int columnRow) throws HiveException; Object initValue(Object ost) throws HiveException; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index bbf8862..d9f33e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.PisaTimestamp; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -162,6 +163,44 @@ public Object setValue(Object field, HiveDecimalWritable value) throws HiveExcep public Object setValue(Object field, HiveDecimal value) throws HiveException { throw new HiveException("Internal error: should not reach here"); } + + /** + * The base implementation must be overridden by the Timestamp specialization + */ + @Override + public Object writeValue(Timestamp value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } + + /** + * The base implementation must be overridden by the Timestamp specialization + */ + @Override + public Object writeValue(TimestampWritable value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } + + /** + * The base implementation must be overridden by the Timestamp specialization + */ + public Object setValue(Object field, TimestampWritable value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } + + /** + * The base implementation must be overridden by the MutableTimestamp specialization + */ + @Override + public Object writeValue(PisaTimestamp value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } + + /** + * The base implementation must be overridden by the Timestamp specialization + */ + public Object setValue(Object field, Timestamp value) throws HiveException { + throw new HiveException("Internal error: should not reach here"); + } } /** @@ -366,6 +405,66 @@ public Object setValue(Object field, ColumnVector column, int row) throws HiveEx } } + /** + * Specialized writer for TimestampColumnVector. Will throw cast exception + * if the wrong vector column is used. 
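+ * Both writeValue and setValue enumerate the noNulls/isRepeating/isNull combinations, return null for null entries, and route the rest through a scratch TimestampWritable cached on the column vector.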
+ */ + private static abstract class VectorExpressionWriterTimestamp extends VectorExpressionWriterBase { + @Override + public Object writeValue(ColumnVector column, int row) throws HiveException { + TimestampColumnVector dcv = (TimestampColumnVector) column; + TimestampWritable timestampWritable = (TimestampWritable) dcv.getScratchWritable(); + if (timestampWritable == null) { + timestampWritable = new TimestampWritable(); + dcv.setScratchWritable(timestampWritable); + } + if (dcv.noNulls && !dcv.isRepeating) { + return writeValue(TimestampUtils.timestampColumnVectorWritable(dcv, row, timestampWritable)); + } else if (dcv.noNulls && dcv.isRepeating) { + return writeValue(TimestampUtils.timestampColumnVectorWritable(dcv, 0, timestampWritable)); + } else if (!dcv.noNulls && !dcv.isRepeating && !dcv.isNull[row]) { + return writeValue(TimestampUtils.timestampColumnVectorWritable(dcv, row, timestampWritable)); + } else if (!dcv.noNulls && dcv.isRepeating && !dcv.isNull[0]) { + return writeValue(TimestampUtils.timestampColumnVectorWritable(dcv, 0, timestampWritable)); + } else if (!dcv.noNulls && dcv.isRepeating && dcv.isNull[0]) { + return null; + } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) { + return null; + } + throw new HiveException( + String.format( + "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", + row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0])); + } + + @Override + public Object setValue(Object field, ColumnVector column, int row) throws HiveException { + TimestampColumnVector dcv = (TimestampColumnVector) column; + TimestampWritable timestampWritable = (TimestampWritable) dcv.getScratchWritable(); + if (timestampWritable == null) { + timestampWritable = new TimestampWritable(); + dcv.setScratchWritable(timestampWritable); + } + if (dcv.noNulls && !dcv.isRepeating) { + return setValue(field, TimestampUtils.timestampColumnVectorWritable(dcv, row, timestampWritable)); + } else if (dcv.noNulls && dcv.isRepeating) { + return setValue(field, TimestampUtils.timestampColumnVectorWritable(dcv, 0, timestampWritable)); + } else if (!dcv.noNulls && !dcv.isRepeating && !dcv.isNull[row]) { + return setValue(field, TimestampUtils.timestampColumnVectorWritable(dcv, row, timestampWritable)); + } else if (!dcv.noNulls && !dcv.isRepeating && dcv.isNull[row]) { + return null; + } else if (!dcv.noNulls && dcv.isRepeating && !dcv.isNull[0]) { + return setValue(field, TimestampUtils.timestampColumnVectorWritable(dcv, 0, timestampWritable)); + } else if (!dcv.noNulls && dcv.isRepeating && dcv.isNull[0]) { + return null; + } + throw new HiveException( + String.format( + "Incorrect null/repeating: row:%d noNulls:%b isRepeating:%b isNull[row]:%b isNull[0]:%b", + row, dcv.noNulls, dcv.isRepeating, dcv.isNull[row], dcv.isNull[0])); + } + } + /** * Compiles the appropriate vector expression writer based on an expression info (ExprNodeDesc) */ @@ -514,6 +613,22 @@ public Object setValue(Object field, HiveDecimal value) { } @Override + public Object setValue(Object field, TimestampWritable value) { + if (null == field) { + field = initValue(null); + } + return ((SettableTimestampObjectInspector) this.objectInspector).set(field, value); + } + + @Override + public Object setValue(Object field, Timestamp value) { + if (null == field) { + field = initValue(null); + } + return ((SettableTimestampObjectInspector) this.objectInspector).set(field, value); + } + + @Override public Object initValue(Object ignored) { return 
((SettableHiveDecimalObjectInspector) this.objectInspector).create( HiveDecimal.ZERO); @@ -560,41 +675,48 @@ public Object initValue(Object ignored) { } private static VectorExpressionWriter genVectorExpressionWritableTimestamp( - SettableTimestampObjectInspector fieldObjInspector) throws HiveException { - return new VectorExpressionWriterLong() { + SettableTimestampObjectInspector fieldObjInspector) throws HiveException { + + return new VectorExpressionWriterTimestamp() { private Object obj; - private Timestamp ts; - public VectorExpressionWriter init(SettableTimestampObjectInspector objInspector) - throws HiveException { + public VectorExpressionWriter init(SettableTimestampObjectInspector objInspector) throws HiveException { super.init(objInspector); - ts = new Timestamp(0); obj = initValue(null); return this; } @Override - public Object writeValue(long value) { - TimestampUtils.assignTimeInNanoSec(value, ts); - ((SettableTimestampObjectInspector) this.objectInspector).set(obj, ts); - return obj; + public Object writeValue(TimestampWritable value) throws HiveException { + return ((SettableTimestampObjectInspector) this.objectInspector).set(obj, value); } @Override - public Object setValue(Object field, long value) { + public Object writeValue(Timestamp value) throws HiveException { + return ((SettableTimestampObjectInspector) this.objectInspector).set(obj, value); + } + + @Override + public Object setValue(Object field, TimestampWritable value) { if (null == field) { field = initValue(null); } - TimestampUtils.assignTimeInNanoSec(value, ts); - ((SettableTimestampObjectInspector) this.objectInspector).set(field, ts); - return field; + return ((SettableTimestampObjectInspector) this.objectInspector).set(field, value); + } + + @Override + public Object setValue(Object field, Timestamp value) { + if (null == field) { + field = initValue(null); + } + return ((SettableTimestampObjectInspector) this.objectInspector).set(field, value); } @Override public Object initValue(Object ignored) { return ((SettableTimestampObjectInspector) this.objectInspector).create(new Timestamp(0)); } - }.init(fieldObjInspector); + }.init(fieldObjInspector); } private static VectorExpressionWriter genVectorExpressionWritableIntervalYearMonth( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java index 9883fe6..a58bfb5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateLong.java @@ -47,9 +47,6 @@ protected void func(BytesColumnVector outV, long[] vector, int i) { date.setTime(DateWritable.daysToMillis((int) vector[i])); break; - case TIMESTAMP: - date.setTime(vector[i] / 1000000); - break; default: throw new Error("Unsupported input type " + inputTypes[0].name()); } @@ -68,7 +65,7 @@ protected void func(BytesColumnVector outV, long[] vector, int i) { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.DATETIME_FAMILY) + VectorExpressionDescriptor.ArgumentType.DATE) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java index 58724a4..880f2a9 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMonthLong.java @@ -18,8 +18,11 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.sql.Timestamp; import java.util.Calendar; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; + /** * Returns month value. * Extends {@link VectorUDFTimestampFieldLong} @@ -37,9 +40,9 @@ public VectorUDFMonthLong() { } @Override - protected long getTimestampField(long time) { + protected long getTimestampField(TimestampColumnVector timestampColVector, int elementNum) { /* january is 0 */ - return 1 + super.getTimestampField(time); + return 1 + super.getTimestampField(timestampColVector, elementNum); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java index 3b9fffc..b73a68c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldLong.java @@ -21,7 +21,10 @@ import java.sql.Timestamp; import java.util.Calendar; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -37,7 +40,7 @@ protected int outputColumn; protected int field; protected transient final Calendar calendar = Calendar.getInstance(); - protected transient final Timestamp ts = new Timestamp(0); + protected transient Timestamp scratchTimestamp; public VectorUDFTimestampFieldLong(int field, int colNum, int outputColumn) { this(); @@ -50,34 +53,9 @@ public VectorUDFTimestampFieldLong() { super(); } - protected final Timestamp getTimestamp(long nanos) { - /* - * new Timestamp() stores the millisecond precision values in the nanos field. - * If you wanted to store 200ms it will result in nanos being set to 200*1000*1000. - * When you call setNanos(0), because there are no sub-ms times, it will set it to 0, - * ending up with a Timestamp which refers to 0ms by accident. - * CAVEAT: never use a sub-second value in new Timestamp() args, just use setNanos to set it. - */ - long ms = (nanos / (1000 * 1000 * 1000)) * 1000; - /* the milliseconds should be kept in nanos */ - long ns = nanos % (1000*1000*1000); - if (ns < 0) { - /* - * Due to the way java.sql.Timestamp stores sub-second values, it throws an exception - * if nano seconds are negative. The timestamp implementation handles this by using - * negative milliseconds and adjusting the nano seconds up by the same to be positive. - * Read Timestamp.java:setTime() implementation for this code. 
- */ - ms -= 1000; - ns += 1000*1000*1000; - } - ts.setTime(ms); - ts.setNanos((int) ns); - return ts; - } - - protected long getTimestampField(long time) { - calendar.setTime(getTimestamp(time)); + protected long getTimestampField(TimestampColumnVector timestampColVector, int elementNum) { + timestampColVector.timestampUpdate(scratchTimestamp, elementNum); + calendar.setTime(scratchTimestamp); return calendar.get(field); } @@ -89,16 +67,21 @@ protected long getDateField(long days) { @Override public void evaluate(VectorizedRowBatch batch) { + if (scratchTimestamp == null) { + scratchTimestamp = new Timestamp(0); + } + if (childExpressions != null) { super.evaluateChildren(batch); } LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; - LongColumnVector inputCol = (LongColumnVector)batch.cols[this.colNum]; + ColumnVector inputColVec = batch.cols[this.colNum]; + /* every line below this is identical for evaluateLong & evaluateString */ - final int n = inputCol.isRepeating ? 1 : batch.size; + final int n = inputColVec.isRepeating ? 1 : batch.size; int[] sel = batch.selected; - final boolean selectedInUse = (inputCol.isRepeating == false) && batch.selectedInUse; + final boolean selectedInUse = (inputColVec.isRepeating == false) && batch.selectedInUse; if(batch.size == 0) { /* n != batch.size when isRepeating */ @@ -106,39 +89,43 @@ public void evaluate(VectorizedRowBatch batch) { } /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + outV.isRepeating = inputColVec.isRepeating; switch (inputTypes[0]) { case TIMESTAMP: - if (inputCol.noNulls) { - outV.noNulls = true; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.vector[i] = getTimestampField(inputCol.vector[i]); - } - } else { - for(int i = 0; i < n; i++) { - outV.vector[i] = getTimestampField(inputCol.vector[i]); - } - } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. - outV.noNulls = false; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getTimestampField(inputCol.vector[i]); + { + TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec; + + if (inputColVec.noNulls) { + outV.noNulls = true; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = getTimestampField(timestampColVector, i); + } + } else { + for(int i = 0; i < n; i++) { + outV.vector[i] = getTimestampField(timestampColVector, i); } } } else { - for(int i = 0; i < n; i++) { - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getTimestampField(inputCol.vector[i]); + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. 
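+ // Copy the null flags row by row so getTimestampField is only evaluated for non-null rows.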
+ outV.noNulls = false; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputColVec.isNull[i]; + if (!inputColVec.isNull[i]) { + outV.vector[i] = getTimestampField(timestampColVector, i); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputColVec.isNull[i]; + if (!inputColVec.isNull[i]) { + outV.vector[i] = getTimestampField(timestampColVector, i); + } } } } @@ -146,35 +133,39 @@ public void evaluate(VectorizedRowBatch batch) { break; case DATE: - if (inputCol.noNulls) { - outV.noNulls = true; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.vector[i] = getDateField(inputCol.vector[i]); - } - } else { - for(int i = 0; i < n; i++) { - outV.vector[i] = getDateField(inputCol.vector[i]); - } - } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. - outV.noNulls = false; - if (selectedInUse) { - for(int j=0; j < n; j++) { - int i = sel[j]; - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getDateField(inputCol.vector[i]); + { + LongColumnVector longColVector = (LongColumnVector) inputColVec; + + if (inputColVec.noNulls) { + outV.noNulls = true; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.vector[i] = getDateField(longColVector.vector[i]); + } + } else { + for(int i = 0; i < n; i++) { + outV.vector[i] = getDateField(longColVector.vector[i]); } } } else { - for(int i = 0; i < n; i++) { - outV.isNull[i] = inputCol.isNull[i]; - if (!inputCol.isNull[i]) { - outV.vector[i] = getDateField(inputCol.vector[i]); + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. 
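+ // Same null-propagation pattern as the TIMESTAMP case above.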
+ outV.noNulls = false; + if (selectedInUse) { + for(int j=0; j < n; j++) { + int i = sel[j]; + outV.isNull[i] = inputColVec.isNull[i]; + if (!inputColVec.isNull[i]) { + outV.vector[i] = getDateField(longColVector.vector[i]); + } + } + } else { + for(int i = 0; i < n; i++) { + outV.isNull[i] = inputColVec.isNull[i]; + if (!inputColVec.isNull[i]) { + outV.vector[i] = getDateField(longColVector.vector[i]); + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java index 6df68f0..b8fc645 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampLong.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.sql.Timestamp; + +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.serde2.io.DateWritable; /** @@ -29,14 +32,8 @@ private static final long serialVersionUID = 1L; @Override - protected long getTimestampField(long time) { - long ms = (time / (1000*1000*1000)) * 1000; - long remainder = time % (1000*1000*1000); - /* negative timestamps need to be adjusted */ - if(remainder < 0) { - ms -= 1000; - } - return ms / 1000; + protected long getTimestampField(TimestampColumnVector timestampColVector, int elementNum) { + return timestampColVector.getEpochSeconds(elementNum); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java index 41c9d5b..3a93fb5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import java.util.Arrays; import java.util.Calendar; /** @@ -28,34 +27,6 @@ public final class VectorUDFYearLong extends VectorUDFTimestampFieldLong { private static final long serialVersionUID = 1L; - /* year boundaries in nanoseconds */ - private static transient final long[] YEAR_BOUNDARIES; - private static transient final int MIN_YEAR = 1678; - private static transient final int MAX_YEAR = 2300; - - static { - YEAR_BOUNDARIES = new long[MAX_YEAR-MIN_YEAR]; - Calendar c = Calendar.getInstance(); - c.setTimeInMillis(0); // c.set doesn't reset millis - /* 1901 Jan is not with in range */ - for(int year=MIN_YEAR+1; year <= MAX_YEAR; year++) { - c.set(year, Calendar.JANUARY, 1, 0, 0, 0); - YEAR_BOUNDARIES[year-MIN_YEAR-1] = c.getTimeInMillis()*1000*1000; - } - } - - @Override - protected long getTimestampField(long time) { - /* binarySearch is faster than a loop doing a[i] (no array out of bounds checks) */ - int year = Arrays.binarySearch(YEAR_BOUNDARIES, time); - if(year >= 0) { - /* 0 == 1902 etc */ - return MIN_YEAR + 1 + year; - } else { - /* -1 == 1901, -2 == 1902 */ - return MIN_YEAR - 1 - year; - } - } public VectorUDFYearLong(int colNum, int outputColumn) { super(Calendar.YEAR, colNum, outputColumn); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java new file mode 100644 index 0000000..b0aedea --- /dev/null +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgTimestamp.java @@ -0,0 +1,483 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** + * Generated from template VectorUDAFAvg.txt. + */ +@Description(name = "avg", + value = "_FUNC_(expr) - Returns the average value of expr (vectorized, type: timestamp)") +public class VectorUDAFAvgTimestamp extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** class for storing the current aggregate value. 
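+ * Keeps a running sum (epoch seconds with fractional nanoseconds, as a double) and a row count; isNull marks a buffer that has not aggregated any value yet.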
*/ + static class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private double sum; + transient private long count; + + /** + * Value is explicitly (re)initialized in reset() + */ + transient private boolean isNull = true; + + public void sumValue(double value) { + if (isNull) { + sum = value; + count = 1; + isNull = false; + } else { + sum += value; + count++; + } + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset () { + isNull = true; + sum = 0; + count = 0L; + } + } + + private VectorExpression inputExpression; + transient private Object[] partialResult; + transient private LongWritable resultCount; + transient private DoubleWritable resultSum; + transient private StructObjectInspector soi; + + public VectorUDAFAvgTimestamp(VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public VectorUDAFAvgTimestamp() { + super(); + partialResult = new Object[2]; + resultCount = new LongWritable(); + resultSum = new DoubleWritable(); + partialResult[0] = resultCount; + partialResult[1] = resultSum; + initPartialResultInspector(); + } + + private void initPartialResultInspector() { + List<ObjectInspector> foi = new ArrayList<ObjectInspector>(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + List<String> fname = new ArrayList<String>(); + fname.add("count"); + fname.add("sum"); + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()]; + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batchSize); + } + } + } else { + if (inputColVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize, inputColVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batchSize, inputColVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + double value, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(inputColVector.getEpochSecondsWithFractionalNanos(selection[i])); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(inputColVector.getEpochSecondsWithFractionalNanos(i)); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + double value, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[selection[i]]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + double value, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + 
int bufferIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + j); + myagg.sumValue(inputColVector.getEpochSecondsWithFractionalNanos(i)); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(inputColVector.getEpochSecondsWithFractionalNanos(i)); + } + } + } + + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = + (TimestampColumnVector)batch.cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + myagg.sum += inputColVector.getEpochSecondsWithFractionalNanos(0)*batchSize; + myagg.count += batchSize; + } + return; + } + + if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getEpochSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + myagg.sum += value; + myagg.count += 1; + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + + for (int i=0; i< batchSize; ++i) { + double value = inputColVector.getEpochSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count += 1; + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + List<String> fname = new ArrayList<String>(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int
aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, + inputColVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i<batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getEpochSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = inputColVector.getEpochSecondsWithFractionalNanos(selected[i]); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateNoSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { +
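+ // No selection vector in use: walk the batch in order, skipping null entries.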
for(int i=0;i<batchSize;++i) { + if (!isNull[i]) { + double value = inputColVector.getEpochSecondsWithFractionalNanos(i); + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i<batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = inputColVector.getEpochSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + iterateRepeatingNoNulls(myagg, inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateRepeatingNoNulls( + Aggregation myagg, + double value, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + // TODO: conjure a formula w/o iterating + // + + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // We pulled out i=0 so we can remove the count > 1 check in the loop + for (int i=1; i<batchSize; ++i) { + myagg.sum += value; + myagg.count += 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getEpochSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getEpochSecondsWithFractionalNanos(selected[0]); + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove the count > 1 check in the loop + // + for (int i=1; i< batchSize; ++i) { + value = inputColVector.getEpochSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count += 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i<batchSize;++i) { + if (!isNull[i]) { + double value = inputColVector.getEpochSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getEpochSecondsWithFractionalNanos(0); + myagg.sum += value; + myagg.count += 1; + + if(myagg.count > 1) { +
double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove count > 1 check + for (int i=1; i foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, + inputColVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getEpochSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector 
inputColVector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = inputColVector.getEpochSecondsWithFractionalNanos(selected[i]); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateNoSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + iterateRepeatingNoNulls(myagg, inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateRepeatingNoNulls( + Aggregation myagg, + double value, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + // TODO: conjure a formula w/o iterating + // + + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // We pulled out i=0 so we can remove the count > 1 check in the loop + for (int i=1; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getEpochSecondsWithFractionalNanos(selected[0]); + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove the count > 1 check in the loop + // + for (int i=1; i< batchSize; ++i) { + value = inputColVector.getEpochSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count 
+= 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getEpochSecondsWithFractionalNanos(0); + myagg.sum += value; + myagg.count += 1; + + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove count > 1 check + for (int i=1; i foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, + inputColVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getEpochSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = inputColVector.getEpochSecondsWithFractionalNanos(selected[i]); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateNoSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + 
} + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + iterateRepeatingNoNulls(myagg, inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateRepeatingNoNulls( + Aggregation myagg, + double value, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + // TODO: conjure a formula w/o iterating + // + + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // We pulled out i=0 so we can remove the count > 1 check in the loop + for (int i=1; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getEpochSecondsWithFractionalNanos(selected[0]); + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove the count > 1 check in the loop + // + for (int i=1; i< batchSize; ++i) { + value = inputColVector.getEpochSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count += 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getEpochSecondsWithFractionalNanos(0); + myagg.sum += value; + myagg.count += 1; + + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove count > 1 check + for (int i=1; i foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + List fname = new ArrayList(); + fname.add("count"); + 
fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, + inputColVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getEpochSecondsWithFractionalNanos(i); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = inputColVector.getEpochSecondsWithFractionalNanos(selected[i]); + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + 
} + } + } + + private void iterateNoSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + TimestampColumnVector inputColVector, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. + cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + iterateRepeatingNoNulls(myagg, inputColVector.getEpochSecondsWithFractionalNanos(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateRepeatingNoNulls( + Aggregation myagg, + double value, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + // TODO: conjure a formula w/o iterating + // + + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // We pulled out i=0 so we can remove the count > 1 check in the loop + for (int i=1; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = inputColVector.getEpochSecondsWithFractionalNanos(selected[0]); + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove the count > 1 check in the loop + // + for (int i=1; i< batchSize; ++i) { + value = inputColVector.getEpochSecondsWithFractionalNanos(selected[i]); + myagg.sum += value; + myagg.count += 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + 
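All of the generated iterate methods above share one accumulation step: `sum` and `count` are updated, and `variance` accumulates the running sum of squared deviations via `t = count*value - sum; variance += t*t / (count*(count-1))`, the Youngs-Cramer (provisional-means) update. The final division by `count` or `count - 1` happens later in the evaluator. A minimal standalone sketch (plain Java, not part of the patch) checking the update against a two-pass computation:

```java
public class IncrementalVarianceDemo {
  public static void main(String[] args) {
    double[] values = {1.5, 2.0, 2.5, 4.0, 8.0};

    // Streaming update, exactly as in the generated aggregates.
    long count = 0;
    double sum = 0.0;
    double variance = 0.0;   // accumulates the sum of squared deviations
    for (double value : values) {
      sum += value;
      count += 1;
      if (count > 1) {
        double t = count * value - sum;
        variance += (t * t) / ((double) count * (count - 1));
      }
    }

    // Two-pass reference: sum of squared deviations from the mean.
    double mean = sum / count;
    double ssd = 0.0;
    for (double value : values) {
      ssd += (value - mean) * (value - mean);
    }

    System.out.printf("incremental=%f two-pass=%f%n", variance, ssd);
    // var_pop = variance / count; var_samp = variance / (count - 1)
  }
}
```

Because the update touches only `sum`, `count`, and `variance`, it works identically whether the buffers are per-row (`aggregateInputSelection`) or shared (`aggregateInput`).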
diff --git storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java
new file mode 100644
index 0000000..2b67c2e
--- /dev/null
+++ storage-api/src/java/org/apache/hadoop/hive/common/type/PisaTimestamp.java
@@ -0,0 +1,470 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import java.sql.Timestamp;
+import java.util.Date;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * The Pisa project is named after the famous Leonardo of Pisa, better known as Fibonacci.
+ *
+ * A Pisa timestamp is a timestamp without a time-zone (i.e. local) in the ISO-8601 calendar system,
+ * such as 2007-12-03 10:15:30.0123456789, with accuracy to the nanosecond (1 billionth of a
+ * second).
+ *
+ * Pisa timestamps use the same starting point as a java.sql.Timestamp -- the number of nanoseconds
+ * since the epoch (1970-01-01, or the day Unix roared awake) where negative numbers represent
+ * earlier days.
+ *
+ * However, the PisaTimestamp class has different design requirements than java.sql.Timestamp.
+ * It is designed to be mutable and NOT thread-safe, to avoid high memory allocation / garbage
+ * collection costs. It also provides for ease of use by our vectorization code, avoiding the
+ * high CPU data cache miss cost for small objects by allowing the epoch day and nano of day
+ * to be stored externally (i.e. in vector arrays).
+ *
+ * And, importantly, PisaTimestamp is a light-weight class similar to the epochDay/NanoOfDay of
+ * the newer Java 8 LocalDateTime class, except the timestamp is *indifferent* to timezone.
+ *
+ * A common usage would be to treat it as UTC.
+ *
+ * You can work with days, seconds, milliseconds, nanoseconds, etc. But to work with months you
+ * will need to convert to an external timestamp object and use calendars, etc.
+ *
+ * The storage for a PisaTimestamp is:
+ *
+ *   long epochDay
+ *     // The number of days since 1970-01-01 (==> similar to Java 8 LocalDate).
+ *   long nanoOfDay
+ *     // The number of nanoseconds within the day, with the range of
+ *     // 0 to 24 * 60 * 60 * 1,000,000,000 - 1 (==> similar to Java 8 LocalTime).
+ *
+ * Both epochDay and nanoOfDay are signed.
+ *
+ * When both epochDay and nanoOfDay are non-zero, we will maintain them so they have the
+ * same sign.
+ *
+ */
+
+public class PisaTimestamp {
+
+  private static final long serialVersionUID = 1L;
+
+  private long epochDay;
+  private long nanoOfDay;
+
+  public static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
+  public static final long NANOSECONDS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1);
+  public static final long NANOSECONDS_PER_DAY = TimeUnit.DAYS.toNanos(1);
+  public static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1);
+  public static final long MILLISECONDS_PER_DAY = TimeUnit.DAYS.toMillis(1);
+  public static final long SECONDS_PER_DAY = TimeUnit.DAYS.toSeconds(1);
+
+
+  public long getEpochDay() {
+    return epochDay;
+  }
+
+  public long getNanoOfDay() {
+    return nanoOfDay;
+  }
+
+  public PisaTimestamp() {
+    epochDay = 0;
+    nanoOfDay = 0;
+  }
+
+  public PisaTimestamp(Timestamp timestamp) {
+    epochDay = 0;
+    nanoOfDay = 0;
+    update(timestamp);
+  }
+
+  public void reset() {
+    epochDay = 0;
+    nanoOfDay = 0;
+  }
+
+  /**
+   * NOTE: This method validates the integrity rules between epoch day and nano of day,
+   * but not overflow/underflow of epoch day. Since epoch day overflow/underflow can result
+   * from client data input, that must be checked manually, as this class does not throw
+   * data range exceptions as a rule. It leaves that choice to the caller.
+   * @param epochDay
+   * @param nanoOfDay
+   * @return true if epoch day and nano of day have integrity.
+   */
+  public static boolean validateIntegrity(long epochDay, long nanoOfDay) {
+
+    // Range check nano per day as invariant.
+    if (nanoOfDay >= NANOSECONDS_PER_DAY || nanoOfDay <= -NANOSECONDS_PER_DAY) {
+      return false;
+    }
+
+    // Signs of epoch day and nano of day must match.
+    if (!(epochDay >= 0 && nanoOfDay >= 0 ||
+          epochDay <= 0 && nanoOfDay <= 0)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Set the PisaTimestamp from a Timestamp object.
+   * @param timestamp
+   * @return this
+   */
+  public PisaTimestamp update(Timestamp timestamp) {
+
+    // For timestamps, we don't use the milliseconds part. It is covered by nanos.
+    long epochSeconds = timestamp.getTime() / MILLISECONDS_PER_SECOND;
+
+    epochDay = epochSeconds / SECONDS_PER_DAY;
+
+    // NOTE: (Timestamp) Nanos is always positive, so we have to compensate below.
+    int nanos = timestamp.getNanos();
+
+    if (epochSeconds >= 0) {
+      nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND + nanos;
+    } else {
+      // Compensate ....
+      nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND - 1000000000 - nanos;
+    }
+
+    Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay));
+    return this;
+  }
+
+  /**
+   * Set this PisaTimestamp from another PisaTimestamp.
+   * @param source
+   * @return this
+   */
+  public PisaTimestamp update(PisaTimestamp source) {
+    this.epochDay = source.epochDay;
+    this.nanoOfDay = source.nanoOfDay;
+    return this;
+  }
+
+  /**
+   * Set this PisaTimestamp from an epoch day and nano of day.
+   * @param epochDay
+   * @param nanoOfDay
+   * @return this
+   */
+  public PisaTimestamp update(long epochDay, long nanoOfDay) {
+
+    Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay));
+
+    this.epochDay = epochDay;
+    this.nanoOfDay = nanoOfDay;
+    return this;
+  }
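To make the representation concrete, here is a small illustration (not from the patch) of how a signed epoch-nanosecond value decomposes into the (epochDay, nanoOfDay) pair that `update(long, long)` accepts. Java's truncating division and remainder keep the two fields sign-consistent, which is exactly what `validateIntegrity` checks:

```java
// Illustration only: split signed epoch nanoseconds into (epochDay, nanoOfDay)
// the way PisaTimestamp stores them.
public class PisaSplitDemo {
  static final long NANOS_PER_DAY = 24L * 60 * 60 * 1_000_000_000L;

  public static void main(String[] args) {
    long[] samples = {1_500_000_000L, -1_500_000_000L, 3 * NANOS_PER_DAY + 42L};
    for (long epochNanos : samples) {
      long epochDay = epochNanos / NANOS_PER_DAY;   // truncates toward zero
      long nanoOfDay = epochNanos % NANOS_PER_DAY;  // carries the sign of epochNanos
      // Integrity rules from validateIntegrity: |nanoOfDay| < NANOS_PER_DAY
      // and the two fields never have opposite signs.
      assert Math.abs(nanoOfDay) < NANOS_PER_DAY;
      assert (epochDay >= 0 && nanoOfDay >= 0) || (epochDay <= 0 && nanoOfDay <= 0);
      System.out.printf("%d -> epochDay=%d, nanoOfDay=%d%n", epochNanos, epochDay, nanoOfDay);
    }
  }
}
```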
+
+  /**
+   * Set this PisaTimestamp from epoch milliseconds.
+   * @param epochMilliseconds
+   * @return this
+   */
+  public PisaTimestamp updateFromEpochMilliseconds(long epochMilliseconds) {
+
+    long epochDay = epochMilliseconds / MILLISECONDS_PER_DAY;
+    long nanoOfDay = (epochMilliseconds % MILLISECONDS_PER_DAY) * NANOSECONDS_PER_MILLISECOND;
+
+    Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay));
+
+    this.epochDay = epochDay;
+    this.nanoOfDay = nanoOfDay;
+    return this;
+  }
+
+  /**
+   * Set this PisaTimestamp from epoch nanoseconds.
+   * @param epochNanoseconds
+   * @return this
+   */
+  public PisaTimestamp updateFromEpochNanoseconds(long epochNanoseconds) {
+
+    long epochDay = epochNanoseconds / NANOSECONDS_PER_DAY;
+    long nanoOfDay = epochNanoseconds % NANOSECONDS_PER_DAY;
+
+    Preconditions.checkState(validateIntegrity(epochDay, nanoOfDay));
+
+    this.epochDay = epochDay;
+    this.nanoOfDay = nanoOfDay;
+    return this;
+  }
+
+  /**
+   * Set a scratch PisaTimestamp with this PisaTimestamp's values and return the scratch object.
+   * @param scratch
+   */
+  public PisaTimestamp scratchCopy(PisaTimestamp scratch) {
+
+    scratch.epochDay = epochDay;
+    scratch.nanoOfDay = nanoOfDay;
+    return scratch;
+  }
+
+  /**
+   * Set a Timestamp object from this PisaTimestamp.
+   * @param timestamp
+   */
+  public void timestampUpdate(Timestamp timestamp) {
+    long epochSeconds = epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND;
+
+    // NOTE: (Timestamp) Nanos is always positive, so we have to take the absolute value.
+    int nanos = (int) Math.abs(nanoOfDay % NANOSECONDS_PER_SECOND);
+
+    ((Date) timestamp).setTime(epochSeconds * MILLISECONDS_PER_SECOND);
+    timestamp.setNanos(nanos);
+  }
+
+  public int compareTo(PisaTimestamp another) {
+
+    if (epochDay == another.epochDay) {
+      if (nanoOfDay == another.nanoOfDay){
+        return 0;
+      } else {
+        return (nanoOfDay < another.nanoOfDay ? -1 : 1);
+      }
+    } else {
+      return (epochDay < another.epochDay ? -1 : 1);
+    }
+  }
+
+  public static int compareTo(long epochDay1, long nanoOfDay1, PisaTimestamp another) {
+
+    if (epochDay1 == another.epochDay) {
+      if (nanoOfDay1 == another.nanoOfDay){
+        return 0;
+      } else {
+        return (nanoOfDay1 < another.nanoOfDay ? -1 : 1);
+      }
+    } else {
+      return (epochDay1 < another.epochDay ? -1 : 1);
+    }
+  }
+
+  public static int compareTo(PisaTimestamp pisaTimestamp1, long epochDay2, long nanoOfDay2) {
+
+    if (pisaTimestamp1.epochDay == epochDay2) {
+      if (pisaTimestamp1.nanoOfDay == nanoOfDay2){
+        return 0;
+      } else {
+        return (pisaTimestamp1.nanoOfDay < nanoOfDay2 ? -1 : 1);
+      }
+    } else {
+      return (pisaTimestamp1.epochDay < epochDay2 ? -1 : 1);
+    }
+  }
+
+  public static int compareTo(long epochDay1, long nanoOfDay1, long epochDay2, long nanoOfDay2) {
+
+    if (epochDay1 == epochDay2) {
+      if (nanoOfDay1 == nanoOfDay2){
+        return 0;
+      } else {
+        return (nanoOfDay1 < nanoOfDay2 ? -1 : 1);
+      }
+    } else {
+      return (epochDay1 < epochDay2 ? -1 : 1);
+    }
+  }
+
+  public boolean equals(PisaTimestamp other) {
+
+    if (epochDay == other.epochDay) {
+      if (nanoOfDay == other.nanoOfDay) {
+        return true;
+      } else {
+        return false;
+      }
+    } else {
+      return false;
+    }
+  }
+
+  public static void add(PisaTimestamp pisaTimestamp1, PisaTimestamp pisaTimestamp2,
+      PisaTimestamp result) {
+    add(pisaTimestamp1.epochDay, pisaTimestamp1.nanoOfDay,
+        pisaTimestamp2.epochDay, pisaTimestamp2.nanoOfDay,
+        result);
+  }
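The four `compareTo` overloads all implement the same lexicographic order on (epochDay, nanoOfDay); because the two fields are kept sign-consistent, that coincides with chronological order. A hypothetical sketch of the same rule applied to parallel long arrays, as a TimestampColumnVector stores them:

```java
import java.util.Arrays;

public class PisaCompareDemo {
  public static void main(String[] args) {
    // (epochDay, nanoOfDay) pairs, stored column-wise as in a TimestampColumnVector.
    long[] epochDay  = { 1L, 1L, -2L, 0L };
    long[] nanoOfDay = { 5L, 3L, -7L, 0L };

    Integer[] idx = { 0, 1, 2, 3 };
    Arrays.sort(idx, (a, b) -> {
      // Same rule as PisaTimestamp.compareTo: epochDay first, then nanoOfDay.
      if (epochDay[a] != epochDay[b]) {
        return epochDay[a] < epochDay[b] ? -1 : 1;
      }
      return Long.compare(nanoOfDay[a], nanoOfDay[b]);
    });
    System.out.println(Arrays.asList(idx)); // [2, 3, 1, 0]
  }
}
```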
+
+  public static void add(long epochDay1, long nanoOfDay1,
+      long epochDay2, long nanoOfDay2,
+      PisaTimestamp result) {
+
+    // Validate integrity rules between epoch day and nano of day.
+    Preconditions.checkState(PisaTimestamp.validateIntegrity(epochDay1, nanoOfDay1));
+    Preconditions.checkState(PisaTimestamp.validateIntegrity(epochDay2, nanoOfDay2));
+
+    long resultEpochDay = epochDay1 + epochDay2;
+    long resultNanoOfDay = nanoOfDay1 + nanoOfDay2;
+
+    if (resultNanoOfDay >= NANOSECONDS_PER_DAY) {
+      // Carry.
+
+      // Both must have been positive.
+      Preconditions.checkState(nanoOfDay1 > 0 && nanoOfDay2 > 0);
+
+      // Positive epoch day and positive (or zero) nano of day.
+      resultEpochDay += resultNanoOfDay / NANOSECONDS_PER_DAY;
+      resultNanoOfDay = resultNanoOfDay % NANOSECONDS_PER_DAY;
+
+    } else if (resultNanoOfDay <= -NANOSECONDS_PER_DAY) {
+      // Underflow.
+
+      // Both must have been negative.
+      Preconditions.checkState(nanoOfDay1 < 0 && nanoOfDay2 < 0);
+
+      // Negative epoch day and negative (or zero) nano of day.
+      resultEpochDay += resultNanoOfDay / NANOSECONDS_PER_DAY;
+      resultNanoOfDay = resultNanoOfDay % NANOSECONDS_PER_DAY;
+
+    }
+
+    // The update method will validate integrity rules between epoch day and nano of day,
+    // but not overflow/underflow of epoch day.
+    result.update(resultEpochDay, resultNanoOfDay);
+  }
+
+  public static void addSeconds(PisaTimestamp timestamp1, long epochSeconds, PisaTimestamp result) {
+    long epochDay = epochSeconds / SECONDS_PER_DAY;
+    long nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND;
+    add(timestamp1.epochDay, timestamp1.nanoOfDay, epochDay, nanoOfDay, result);
+  }
+
+  public static void subtract(PisaTimestamp timestamp1, PisaTimestamp timestamp2,
+      PisaTimestamp result) {
+
+    add(timestamp1.epochDay, timestamp1.nanoOfDay, -timestamp2.epochDay, -timestamp2.nanoOfDay,
+        result);
+  }
+
+  public static void subtract(long epochDay1, long nanoOfDay1,
+      long epochDay2, long nanoOfDay2,
+      PisaTimestamp result) {
+
+    add(epochDay1, nanoOfDay1, -epochDay2, -nanoOfDay2, result);
+  }
+
+  public static void subtractSeconds(PisaTimestamp timestamp1, long epochSeconds,
+      PisaTimestamp result) {
+    long epochDay = epochSeconds / SECONDS_PER_DAY;
+    long nanoOfDay = (epochSeconds % SECONDS_PER_DAY) * NANOSECONDS_PER_SECOND;
+    add(timestamp1.epochDay, timestamp1.nanoOfDay, -epochDay, -nanoOfDay, result);
+  }
+
+  /**
+   * Return a double with the integer part as the epoch seconds and the fractional part as
+   * the nanoseconds.
+   * @return seconds.nanoseconds
+   */
+  public double getEpochSecondsWithFractionalNanos() {
+
+    double seconds = epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND;
+    double nanos = nanoOfDay % NANOSECONDS_PER_SECOND;
+
+    return seconds + nanos / NANOSECONDS_PER_SECOND;
+  }
+
+  /**
+   * Return a double with the integer part as the epoch seconds and the fractional part as
+   * the nanoseconds, given the epoch day and nano of day.
+   * @param epochDay
+   * @param nanoOfDay
+   * @return seconds.nanoseconds
+   */
+  public static double getEpochSecondsWithFractionalNanos(long epochDay, long nanoOfDay) {
+
+    double seconds = epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND;
+    double nanos = nanoOfDay % NANOSECONDS_PER_SECOND;
+
+    return seconds + nanos / NANOSECONDS_PER_SECOND;
+  }
+
+  /**
+   * Return the epoch seconds.
+   * @return
+   */
+  public long getEpochSeconds() {
+    return epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND;
+  }
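`getEpochSecondsWithFractionalNanos` packs a timestamp into a single double, which is what the variance aggregates consume. A double carries roughly 15-16 significant decimal digits, so timestamps far from the epoch silently lose their smallest fractional digits. A sketch (a standalone re-implementation of the same arithmetic, not patch code) showing the rounding:

```java
public class FractionalSecondsDemo {
  static final long SECONDS_PER_DAY = 86_400L;
  static final long NANOS_PER_SECOND = 1_000_000_000L;

  static double toFractionalSeconds(long epochDay, long nanoOfDay) {
    // Same shape as PisaTimestamp.getEpochSecondsWithFractionalNanos:
    // whole seconds (long math) plus the remaining nanos as a fraction.
    double seconds = epochDay * SECONDS_PER_DAY + nanoOfDay / NANOS_PER_SECOND;
    double nanos = nanoOfDay % NANOS_PER_SECOND;
    return seconds + nanos / NANOS_PER_SECOND;
  }

  public static void main(String[] args) {
    // Epoch second 1_000_000_000 (in 2001), plus a single nanosecond.
    long epochDay = 1_000_000_000L / SECONDS_PER_DAY;
    long nanoOfDay = (1_000_000_000L % SECONDS_PER_DAY) * NANOS_PER_SECOND + 1;
    double d = toFractionalSeconds(epochDay, nanoOfDay);
    // The 1ns fraction is below the double's resolution at this magnitude,
    // so it rounds away: d == 1.0E9 exactly.
    System.out.println(d == 1.0E9);
  }
}
```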
+
+  /**
+   * Return the epoch seconds, given the epoch day and nano of day.
+   * @param epochDay
+   * @param nanoOfDay
+   * @return
+   */
+  public static long getEpochSecondsFromEpochDayAndNanoOfDay(long epochDay, long nanoOfDay) {
+    return epochDay * SECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_SECOND;
+  }
+
+  /**
+   * Return the epoch milliseconds.
+   * @return
+   */
+  public long getEpochMilliseconds() {
+    return epochDay * MILLISECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_MILLISECOND;
+  }
+
+  /**
+   * Return the epoch milliseconds, given the epoch day and nano of day.
+   * @param epochDay
+   * @param nanoOfDay
+   * @return
+   */
+  public static long getEpochMillisecondsFromEpochDayAndNanoOfDay(long epochDay, long nanoOfDay) {
+    return epochDay * MILLISECONDS_PER_DAY + nanoOfDay / NANOSECONDS_PER_MILLISECOND;
+  }
+
+  /**
+   * Return the Timestamp class style nanos (0 .. 999999999).
+   * @return
+   */
+  public int getNanos() {
+    // NOTE: (Timestamp) Nanos is always positive.
+    return (int) Math.abs(nanoOfDay % NANOSECONDS_PER_SECOND);
+  }
+
+  /**
+   * Return the Timestamp class style nanos (0 .. 999999999), given the nano of day.
+   * @param nanoOfDay
+   * @return
+   */
+  public static int getNanosFromNanoOfDay(long nanoOfDay) {
+    // NOTE: (Timestamp) Nanos is always positive.
+    return (int) Math.abs(nanoOfDay % NANOSECONDS_PER_SECOND);
+  }
+
+  public long getTotalNanoseconds() {
+    return epochDay * NANOSECONDS_PER_DAY + nanoOfDay;
+  }
+
+  public static long getTotalNanoseconds(long epochDay, long nanoOfDay) {
+    return epochDay * NANOSECONDS_PER_DAY + nanoOfDay;
+  }
+}
\ No newline at end of file
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index fcb1ae9..4ae9c47 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -42,6 +42,7 @@
     DOUBLE,
     BYTES,
     DECIMAL,
+    TIMESTAMP,
     STRUCT,
     LIST,
     MAP,
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
new file mode 100644
index 0000000..054a8f1
--- /dev/null
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java
@@ -0,0 +1,393 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.sql.Timestamp;
+
+import org.apache.hadoop.hive.common.type.PisaTimestamp;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class represents a nullable timestamp column vector capable of handling a wide range of
+ * timestamp values.
+ *
+ * We use the PisaTimestamp which is designed to be mutable and avoid the heavy memory allocation
+ * and CPU data cache miss costs.
+ */
+public class TimestampColumnVector extends ColumnVector {
+
+  /*
+   * The storage arrays for this column vector correspond to the storage of a PisaTimestamp:
+   */
+  private long[] epochDay;
+  // An array of the number of days since 1970-01-01 (similar to Java 8 LocalDate).
+
+  private long[] nanoOfDay;
+  // An array of the number of nanoseconds within the day, with the range of
+  // 0 to 24 * 60 * 60 * 1,000,000,000 - 1 (similar to Java 8 LocalTime).
+
+  /*
+   * Scratch objects.
+   */
+  private PisaTimestamp scratchPisaTimestamp;
+  private Timestamp scratchTimestamp;
+  // Convenience scratch timestamp objects.
+
+  private Writable scratchWritable;
+  // Supports keeping a TimestampWritable object without having to import that definition...
+
+  /**
+   * Use this constructor by default. All column vectors
+   * should normally be the default size.
+   */
+  public TimestampColumnVector() {
+    this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Don't use this except for testing purposes.
+   *
+   * @param len the number of rows
+   */
+  public TimestampColumnVector(int len) {
+    super(len);
+
+    epochDay = new long[len];
+    nanoOfDay = new long[len];
+
+    scratchPisaTimestamp = new PisaTimestamp();
+    scratchTimestamp = new Timestamp(0);
+
+    scratchWritable = null; // Allocated by caller.
+  }
+
+  public int getLength() {
+    return epochDay.length;
+  }
+
+  /**
+   * Return a row's epoch day.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return
+   */
+  public long getEpochDay(int elementNum) {
+    return epochDay[elementNum];
+  }
+
+  /**
+   * Return a row's nano of day.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return
+   */
+  public long getNanoOfDay(int elementNum) {
+    return nanoOfDay[elementNum];
+  }
+
+  /**
+   * Get a scratch PisaTimestamp object from a row of the column.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return scratch
+   */
+  public PisaTimestamp getPisaTimestamp(int elementNum) {
+    scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+    return scratchPisaTimestamp;
+  }
+
+  /**
+   * Set a PisaTimestamp object from a row of the column.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param pisaTimestamp
+   * @param elementNum
+   */
+  public void pisaTimestampUpdate(PisaTimestamp pisaTimestamp, int elementNum) {
+    pisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+  }
+
+  /**
+   * Set a Timestamp object from a row of the column.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param timestamp
+   * @param elementNum
+   */
+  public void timestampUpdate(Timestamp timestamp, int elementNum) {
+    scratchPisaTimestamp.update(epochDay[elementNum], nanoOfDay[elementNum]);
+    scratchPisaTimestamp.timestampUpdate(timestamp);
+  }
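A sketch of the intended write/read round trip through the vector (assuming the patch classes are on the classpath; the values are illustrative only):

```java
import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.PisaTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class TimestampVectorDemo {
  public static void main(String[] args) {
    TimestampColumnVector col = new TimestampColumnVector(); // DEFAULT_SIZE rows

    // Write row 0 from a java.sql.Timestamp; the vector stores it as
    // (epochDay, nanoOfDay) in its two long arrays.
    Timestamp in = Timestamp.valueOf("2007-12-03 10:15:30.012345678");
    col.set(0, in);

    // Read it back without allocating: reuse a scratch Timestamp.
    Timestamp out = new Timestamp(0);
    col.timestampUpdate(out, 0);
    System.out.println(out.equals(in)); // should print true for post-epoch values

    // Or work with the raw fields via a mutable PisaTimestamp.
    PisaTimestamp pisa = new PisaTimestamp();
    col.pisaTimestampUpdate(pisa, 0);
    System.out.println(pisa.getEpochDay() + " " + pisa.getNanoOfDay());
  }
}
```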
+
+  /**
+   * Compare row to PisaTimestamp.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @param pisaTimestamp
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(int elementNum, PisaTimestamp pisaTimestamp) {
+    return PisaTimestamp.compareTo(epochDay[elementNum], nanoOfDay[elementNum], pisaTimestamp);
+  }
+
+  /**
+   * Compare PisaTimestamp to row.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param pisaTimestamp
+   * @param elementNum
+   * @return -1, 0, 1 standard compareTo values.
+   */
+  public int compareTo(PisaTimestamp pisaTimestamp, int elementNum) {
+    return PisaTimestamp.compareTo(pisaTimestamp, epochDay[elementNum], nanoOfDay[elementNum]);
+  }
+
+  /**
+   * Compare a row to another TimestampColumnVector's row.
+   * @param elementNum1
+   * @param timestampColVector2
+   * @param elementNum2
+   * @return
+   */
+  public int compareTo(int elementNum1, TimestampColumnVector timestampColVector2,
+      int elementNum2) {
+    return PisaTimestamp.compareTo(
+        epochDay[elementNum1], nanoOfDay[elementNum1],
+        timestampColVector2.epochDay[elementNum2], timestampColVector2.nanoOfDay[elementNum2]);
+  }
+
+  /**
+   * Compare another TimestampColumnVector's row to a row.
+   * @param timestampColVector1
+   * @param elementNum1
+   * @param elementNum2
+   * @return
+   */
+  public int compareTo(TimestampColumnVector timestampColVector1, int elementNum1,
+      int elementNum2) {
+    return PisaTimestamp.compareTo(
+        timestampColVector1.epochDay[elementNum1], timestampColVector1.nanoOfDay[elementNum1],
+        epochDay[elementNum2], nanoOfDay[elementNum2]);
+  }
+
+  public void add(PisaTimestamp timestamp1, PisaTimestamp timestamp2, int resultElementNum) {
+    PisaTimestamp.add(timestamp1, timestamp2, scratchPisaTimestamp);
+    epochDay[resultElementNum] = scratchPisaTimestamp.getEpochDay();
+    nanoOfDay[resultElementNum] = scratchPisaTimestamp.getNanoOfDay();
+  }
+
+  public void subtract(PisaTimestamp timestamp1, PisaTimestamp timestamp2, int resultElementNum) {
+    PisaTimestamp.subtract(timestamp1, timestamp2, scratchPisaTimestamp);
+    epochDay[resultElementNum] = scratchPisaTimestamp.getEpochDay();
+    nanoOfDay[resultElementNum] = scratchPisaTimestamp.getNanoOfDay();
+  }
+
+  /**
+   * Return row as a double with the integer part as the epoch seconds and the fractional part as
+   * the nanoseconds.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return seconds.nanoseconds
+   */
+  public double getEpochSecondsWithFractionalNanos(int elementNum) {
+    return PisaTimestamp.getEpochSecondsWithFractionalNanos(
+        epochDay[elementNum], nanoOfDay[elementNum]);
+  }
+
+  /**
+   * Return row as epoch seconds.
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return
+   */
+  public long getEpochSeconds(int elementNum) {
+    return PisaTimestamp.getEpochSecondsFromEpochDayAndNanoOfDay(epochDay[elementNum], nanoOfDay[elementNum]);
+  }
+
+  /**
+   * Return row as the Timestamp class style nanos (0 .. 999999999).
+   * We assume the entry has already been NULL checked and isRepeated adjusted.
+   * @param elementNum
+   * @return
+   */
+  public int getNanos(int elementNum) {
+    return PisaTimestamp.getNanosFromNanoOfDay(nanoOfDay[elementNum]);
+  }
+
+  public long getTotalNanoseconds(int elementNum) {
+    return PisaTimestamp.getTotalNanoseconds(epochDay[elementNum], nanoOfDay[elementNum]);
+  }
+
+  @Override
+  public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+
+    TimestampColumnVector timestampColVector = (TimestampColumnVector) inputVector;
+
+    epochDay[outElementNum] = timestampColVector.epochDay[inputElementNum];
+    nanoOfDay[outElementNum] = timestampColVector.nanoOfDay[inputElementNum];
+  }
+
+  @Override
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+    // TODO Auto-generated method stub
+
+  }
+
+  /**
+   * Set a row from a PisaTimestamp.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param pisaTimestamp
+   */
+  public void set(int elementNum, PisaTimestamp pisaTimestamp) {
+    this.epochDay[elementNum] = pisaTimestamp.getEpochDay();
+    this.nanoOfDay[elementNum] = pisaTimestamp.getNanoOfDay();
+  }
+
+  /**
+   * Set a row from a timestamp.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param timestamp
+   */
+  public void set(int elementNum, Timestamp timestamp) {
+    scratchPisaTimestamp.update(timestamp);
+    this.epochDay[elementNum] = scratchPisaTimestamp.getEpochDay();
+    this.nanoOfDay[elementNum] = scratchPisaTimestamp.getNanoOfDay();
+  }
+
+  /**
+   * Set a row from epoch nanoseconds.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param totalNanoseconds
+   */
+  public void setEpochNanoseconds(int elementNum, long totalNanoseconds) {
+    epochDay[elementNum] = totalNanoseconds / PisaTimestamp.NANOSECONDS_PER_DAY;
+    nanoOfDay[elementNum] = totalNanoseconds % PisaTimestamp.NANOSECONDS_PER_DAY;
+  }
+
+  /**
+   * Set a row from epoch milliseconds.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param epochMilliseconds
+   */
+  public void setEpochMilliseconds(int elementNum, long epochMilliseconds) {
+    epochDay[elementNum] = epochMilliseconds / PisaTimestamp.MILLISECONDS_PER_DAY;
+    nanoOfDay[elementNum] =
+        (epochMilliseconds % PisaTimestamp.MILLISECONDS_PER_DAY) * PisaTimestamp.NANOSECONDS_PER_MILLISECOND;
+  }
+
+  /**
+   * Set a row from epoch seconds.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param epochSeconds
+   */
+  public void setEpochSeconds(int elementNum, long epochSeconds) {
+    epochDay[elementNum] = epochSeconds / PisaTimestamp.SECONDS_PER_DAY;
+    nanoOfDay[elementNum] =
+        (epochSeconds % PisaTimestamp.SECONDS_PER_DAY) * PisaTimestamp.NANOSECONDS_PER_SECOND;
+  }
+
+  /**
+   * Set a row from a double epoch seconds with fractional nanoseconds.
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   * @param epochSecondsWithFractionalNanoseconds
+   */
+  public void setEpochSeconds(int elementNum, double epochSecondsWithFractionalNanoseconds) {
+    epochDay[elementNum] = (long) epochSecondsWithFractionalNanoseconds / PisaTimestamp.SECONDS_PER_DAY;
+    nanoOfDay[elementNum] = (long)
+        ((epochSecondsWithFractionalNanoseconds % PisaTimestamp.SECONDS_PER_DAY) *
+         PisaTimestamp.NANOSECONDS_PER_SECOND);
+  }
+
+  /**
+   * Set row to standard null value(s).
+   * We assume the entry has already been isRepeated adjusted.
+   * @param elementNum
+   */
+  public void setNullValue(int elementNum) {
+    epochDay[elementNum] = 1;
+    nanoOfDay[elementNum] = 1;
+  }
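`setNullValue` only writes harmless placeholder data; actual nullness is tracked by the inherited `noNulls`/`isNull` fields, as the `stringifyValue` implementation further down shows when reading. A hypothetical writer loop that maintains the convention:

```java
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class NullConventionDemo {
  // Copy possibly-null epoch-second values into a timestamp vector,
  // maintaining the noNulls / isNull convention used by readers.
  static void write(TimestampColumnVector col, Long[] epochSeconds) {
    col.noNulls = true;
    col.isRepeating = false;
    for (int i = 0; i < epochSeconds.length; i++) {
      if (epochSeconds[i] == null) {
        col.isNull[i] = true;
        col.noNulls = false;
        col.setNullValue(i);   // placeholder data, never read
      } else {
        col.isNull[i] = false;
        col.setEpochSeconds(i, epochSeconds[i].longValue());
      }
    }
  }
}
```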
+
+  /**
+   * Fill all the vector entries with a PisaTimestamp.
+   * @param pisaTimestamp
+   */
+  public void fill(PisaTimestamp pisaTimestamp) {
+    noNulls = true;
+    isRepeating = true;
+    epochDay[0] = pisaTimestamp.getEpochDay();
+    nanoOfDay[0] = pisaTimestamp.getNanoOfDay();
+  }
+
+  /**
+   * Fill all the vector entries with a timestamp.
+   * @param timestamp
+   */
+  public void fill(Timestamp timestamp) {
+    noNulls = true;
+    isRepeating = true;
+    scratchPisaTimestamp.update(timestamp);
+    epochDay[0] = scratchPisaTimestamp.getEpochDay();
+    nanoOfDay[0] = scratchPisaTimestamp.getNanoOfDay();
+  }
+
+  /**
+   * Fill all the vector entries with total nanoseconds.
+   * @param totalNanoseconds
+   */
+  public void fillTotalNanoseconds(long totalNanoseconds) {
+    noNulls = true;
+    isRepeating = true;
+    epochDay[0] = totalNanoseconds / PisaTimestamp.NANOSECONDS_PER_DAY;
+    nanoOfDay[0] = totalNanoseconds % PisaTimestamp.NANOSECONDS_PER_DAY;
+  }
+
+  /**
+   * Return a convenience writable object stored by this column vector.
+   * Supports keeping a TimestampWritable object without having to import that definition...
+   * @return
+   */
+  public Writable getScratchWritable() {
+    return scratchWritable;
+  }
+
+  /**
+   * Set the convenience writable object stored by this column vector.
+   * @param scratchWritable
+   */
+  public void setScratchWritable(Writable scratchWritable) {
+    this.scratchWritable = scratchWritable;
+  }
+
+  @Override
+  public void stringifyValue(StringBuilder buffer, int row) {
+    if (isRepeating) {
+      row = 0;
+    }
+    if (noNulls || !isNull[row]) {
+      scratchPisaTimestamp.update(epochDay[row], nanoOfDay[row]);
+      scratchPisaTimestamp.timestampUpdate(scratchTimestamp);
+      buffer.append(scratchTimestamp.toString());
+    } else {
+      buffer.append("null");
+    }
+  }
+}
\ No newline at end of file
diff --git storage-api/src/test/org/apache/hadoop/hive/common/type/TestPisaTimestamp.java storage-api/src/test/org/apache/hadoop/hive/common/type/TestPisaTimestamp.java
new file mode 100644
index 0000000..cd59b8d
--- /dev/null
+++ storage-api/src/test/org/apache/hadoop/hive/common/type/TestPisaTimestamp.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.type;
+
+import org.junit.Test;
+
+import java.sql.Timestamp;
+import java.util.Date;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Test for PisaTimestamp.
+ */
+public class TestPisaTimestamp {
+
+  public static final int NANOS_PER_SEC = 1000000000;
+
+  public static Timestamp getRandTimestamp(Random r) {
+    String optionalNanos = "";
+    if (r.nextInt(2) == 1) {
+      optionalNanos = String.format(".%09d",
+          Integer.valueOf(0 + r.nextInt(NANOS_PER_SEC)));
+    }
+    String timestampStr = String.format("%d-%02d-%02d %02d:%02d:%02d%s",
+        Integer.valueOf(1970 + r.nextInt(200)),  // year
+        Integer.valueOf(1 + r.nextInt(12)),      // month
+        Integer.valueOf(1 + r.nextInt(28)),      // day
+        Integer.valueOf(0 + r.nextInt(24)),      // hour
+        Integer.valueOf(0 + r.nextInt(60)),      // minute
+        Integer.valueOf(0 + r.nextInt(60)),      // second
+        optionalNanos);
+    Timestamp timestampVal = Timestamp.valueOf(timestampStr);
+    return timestampVal;
+  }
+
+  public static void updateTimestamp(Timestamp timestamp, long secondsAsMillis, int nanos) {
+    ((Date) timestamp).setTime(secondsAsMillis);
+    timestamp.setNanos(nanos);
+  }
+
+  private static int TEST_COUNT = 5000;
+
+  @Test
+  public void testPisaTimestampCreate() throws Exception {
+
+    Random r = new Random(1234);
+
+    Timestamp[] timestamps = new Timestamp[TEST_COUNT];
+    PisaTimestamp[] pisaTimestamps = new PisaTimestamp[TEST_COUNT];
+    for (int i = 0; i < TEST_COUNT; i++) {
+      timestamps[i] = getRandTimestamp(r);
+      pisaTimestamps[i] = new PisaTimestamp(timestamps[i]);
+
+      long epochSeconds = pisaTimestamps[i].getEpochSeconds();
+      int nanos = pisaTimestamps[i].getNanos();
+      Timestamp timestamp = new Timestamp(0);
+      updateTimestamp(timestamp, epochSeconds * 1000L, nanos);
+      assertEquals(timestamp, timestamps[i]);
+    }
+  }
+}
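The test above only exercises the seconds/nanos split of randomly generated post-1970 timestamps. A further check one might add (hypothetical, not in the patch) is that millisecond precision survives the decomposition, i.e. `getEpochMilliseconds()` agrees with `Timestamp.getTime()` for post-epoch values:

```java
import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.PisaTimestamp;

public class PisaMillisRoundTrip {
  public static void main(String[] args) {
    Timestamp[] samples = {
        Timestamp.valueOf("1970-01-02 03:04:05.678901234"),
        Timestamp.valueOf("2038-01-19 03:14:07.999999999")
    };
    for (Timestamp ts : samples) {
      PisaTimestamp pisa = new PisaTimestamp(ts);
      // For post-epoch values, millisecond precision must survive the
      // (epochDay, nanoOfDay) decomposition; sub-millisecond digits live
      // only in getNanos().
      if (pisa.getEpochMilliseconds() != ts.getTime()) {
        throw new AssertionError("mismatch for " + ts);
      }
    }
    System.out.println("ok");
  }
}
```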