diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 9caffb6..4ebdf90 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -7585,22 +7585,27 @@ private Operator insertSelectForSemijoin(ArrayList fields, RowResolver inputRR = opParseCtx.get(input).getRowResolver(); ArrayList colList = new ArrayList(); - ArrayList columnNames = new ArrayList(); - + ArrayList outputColumnNames = new ArrayList(); Map colExprMap = new HashMap(); + + RowResolver outputRR = new RowResolver(); + // construct the list of columns that need to be projected - for (ASTNode field : fields) { - ExprNodeColumnDesc exprNode = (ExprNodeColumnDesc) genExprNodeDesc(field, - inputRR); + for (int i = 0; i < fields.size(); ++i) { + ASTNode field = fields.get(i); + ExprNodeDesc exprNode = genExprNodeDesc(field, inputRR); + String colName = getColumnInternalName(i); + outputColumnNames.add(colName); + ColumnInfo colInfo = new ColumnInfo(colName, exprNode.getTypeInfo(), "", false); + outputRR.putExpression(field, colInfo); colList.add(exprNode); - columnNames.add(exprNode.getColumn()); - colExprMap.put(exprNode.getColumn(), exprNode); + colExprMap.put(colName, exprNode); } // create selection operator Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new SelectDesc(colList, columnNames, false), new RowSchema(inputRR - .getColumnInfos()), input), inputRR); + new SelectDesc(colList, outputColumnNames, false), + new RowSchema(outputRR.getColumnInfos()), input), outputRR); output.setColumnExprMap(colExprMap); return output; diff --git ql/src/test/queries/clientpositive/semijoin2.q ql/src/test/queries/clientpositive/semijoin2.q new file mode 100644 index 0000000..6f852c4 --- /dev/null +++ ql/src/test/queries/clientpositive/semijoin2.q @@ -0,0 +1,21 @@ +CREATE TABLE table_1 (boolean_col_1 BOOLEAN, float_col_2 FLOAT, bigint_col_3 BIGINT, varchar0111_col_4 VARCHAR(111), bigint_col_5 BIGINT, float_col_6 FLOAT, boolean_col_7 BOOLEAN, decimal0101_col_8 DECIMAL(1, 1), decimal0904_col_9 DECIMAL(9, 4), char0112_col_10 CHAR(112), double_col_11 DOUBLE, boolean_col_12 BOOLEAN, double_col_13 DOUBLE, varchar0142_col_14 VARCHAR(142), timestamp_col_15 TIMESTAMP, decimal0502_col_16 DECIMAL(5, 2), smallint_col_25 SMALLINT, decimal3222_col_18 DECIMAL(32, 22), boolean_col_19 BOOLEAN, decimal2012_col_20 DECIMAL(20, 12), char0204_col_21 CHAR(204), double_col_61 DOUBLE, timestamp_col_23 TIMESTAMP, int_col_24 INT, float_col_25 FLOAT, smallint_col_26 SMALLINT, double_col_27 DOUBLE, char0180_col_28 CHAR(180), decimal1503_col_29 DECIMAL(15, 3), timestamp_col_30 TIMESTAMP, smallint_col_31 SMALLINT, decimal2020_col_32 DECIMAL(20, 20), timestamp_col_33 TIMESTAMP, boolean_col_34 BOOLEAN, decimal3025_col_35 DECIMAL(30, 25), decimal3117_col_36 DECIMAL(31, 17), timestamp_col_37 TIMESTAMP, varchar0146_col_38 VARCHAR(146), boolean_col_39 BOOLEAN, double_col_40 DOUBLE, float_col_41 FLOAT, timestamp_col_42 TIMESTAMP, double_col_43 DOUBLE, boolean_col_44 BOOLEAN, timestamp_col_45 TIMESTAMP, tinyint_col_8 TINYINT, int_col_47 INT, decimal0401_col_48 DECIMAL(4, 1), varchar0064_col_49 VARCHAR(64), string_col_50 STRING, double_col_51 DOUBLE, string_col_52 STRING, boolean_col_53 BOOLEAN, int_col_54 INT, boolean_col_55 BOOLEAN, string_col_56 STRING, double_col_57 DOUBLE, varchar0131_col_58 VARCHAR(131), boolean_col_59 BOOLEAN, bigint_col_22 BIGINT, char0184_col_61 CHAR(184), varchar0173_col_62 VARCHAR(173), timestamp_col_63 TIMESTAMP, decimal1709_col_26 DECIMAL(20, 5), timestamp_col_65 TIMESTAMP, timestamp_col_66 TIMESTAMP, timestamp_col_67 TIMESTAMP, boolean_col_68 BOOLEAN, decimal1208_col_20 DECIMAL(33, 11), decimal1605_col_70 DECIMAL(16, 5), varchar0010_col_71 VARCHAR(10), tinyint_col_72 TINYINT, timestamp_col_10 TIMESTAMP, decimal2714_col_74 DECIMAL(27, 14), double_col_75 DOUBLE, boolean_col_76 BOOLEAN, double_col_77 DOUBLE, string_col_78 STRING, boolean_col_79 BOOLEAN, boolean_col_80 BOOLEAN, decimal0803_col_81 DECIMAL(8, 3), decimal1303_col_82 DECIMAL(13, 3), tinyint_col_83 TINYINT, decimal3424_col_84 DECIMAL(34, 24), float_col_85 FLOAT, boolean_col_86 BOOLEAN, char0233_col_87 CHAR(233)); + +CREATE TABLE table_18 (timestamp_col_1 TIMESTAMP, double_col_2 DOUBLE, boolean_col_3 BOOLEAN, timestamp_col_4 TIMESTAMP, decimal2103_col_5 DECIMAL(21, 3), char0221_col_6 CHAR(221), tinyint_col_7 TINYINT, float_col_8 FLOAT, int_col_2 INT, timestamp_col_10 TIMESTAMP, char0228_col_11 CHAR(228), timestamp_col_12 TIMESTAMP, double_col_13 DOUBLE, tinyint_col_6 TINYINT, tinyint_col_33 TINYINT, smallint_col_38 SMALLINT, boolean_col_17 BOOLEAN, double_col_18 DOUBLE, boolean_col_19 BOOLEAN, bigint_col_20 BIGINT, decimal0504_col_37 DECIMAL(37, 34), boolean_col_22 BOOLEAN, double_col_23 DOUBLE, timestamp_col_24 TIMESTAMP, varchar0076_col_25 VARCHAR(76), timestamp_col_18 TIMESTAMP, boolean_col_27 BOOLEAN, decimal1611_col_22 DECIMAL(37, 5), boolean_col_29 BOOLEAN); + +set hive.cbo.enable = false; + +explain +SELECT +COALESCE(498, LEAD(COALESCE(-973, -684, 515)) OVER (PARTITION BY (t2.int_col_2 + t1.smallint_col_25) ORDER BY (t2.int_col_2 + t1.smallint_col_25), FLOOR(t1.double_col_61) DESC), 524) AS int_col, +(t2.int_col_2) + (t1.smallint_col_25) AS int_col_1, +FLOOR(t1.double_col_61) AS float_col, +COALESCE(SUM(COALESCE(62, -380, -435)) OVER (PARTITION BY (t2.int_col_2 + t1.smallint_col_25) ORDER BY (t2.int_col_2 + t1.smallint_col_25) DESC, FLOOR(t1.double_col_61) DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 48 FOLLOWING), 704) AS int_col_2 +FROM table_1 t1 +INNER JOIN table_18 t2 ON (((t2.tinyint_col_6) = (t1.bigint_col_22)) AND ((t2.decimal0504_col_37) = (t1.decimal1709_col_26))) AND ((t2.tinyint_col_33) = (t1.tinyint_col_8)) +WHERE +(t2.smallint_col_38) IN (SELECT +COALESCE(-92, -994) AS int_col +FROM table_1 tt1 +INNER JOIN table_18 tt2 ON (tt2.decimal1611_col_22) = (tt1.decimal1208_col_20) +WHERE +(t1.timestamp_col_10) = (tt2.timestamp_col_18)); diff --git ql/src/test/results/clientpositive/semijoin2.q.out ql/src/test/results/clientpositive/semijoin2.q.out new file mode 100644 index 0000000..e7d7271 --- /dev/null +++ ql/src/test/results/clientpositive/semijoin2.q.out @@ -0,0 +1,283 @@ +PREHOOK: query: CREATE TABLE table_1 (boolean_col_1 BOOLEAN, float_col_2 FLOAT, bigint_col_3 BIGINT, varchar0111_col_4 VARCHAR(111), bigint_col_5 BIGINT, float_col_6 FLOAT, boolean_col_7 BOOLEAN, decimal0101_col_8 DECIMAL(1, 1), decimal0904_col_9 DECIMAL(9, 4), char0112_col_10 CHAR(112), double_col_11 DOUBLE, boolean_col_12 BOOLEAN, double_col_13 DOUBLE, varchar0142_col_14 VARCHAR(142), timestamp_col_15 TIMESTAMP, decimal0502_col_16 DECIMAL(5, 2), smallint_col_25 SMALLINT, decimal3222_col_18 DECIMAL(32, 22), boolean_col_19 BOOLEAN, decimal2012_col_20 DECIMAL(20, 12), char0204_col_21 CHAR(204), double_col_61 DOUBLE, timestamp_col_23 TIMESTAMP, int_col_24 INT, float_col_25 FLOAT, smallint_col_26 SMALLINT, double_col_27 DOUBLE, char0180_col_28 CHAR(180), decimal1503_col_29 DECIMAL(15, 3), timestamp_col_30 TIMESTAMP, smallint_col_31 SMALLINT, decimal2020_col_32 DECIMAL(20, 20), timestamp_col_33 TIMESTAMP, boolean_col_34 BOOLEAN, decimal3025_col_35 DECIMAL(30, 25), decimal3117_col_36 DECIMAL(31, 17), timestamp_col_37 TIMESTAMP, varchar0146_col_38 VARCHAR(146), boolean_col_39 BOOLEAN, double_col_40 DOUBLE, float_col_41 FLOAT, timestamp_col_42 TIMESTAMP, double_col_43 DOUBLE, boolean_col_44 BOOLEAN, timestamp_col_45 TIMESTAMP, tinyint_col_8 TINYINT, int_col_47 INT, decimal0401_col_48 DECIMAL(4, 1), varchar0064_col_49 VARCHAR(64), string_col_50 STRING, double_col_51 DOUBLE, string_col_52 STRING, boolean_col_53 BOOLEAN, int_col_54 INT, boolean_col_55 BOOLEAN, string_col_56 STRING, double_col_57 DOUBLE, varchar0131_col_58 VARCHAR(131), boolean_col_59 BOOLEAN, bigint_col_22 BIGINT, char0184_col_61 CHAR(184), varchar0173_col_62 VARCHAR(173), timestamp_col_63 TIMESTAMP, decimal1709_col_26 DECIMAL(20, 5), timestamp_col_65 TIMESTAMP, timestamp_col_66 TIMESTAMP, timestamp_col_67 TIMESTAMP, boolean_col_68 BOOLEAN, decimal1208_col_20 DECIMAL(33, 11), decimal1605_col_70 DECIMAL(16, 5), varchar0010_col_71 VARCHAR(10), tinyint_col_72 TINYINT, timestamp_col_10 TIMESTAMP, decimal2714_col_74 DECIMAL(27, 14), double_col_75 DOUBLE, boolean_col_76 BOOLEAN, double_col_77 DOUBLE, string_col_78 STRING, boolean_col_79 BOOLEAN, boolean_col_80 BOOLEAN, decimal0803_col_81 DECIMAL(8, 3), decimal1303_col_82 DECIMAL(13, 3), tinyint_col_83 TINYINT, decimal3424_col_84 DECIMAL(34, 24), float_col_85 FLOAT, boolean_col_86 BOOLEAN, char0233_col_87 CHAR(233)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_1 +POSTHOOK: query: CREATE TABLE table_1 (boolean_col_1 BOOLEAN, float_col_2 FLOAT, bigint_col_3 BIGINT, varchar0111_col_4 VARCHAR(111), bigint_col_5 BIGINT, float_col_6 FLOAT, boolean_col_7 BOOLEAN, decimal0101_col_8 DECIMAL(1, 1), decimal0904_col_9 DECIMAL(9, 4), char0112_col_10 CHAR(112), double_col_11 DOUBLE, boolean_col_12 BOOLEAN, double_col_13 DOUBLE, varchar0142_col_14 VARCHAR(142), timestamp_col_15 TIMESTAMP, decimal0502_col_16 DECIMAL(5, 2), smallint_col_25 SMALLINT, decimal3222_col_18 DECIMAL(32, 22), boolean_col_19 BOOLEAN, decimal2012_col_20 DECIMAL(20, 12), char0204_col_21 CHAR(204), double_col_61 DOUBLE, timestamp_col_23 TIMESTAMP, int_col_24 INT, float_col_25 FLOAT, smallint_col_26 SMALLINT, double_col_27 DOUBLE, char0180_col_28 CHAR(180), decimal1503_col_29 DECIMAL(15, 3), timestamp_col_30 TIMESTAMP, smallint_col_31 SMALLINT, decimal2020_col_32 DECIMAL(20, 20), timestamp_col_33 TIMESTAMP, boolean_col_34 BOOLEAN, decimal3025_col_35 DECIMAL(30, 25), decimal3117_col_36 DECIMAL(31, 17), timestamp_col_37 TIMESTAMP, varchar0146_col_38 VARCHAR(146), boolean_col_39 BOOLEAN, double_col_40 DOUBLE, float_col_41 FLOAT, timestamp_col_42 TIMESTAMP, double_col_43 DOUBLE, boolean_col_44 BOOLEAN, timestamp_col_45 TIMESTAMP, tinyint_col_8 TINYINT, int_col_47 INT, decimal0401_col_48 DECIMAL(4, 1), varchar0064_col_49 VARCHAR(64), string_col_50 STRING, double_col_51 DOUBLE, string_col_52 STRING, boolean_col_53 BOOLEAN, int_col_54 INT, boolean_col_55 BOOLEAN, string_col_56 STRING, double_col_57 DOUBLE, varchar0131_col_58 VARCHAR(131), boolean_col_59 BOOLEAN, bigint_col_22 BIGINT, char0184_col_61 CHAR(184), varchar0173_col_62 VARCHAR(173), timestamp_col_63 TIMESTAMP, decimal1709_col_26 DECIMAL(20, 5), timestamp_col_65 TIMESTAMP, timestamp_col_66 TIMESTAMP, timestamp_col_67 TIMESTAMP, boolean_col_68 BOOLEAN, decimal1208_col_20 DECIMAL(33, 11), decimal1605_col_70 DECIMAL(16, 5), varchar0010_col_71 VARCHAR(10), tinyint_col_72 TINYINT, timestamp_col_10 TIMESTAMP, decimal2714_col_74 DECIMAL(27, 14), double_col_75 DOUBLE, boolean_col_76 BOOLEAN, double_col_77 DOUBLE, string_col_78 STRING, boolean_col_79 BOOLEAN, boolean_col_80 BOOLEAN, decimal0803_col_81 DECIMAL(8, 3), decimal1303_col_82 DECIMAL(13, 3), tinyint_col_83 TINYINT, decimal3424_col_84 DECIMAL(34, 24), float_col_85 FLOAT, boolean_col_86 BOOLEAN, char0233_col_87 CHAR(233)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_1 +PREHOOK: query: CREATE TABLE table_18 (timestamp_col_1 TIMESTAMP, double_col_2 DOUBLE, boolean_col_3 BOOLEAN, timestamp_col_4 TIMESTAMP, decimal2103_col_5 DECIMAL(21, 3), char0221_col_6 CHAR(221), tinyint_col_7 TINYINT, float_col_8 FLOAT, int_col_2 INT, timestamp_col_10 TIMESTAMP, char0228_col_11 CHAR(228), timestamp_col_12 TIMESTAMP, double_col_13 DOUBLE, tinyint_col_6 TINYINT, tinyint_col_33 TINYINT, smallint_col_38 SMALLINT, boolean_col_17 BOOLEAN, double_col_18 DOUBLE, boolean_col_19 BOOLEAN, bigint_col_20 BIGINT, decimal0504_col_37 DECIMAL(37, 34), boolean_col_22 BOOLEAN, double_col_23 DOUBLE, timestamp_col_24 TIMESTAMP, varchar0076_col_25 VARCHAR(76), timestamp_col_18 TIMESTAMP, boolean_col_27 BOOLEAN, decimal1611_col_22 DECIMAL(37, 5), boolean_col_29 BOOLEAN) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_18 +POSTHOOK: query: CREATE TABLE table_18 (timestamp_col_1 TIMESTAMP, double_col_2 DOUBLE, boolean_col_3 BOOLEAN, timestamp_col_4 TIMESTAMP, decimal2103_col_5 DECIMAL(21, 3), char0221_col_6 CHAR(221), tinyint_col_7 TINYINT, float_col_8 FLOAT, int_col_2 INT, timestamp_col_10 TIMESTAMP, char0228_col_11 CHAR(228), timestamp_col_12 TIMESTAMP, double_col_13 DOUBLE, tinyint_col_6 TINYINT, tinyint_col_33 TINYINT, smallint_col_38 SMALLINT, boolean_col_17 BOOLEAN, double_col_18 DOUBLE, boolean_col_19 BOOLEAN, bigint_col_20 BIGINT, decimal0504_col_37 DECIMAL(37, 34), boolean_col_22 BOOLEAN, double_col_23 DOUBLE, timestamp_col_24 TIMESTAMP, varchar0076_col_25 VARCHAR(76), timestamp_col_18 TIMESTAMP, boolean_col_27 BOOLEAN, decimal1611_col_22 DECIMAL(37, 5), boolean_col_29 BOOLEAN) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_18 +PREHOOK: query: explain +SELECT +COALESCE(498, LEAD(COALESCE(-973, -684, 515)) OVER (PARTITION BY (t2.int_col_2 + t1.smallint_col_25) ORDER BY (t2.int_col_2 + t1.smallint_col_25), FLOOR(t1.double_col_61) DESC), 524) AS int_col, +(t2.int_col_2) + (t1.smallint_col_25) AS int_col_1, +FLOOR(t1.double_col_61) AS float_col, +COALESCE(SUM(COALESCE(62, -380, -435)) OVER (PARTITION BY (t2.int_col_2 + t1.smallint_col_25) ORDER BY (t2.int_col_2 + t1.smallint_col_25) DESC, FLOOR(t1.double_col_61) DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 48 FOLLOWING), 704) AS int_col_2 +FROM table_1 t1 +INNER JOIN table_18 t2 ON (((t2.tinyint_col_6) = (t1.bigint_col_22)) AND ((t2.decimal0504_col_37) = (t1.decimal1709_col_26))) AND ((t2.tinyint_col_33) = (t1.tinyint_col_8)) +WHERE +(t2.smallint_col_38) IN (SELECT +COALESCE(-92, -994) AS int_col +FROM table_1 tt1 +INNER JOIN table_18 tt2 ON (tt2.decimal1611_col_22) = (tt1.decimal1208_col_20) +WHERE +(t1.timestamp_col_10) = (tt2.timestamp_col_18)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT +COALESCE(498, LEAD(COALESCE(-973, -684, 515)) OVER (PARTITION BY (t2.int_col_2 + t1.smallint_col_25) ORDER BY (t2.int_col_2 + t1.smallint_col_25), FLOOR(t1.double_col_61) DESC), 524) AS int_col, +(t2.int_col_2) + (t1.smallint_col_25) AS int_col_1, +FLOOR(t1.double_col_61) AS float_col, +COALESCE(SUM(COALESCE(62, -380, -435)) OVER (PARTITION BY (t2.int_col_2 + t1.smallint_col_25) ORDER BY (t2.int_col_2 + t1.smallint_col_25) DESC, FLOOR(t1.double_col_61) DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 48 FOLLOWING), 704) AS int_col_2 +FROM table_1 t1 +INNER JOIN table_18 t2 ON (((t2.tinyint_col_6) = (t1.bigint_col_22)) AND ((t2.decimal0504_col_37) = (t1.decimal1709_col_26))) AND ((t2.tinyint_col_33) = (t1.tinyint_col_8)) +WHERE +(t2.smallint_col_38) IN (SELECT +COALESCE(-92, -994) AS int_col +FROM table_1 tt1 +INNER JOIN table_18 tt2 ON (tt2.decimal1611_col_22) = (tt1.decimal1208_col_20) +WHERE +(t1.timestamp_col_10) = (tt2.timestamp_col_18)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((bigint_col_22 is not null and decimal1709_col_26 is not null) and tinyint_col_8 is not null) and timestamp_col_10 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: bigint_col_22 (type: bigint), decimal1709_col_26 (type: decimal(38,23)), tinyint_col_8 (type: tinyint) + sort order: +++ + Map-reduce partition columns: bigint_col_22 (type: bigint), decimal1709_col_26 (type: decimal(38,23)), tinyint_col_8 (type: tinyint) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: smallint_col_25 (type: smallint), double_col_61 (type: double), timestamp_col_10 (type: timestamp) + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (((UDFToLong(tinyint_col_6) is not null and decimal0504_col_37 is not null) and tinyint_col_33 is not null) and UDFToInteger(smallint_col_38) is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: UDFToLong(tinyint_col_6) (type: bigint), decimal0504_col_37 (type: decimal(38,23)), tinyint_col_33 (type: tinyint) + sort order: +++ + Map-reduce partition columns: UDFToLong(tinyint_col_6) (type: bigint), decimal0504_col_37 (type: decimal(38,23)), tinyint_col_33 (type: tinyint) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: int_col_2 (type: int), smallint_col_38 (type: smallint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 bigint_col_22 (type: bigint), decimal1709_col_26 (type: decimal(38,23)), tinyint_col_8 (type: tinyint) + 1 UDFToLong(tinyint_col_6) (type: bigint), decimal0504_col_37 (type: decimal(38,23)), tinyint_col_33 (type: tinyint) + outputColumnNames: _col16, _col21, _col72, _col98, _col105 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: UDFToInteger(_col105) (type: int), _col72 (type: timestamp) + sort order: ++ + Map-reduce partition columns: UDFToInteger(_col105) (type: int), _col72 (type: timestamp) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col16 (type: smallint), _col21 (type: double), _col98 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: timestamp) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: timestamp) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 UDFToInteger(_col105) (type: int), _col72 (type: timestamp) + 1 _col0 (type: int), _col1 (type: timestamp) + outputColumnNames: _col16, _col21, _col98 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: (_col98 + _col16) (type: int), floor(_col21) (type: bigint) + sort order: +- + Map-reduce partition columns: (_col98 + _col16) (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col16 (type: smallint), _col21 (type: double), _col98 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col16 (type: smallint), VALUE._col21 (type: double), VALUE._col98 (type: int) + outputColumnNames: _col16, _col21, _col98 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col16: smallint, _col21: double, _col98: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: (_col98 + _col16), floor(_col21)(DESC) + partition by: (_col98 + _col16) + raw input shape: + window functions: + window function definition + alias: LEAD_window_0 + arguments: COALESCE((- 973),(- 684),515) + name: LEAD + window function: GenericUDAFLeadEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: LEAD_window_0 (type: int), _col16 (type: smallint), _col21 (type: double), _col98 (type: int) + outputColumnNames: LEAD_window_0, _col16, _col21, _col98 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: (_col98 + _col16) (type: int), floor(_col21) (type: bigint) + sort order: -- + Map-reduce partition columns: (_col98 + _col16) (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: LEAD_window_0 (type: int), _col16 (type: smallint), _col21 (type: double), _col98 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col17 (type: smallint), VALUE._col22 (type: double), VALUE._col99 (type: int) + outputColumnNames: _col0, _col17, _col22, _col99 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col17: smallint, _col22: double, _col99: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: (_col99 + _col17)(DESC), floor(_col22)(DESC) + partition by: (_col99 + _col17) + raw input shape: + window functions: + window function definition + alias: SUM_window_1 + arguments: COALESCE(62,(- 380),(- 435)) + name: SUM + window function: GenericUDAFSumLong + window frame: PRECEDING(MAX)~FOLLOWING(48) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: COALESCE(498,_col0,524) (type: int), (_col99 + _col17) (type: int), floor(_col22) (type: bigint), COALESCE(SUM_window_1,704) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: tt1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: decimal1208_col_20 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: decimal1208_col_20 (type: decimal(38,6)) + sort order: + + Map-reduce partition columns: decimal1208_col_20 (type: decimal(38,6)) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + TableScan + alias: tt2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (decimal1611_col_22 is not null and timestamp_col_18 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: decimal1611_col_22 (type: decimal(38,6)) + sort order: + + Map-reduce partition columns: decimal1611_col_22 (type: decimal(38,6)) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: timestamp_col_18 (type: timestamp) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 decimal1208_col_20 (type: decimal(38,6)) + 1 decimal1611_col_22 (type: decimal(38,6)) + outputColumnNames: _col115 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col115 (type: timestamp) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: -92 (type: int), _col1 (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +