diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java index 3c6378e3f6..b1d65287d9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java @@ -403,8 +403,8 @@ public void process(Object row, int tag) throws HiveException { groupBatches.fillGroupResultsAndForward(this, batch); } - // If we are only processing a PARTITION BY, reset our evaluators. - if (!isPartitionOrderBy) { + // If we are only processing a PARTITION BY and isLastGroupBatch, reset our evaluators. + if (!isPartitionOrderBy && isLastGroupBatch) { groupBatches.resetEvaluators(); } } diff --git ql/src/test/queries/clientpositive/vector_windowing_row_number.q ql/src/test/queries/clientpositive/vector_windowing_row_number.q new file mode 100644 index 0000000000..673a9ad3d4 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_windowing_row_number.q @@ -0,0 +1,75 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table row_number_test; + +create table row_number_test as select explode(split(repeat("w,", 2400), ",")); + +insert into row_number_test select explode(split(repeat("x,", 1200), ",")); + +insert into row_number_test select explode(split(repeat("y,", 700), ",")); + +insert into row_number_test select explode(split(repeat("z,", 600), ",")); + +explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test; + +create table row_numbers_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test; + +SET hive.vectorized.execution.enabled=false; +SET hive.vectorized.execution.reduce.enabled=false; +set hive.vectorized.execution.ptf.enabled=false; + +explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test; + +create table row_numbers_non_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test; + +-- compare results of vectorized with those of non-vectorized execution + +select exists( +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_vectorized +minus +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_non_vectorized +) diff_exists; + +drop table row_numbers_non_vectorized; +drop table row_numbers_vectorized; +drop table row_number_test; diff --git ql/src/test/results/clientpositive/vector_windowing_row_number.q.out ql/src/test/results/clientpositive/vector_windowing_row_number.q.out new file mode 100644 index 0000000000..81ffa0229f --- /dev/null +++ ql/src/test/results/clientpositive/vector_windowing_row_number.q.out @@ -0,0 +1,926 @@ +PREHOOK: query: drop table row_number_test +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table row_number_test +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table row_number_test as select explode(split(repeat("w,", 2400), ",")) +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@row_number_test +POSTHOOK: query: create table row_number_test as select explode(split(repeat("w,", 2400), ",")) +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@row_number_test +POSTHOOK: Lineage: row_number_test.col SCRIPT [] +col +PREHOOK: query: insert into row_number_test select explode(split(repeat("x,", 1200), ",")) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@row_number_test +POSTHOOK: query: insert into row_number_test select explode(split(repeat("x,", 1200), ",")) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@row_number_test +POSTHOOK: Lineage: row_number_test.col SCRIPT [] +col +PREHOOK: query: insert into row_number_test select explode(split(repeat("y,", 700), ",")) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@row_number_test +POSTHOOK: query: insert into row_number_test select explode(split(repeat("y,", 700), ",")) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@row_number_test +POSTHOOK: Lineage: row_number_test.col SCRIPT [] +col +PREHOOK: query: insert into row_number_test select explode(split(repeat("z,", 600), ",")) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@row_number_test +POSTHOOK: query: insert into row_number_test select explode(split(repeat("z,", 600), ",")) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@row_number_test +POSTHOOK: Lineage: row_number_test.col SCRIPT [] +col +PREHOOK: query: explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test +PREHOOK: type: QUERY +PREHOOK: Input: default@row_number_test +#### A masked pattern was here #### +POSTHOOK: query: explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@row_number_test +#### A masked pattern was here #### +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: row_number_test + Statistics: Num rows: 4904 Data size: 416840 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 4904 Data size: 416840 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: col (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4904 Data size: 1731112 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1731112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_0 (type: int), _col0 (type: string) + outputColumnNames: row_number_window_0, _col0 + Statistics: Num rows: 4904 Data size: 1731112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 0 (type: int), _col0 (type: string) + null sort order: az + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 4904 Data size: 1731112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4904 Data size: 1750728 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: row_number_window_1 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1750728 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_1 (type: int), _col0 (type: int), _col1 (type: string) + outputColumnNames: row_number_window_1, _col0, _col1 + Statistics: Num rows: 4904 Data size: 1750728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4904 Data size: 1750728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_1 (type: int), _col0 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4904 Data size: 1770344 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: row_number_window_2 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1770344 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_2 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: row_number_window_2, _col0, _col1, _col2 + Statistics: Num rows: 4904 Data size: 1770344 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 4904 Data size: 1770344 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_2 (type: int), _col0 (type: int), _col1 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4904 Data size: 1789960 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS LAST + partition by: _col3 + raw input shape: + window functions: + window function definition + alias: row_number_window_3 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1789960 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_3 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: row_number_window_3, _col0, _col1, _col2, _col3 + Statistics: Num rows: 4904 Data size: 1789960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 1 (type: int), _col3 (type: string) + null sort order: az + sort order: ++ + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 4904 Data size: 1789960 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_3 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4904 Data size: 1809576 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS LAST + partition by: 1 + raw input shape: + window functions: + window function definition + alias: row_number_window_4 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1809576 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_4 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: string) + outputColumnNames: row_number_window_4, _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4904 Data size: 1809576 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: string), 2 (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 4904 Data size: 1809576 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_4 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4904 Data size: 1829192 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: int, _col5: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 2 ASC NULLS LAST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: row_number_window_5 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1829192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_5 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: row_number_window_5, _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4904 Data size: 1829192 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 1 (type: int), 2 (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 4904 Data size: 1829192 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_5 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 4904 Data size: 1848808 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: int, _col5: int, _col6: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 2 ASC NULLS LAST + partition by: 1 + raw input shape: + window functions: + window function definition + alias: row_number_window_6 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1848808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: int), _col4 (type: int), _col3 (type: int), _col2 (type: int), _col1 (type: int), _col0 (type: int), row_number_window_6 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 4904 Data size: 554152 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4904 Data size: 554152 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table row_numbers_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@row_number_test +PREHOOK: Output: database:default +PREHOOK: Output: default@row_numbers_vectorized +POSTHOOK: query: create table row_numbers_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@row_number_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@row_numbers_vectorized +POSTHOOK: Lineage: row_numbers_vectorized.col SIMPLE [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r1 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r2 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r3 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r4 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r5 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r6 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_vectorized.r7 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +r1 r2 r3 r4 r5 r6 r7 col +PREHOOK: query: explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test +PREHOOK: type: QUERY +PREHOOK: Input: default@row_number_test +#### A masked pattern was here #### +POSTHOOK: query: explain select + row_number() over() as r1, + row_number() over(order by col) r2, + row_number() over(partition by col) r3, + row_number() over(partition by col order by col) r4, + row_number() over(partition by 1 order by col) r5, + row_number() over(partition by col order by 2) r6, + row_number() over(partition by 1 order by 2) r7, + col + from row_number_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@row_number_test +#### A masked pattern was here #### +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-7 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: row_number_test + Statistics: Num rows: 4904 Data size: 416840 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 4904 Data size: 416840 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: col (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4904 Data size: 1731112 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 0 ASC NULLS FIRST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: row_number_window_0 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1731112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_0 (type: int), _col0 (type: string) + outputColumnNames: row_number_window_0, _col0 + Statistics: Num rows: 4904 Data size: 1731112 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 0 (type: int), _col0 (type: string) + null sort order: az + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 4904 Data size: 1731112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4904 Data size: 1750728 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS LAST + partition by: 0 + raw input shape: + window functions: + window function definition + alias: row_number_window_1 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1750728 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_1 (type: int), _col0 (type: int), _col1 (type: string) + outputColumnNames: row_number_window_1, _col0, _col1 + Statistics: Num rows: 4904 Data size: 1750728 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4904 Data size: 1750728 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_1 (type: int), _col0 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4904 Data size: 1770344 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col2 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: row_number_window_2 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1770344 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_2 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: row_number_window_2, _col0, _col1, _col2 + Statistics: Num rows: 4904 Data size: 1770344 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 4904 Data size: 1770344 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_2 (type: int), _col0 (type: int), _col1 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4904 Data size: 1789960 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS LAST + partition by: _col3 + raw input shape: + window functions: + window function definition + alias: row_number_window_3 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1789960 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_3 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: row_number_window_3, _col0, _col1, _col2, _col3 + Statistics: Num rows: 4904 Data size: 1789960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 1 (type: int), _col3 (type: string) + null sort order: az + sort order: ++ + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 4904 Data size: 1789960 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_3 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4904 Data size: 1809576 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS LAST + partition by: 1 + raw input shape: + window functions: + window function definition + alias: row_number_window_4 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1809576 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_4 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: string) + outputColumnNames: row_number_window_4, _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4904 Data size: 1809576 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col4 (type: string), 2 (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 4904 Data size: 1809576 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_4 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4904 Data size: 1829192 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: int, _col5: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 2 ASC NULLS LAST + partition by: _col5 + raw input shape: + window functions: + window function definition + alias: row_number_window_5 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1829192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: row_number_window_5 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: row_number_window_5, _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4904 Data size: 1829192 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 1 (type: int), 2 (type: int) + null sort order: az + sort order: ++ + Map-reduce partition columns: 1 (type: int) + Statistics: Num rows: 4904 Data size: 1829192 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: row_number_window_5 (type: int), _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 4904 Data size: 1848808 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: int, _col2: int, _col3: int, _col4: int, _col5: int, _col6: string + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: 2 ASC NULLS LAST + partition by: 1 + raw input shape: + window functions: + window function definition + alias: row_number_window_6 + name: row_number + window function: GenericUDAFRowNumberEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 4904 Data size: 1848808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col5 (type: int), _col4 (type: int), _col3 (type: int), _col2 (type: int), _col1 (type: int), _col0 (type: int), row_number_window_6 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 4904 Data size: 554152 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4904 Data size: 554152 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table row_numbers_non_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@row_number_test +PREHOOK: Output: database:default +PREHOOK: Output: default@row_numbers_non_vectorized +POSTHOOK: query: create table row_numbers_non_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@row_number_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@row_numbers_non_vectorized +POSTHOOK: Lineage: row_numbers_non_vectorized.col SIMPLE [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r1 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r2 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r3 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r4 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r5 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r6 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +POSTHOOK: Lineage: row_numbers_non_vectorized.r7 SCRIPT [(row_number_test)row_number_test.FieldSchema(name:col, type:string, comment:null), ] +r1 r2 r3 r4 r5 r6 r7 col +Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select exists( +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_vectorized +minus +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_non_vectorized +) diff_exists +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@row_numbers_non_vectorized +PREHOOK: Input: default@row_numbers_vectorized +#### A masked pattern was here #### +POSTHOOK: query: select exists( +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_vectorized +minus +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_non_vectorized +) diff_exists +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@row_numbers_non_vectorized +POSTHOOK: Input: default@row_numbers_vectorized +#### A masked pattern was here #### +diff_exists +false +PREHOOK: query: drop table row_numbers_non_vectorized +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@row_numbers_non_vectorized +PREHOOK: Output: default@row_numbers_non_vectorized +POSTHOOK: query: drop table row_numbers_non_vectorized +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@row_numbers_non_vectorized +POSTHOOK: Output: default@row_numbers_non_vectorized +PREHOOK: query: drop table row_numbers_vectorized +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@row_numbers_vectorized +PREHOOK: Output: default@row_numbers_vectorized +POSTHOOK: query: drop table row_numbers_vectorized +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@row_numbers_vectorized +POSTHOOK: Output: default@row_numbers_vectorized +PREHOOK: query: drop table row_number_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@row_number_test +PREHOOK: Output: default@row_number_test +POSTHOOK: query: drop table row_number_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@row_number_test +POSTHOOK: Output: default@row_number_test