diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java index 912b6f8..5f3ebde 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java @@ -178,6 +178,10 @@ public void assign(VectorExpressionWriter[] writers, numDistributionKeys; cachedKeys = new Object[numKeys][keyLen]; cachedValues = new Object[valueEval.length]; + + int tag = conf.getTag(); + tagByte[0] = (byte) tag; + LOG.info("Using tag = " + tag); } catch(Exception e) { throw new HiveException(e); diff --git ql/src/test/queries/clientpositive/vectorized_shufflejoin.q ql/src/test/queries/clientpositive/vectorized_shufflejoin.q new file mode 100644 index 0000000..6b60aa0 --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_shufflejoin.q @@ -0,0 +1,10 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.auto.convert.join=false; + +EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint; + +SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint; \ No newline at end of file diff --git ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out new file mode 100644 index 0000000..5d54baf --- /dev/null +++ ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out @@ -0,0 +1,149 @@ +PREHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME alltypesorc) t1) (TOK_TABREF (TOK_TABNAME alltypesorc) t2) (= (. (TOK_TABLE_OR_COL t1) cint) (. (TOK_TABLE_OR_COL t2) cint)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL t1) cint))) (TOK_SELEXPR (TOK_FUNCTION MAX (. (TOK_TABLE_OR_COL t2) cint))) (TOK_SELEXPR (TOK_FUNCTION MIN (. (TOK_TABLE_OR_COL t1) cint))) (TOK_SELEXPR (TOK_FUNCTION AVG (+ (. (TOK_TABLE_OR_COL t1) cint) (. (TOK_TABLE_OR_COL t2) cint))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + Reduce Output Operator + key expressions: + expr: cint + type: int + sort order: + + Map-reduce partition columns: + expr: cint + type: int + tag: 0 + value expressions: + expr: cint + type: int + Vectorized execution: true + t2 + TableScan + alias: t2 + Reduce Output Operator + key expressions: + expr: cint + type: int + sort order: + + Map-reduce partition columns: + expr: cint + type: int + tag: 1 + value expressions: + expr: cint + type: int + Vectorized execution: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col2} + 1 {VALUE._col2} + handleSkewJoin: false + outputColumnNames: _col2, _col16 + Select Operator + expressions: + expr: _col2 + type: int + expr: _col16 + type: int + outputColumnNames: _col2, _col16 + Group By Operator + aggregations: + expr: count(_col2) + expr: max(_col16) + expr: min(_col2) + expr: avg((_col2 + _col16)) + bucketGroup: false + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + expr: _col1 + type: int + expr: _col2 + type: int + expr: _col3 + type: struct + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: max(VALUE._col1) + expr: min(VALUE._col2) + expr: avg(VALUE._col3) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: bigint + expr: _col1 + type: int + expr: _col2 + type: int + expr: _col3 + type: double + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3152013 1073680599 -1073279343 9.375396162525452E8