diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 0672e0e..ed26dea 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -258,6 +258,7 @@ minitez.query.files.shared=acid_globallimit.q,\
   vector_coalesce.q,\
   vector_coalesce_2.q,\
   vector_complex_all.q,\
+  vector_complex_join.q,\
   vector_count_distinct.q,\
   vector_data_types.q,\
   vector_date_1.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 0552f9d..1eb960d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -342,7 +342,7 @@ protected OutputColumnManager(int initialOutputCol) {
 
     private final Set<Integer> usedOutputColumns = new HashSet<Integer>();
 
-    int allocateOutputColumn(String hiveTypeName) {
+    int allocateOutputColumn(String hiveTypeName) throws HiveException {
       if (initialOutputCol < 0) {
         // This is a test
         return 0;
@@ -403,7 +403,7 @@ void freeOutputColumn(int index) {
     }
   }
 
-  public int allocateScratchColumn(String hiveTypeName) {
+  public int allocateScratchColumn(String hiveTypeName) throws HiveException {
     return ocm.allocateOutputColumn(hiveTypeName);
   }
@@ -2243,7 +2243,7 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio
     }
   }
 
-  static String getNormalizedName(String hiveTypeName) {
+  static String getNormalizedName(String hiveTypeName) throws HiveException {
     VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
     switch (argType) {
     case INT_FAMILY:
@@ -2269,11 +2269,11 @@ static String getNormalizedName(String hiveTypeName) {
     case INTERVAL_DAY_TIME:
       return hiveTypeName;
     default:
-      return "None";
+      throw new HiveException("Unexpected hive type name " + hiveTypeName);
     }
   }
 
-  static String getUndecoratedName(String hiveTypeName) {
+  static String getUndecoratedName(String hiveTypeName) throws HiveException {
     VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName);
     switch (argType) {
     case INT_FAMILY:
@@ -2296,7 +2296,7 @@ static String getUndecoratedName(String hiveTypeName) {
     case INTERVAL_DAY_TIME:
       return hiveTypeName;
     default:
-      return "None";
+      throw new HiveException("Unexpected hive type name " + hiveTypeName);
     }
   }
@@ -2511,7 +2511,7 @@ public int compare(Integer o1, Integer o2) {
     }
     sb.append("sorted projectionColumnMap ").append(sortedColumnMap).append(", ");
-    sb.append("scratchColumnTypeNames ").append(getScratchColumnTypeNames().toString());
+    sb.append("scratchColumnTypeNames ").append(Arrays.toString(getScratchColumnTypeNames()));
 
     return sb.toString();
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index e26e31b..8ad7ca4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -255,7 +255,7 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx,
     determineCommonInfo(isOuterJoin);
   }
 
-  protected void determineCommonInfo(boolean isOuter) {
+  protected void determineCommonInfo(boolean isOuter) throws HiveException {
     bigTableRetainedMapping = new VectorColumnOutputMapping("Big Table Retained Mapping");
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index f674ece..d806b97 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1362,6 +1362,13 @@ private boolean validateMapJoinDesc(MapJoinDesc desc) {
       LOG.info("Cannot vectorize map work value expression");
       return false;
     }
+    Byte[] order = desc.getTagOrder();
+    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
+    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
+    if (!validateExprNodeDesc(smallTableExprs)) {
+      LOG.info("Cannot vectorize map work small table expression");
+      return false;
+    }
     return true;
   }
diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
index 5628959..9d4ca76 100644
--- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
+++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
@@ -158,8 +158,13 @@ private void prepareAbstractMapJoin(AbstractMapJoinOperator<? extends MapJoinDesc> map, MapJoinDesc mjdesc) {
     Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
     keyMap.put((byte)0, expr);
+    List<ExprNodeDesc> smallTableExpr = new ArrayList<ExprNodeDesc>();
+    smallTableExpr.add(new ExprNodeColumnDesc(Integer.class, "col2", "T1", false));
+    keyMap.put((byte)1, smallTableExpr);
     mjdesc.setKeys(keyMap);
     mjdesc.setExprs(keyMap);
+    Byte[] order = new Byte[] {(byte) 0, (byte) 1};
+    mjdesc.setTagOrder(order);
 
     //Set filter expression
     GenericUDFOPEqual udf = new GenericUDFOPEqual();
diff --git ql/src/test/queries/clientpositive/vector_complex_join.q ql/src/test/queries/clientpositive/vector_complex_join.q
new file mode 100644
index 0000000..30f38b1
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_complex_join.q
@@ -0,0 +1,29 @@
+set hive.cli.print.header=true;
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+SET hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
+
+-- From HIVE-10729. Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC;
+INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1;
+
+explain
+select * from alltypesorc join test where alltypesorc.cint=test.a;
+
+select * from alltypesorc join test where alltypesorc.cint=test.a;
+
+
+
+CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC;
+INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1;
+
+CREATE TABLE test2b (a INT) STORED AS ORC;
+INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4);
+
+explain
+select * from test2b join test2a on test2b.a = test2a.a[1];
+
+select * from test2b join test2a on test2b.a = test2a.a[1];
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/tez/vector_complex_join.q.out ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
new file mode 100644
index 0000000..dc988ef
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_complex_join.q.out
@@ -0,0 +1,227 @@
+PREHOOK: query: -- From HIVE-10729. Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: -- From HIVE-10729.
Not expected to vectorize this query. +-- +CREATE TABLE test (a INT, b MAP) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test +PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test +POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test +POSTHOOK: Lineage: test.a SIMPLE [] +POSTHOOK: Lineage: test.b EXPRESSION [] +c0 c1 +PREHOOK: query: explain +select * from alltypesorc join test where alltypesorc.cint=test.a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from alltypesorc join test where alltypesorc.cint=test.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + input vertices: + 1 Map 2 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: map) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: map) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from alltypesorc join test where 
alltypesorc.cint=test.a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@test +#### A masked pattern was here #### +alltypesorc.ctinyint alltypesorc.csmallint alltypesorc.cint alltypesorc.cbigint alltypesorc.cfloat alltypesorc.cdouble alltypesorc.cstring1 alltypesorc.cstring2 alltypesorc.ctimestamp1 alltypesorc.ctimestamp2 alltypesorc.cboolean1 alltypesorc.cboolean2 test.a test.b +-51 NULL 199408978 -1800989684 -51.0 NULL 34N4EY63M1GFWuW0boW P4PL5h1eXR4mMLr2 1969-12-31 16:00:08.451 NULL false true 199408978 {1:"val_1",2:"val_2"} +PREHOOK: query: CREATE TABLE test2a (a ARRAY) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2a +POSTHOOK: query: CREATE TABLE test2a (a ARRAY) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2a +PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test2a +POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test2a +POSTHOOK: Lineage: test2a.a EXPRESSION [] +c0 +PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2b +POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2b +PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@test2b +POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@test2b +POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 +PREHOOK: query: explain +select * from test2b join test2a on test2b.a = test2a.a[1] +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from test2b join test2a on test2b.a = test2a.a[1] +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test2b + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 a (type: int) + 1 a[1] (type: int) + outputColumnNames: _col0, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col4 (type: array) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: test2a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a[1] is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: a[1] (type: int) + sort order: + + Map-reduce partition columns: a[1] (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: a (type: array) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] +PREHOOK: type: QUERY +PREHOOK: Input: default@test2a +PREHOOK: Input: default@test2b +#### A masked pattern was here #### +POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test2a +POSTHOOK: Input: default@test2b +#### A masked pattern was here #### +test2b.a test2a.a +2 [1,2] diff --git ql/src/test/results/clientpositive/vector_complex_join.q.out ql/src/test/results/clientpositive/vector_complex_join.q.out new file mode 100644 index 0000000..002cdeb --- /dev/null +++ ql/src/test/results/clientpositive/vector_complex_join.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: -- From HIVE-10729. Not expected to vectorize this query. +-- +CREATE TABLE test (a INT, b MAP) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +POSTHOOK: query: -- From HIVE-10729. Not expected to vectorize this query. 
+-- +CREATE TABLE test (a INT, b MAP) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test +PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test +POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test +POSTHOOK: Lineage: test.a SIMPLE [] +POSTHOOK: Lineage: test.b EXPRESSION [] +c0 c1 +PREHOOK: query: explain +select * from alltypesorc join test where alltypesorc.cint=test.a +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from alltypesorc join test where alltypesorc.cint=test.a +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:test + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:test + TableScan + alias: test + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int), b (type: map) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: select * from 
alltypesorc join test where alltypesorc.cint=test.a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@test +#### A masked pattern was here #### +alltypesorc.ctinyint alltypesorc.csmallint alltypesorc.cint alltypesorc.cbigint alltypesorc.cfloat alltypesorc.cdouble alltypesorc.cstring1 alltypesorc.cstring2 alltypesorc.ctimestamp1 alltypesorc.ctimestamp2 alltypesorc.cboolean1 alltypesorc.cboolean2 test.a test.b +-51 NULL 199408978 -1800989684 -51.0 NULL 34N4EY63M1GFWuW0boW P4PL5h1eXR4mMLr2 1969-12-31 16:00:08.451 NULL false true 199408978 {1:"val_1",2:"val_2"} +PREHOOK: query: CREATE TABLE test2a (a ARRAY) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2a +POSTHOOK: query: CREATE TABLE test2a (a ARRAY) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2a +PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test2a +POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test2a +POSTHOOK: Lineage: test2a.a EXPRESSION [] +c0 +PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2b +POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2b +PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@test2b +POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@test2b +POSTHOOK: Lineage: test2b.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +_col0 +PREHOOK: query: explain +select * from test2b join test2a on test2b.a = test2a.a[1] +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from test2b join test2a on test2b.a = test2a.a[1] +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + test2b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + test2b + TableScan + alias: test2b + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 a (type: int) + 1 a[1] (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: test2a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: a[1] is not null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 a (type: int) + 1 a[1] (type: int) + outputColumnNames: _col0, _col4 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col4 (type: array) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] +PREHOOK: type: QUERY +PREHOOK: Input: default@test2a +PREHOOK: Input: default@test2b +#### A masked pattern was here #### +POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test2a +POSTHOOK: Input: default@test2b +#### A masked pattern was here #### +test2b.a test2a.a +2 [1,2]
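
Note on the Vectorizer change above: a MapJoin is vectorized only if the small-table side's expressions are themselves vectorizable, so a complex-typed column such as map<int,string> or array<int> on that side makes the operator fall back to row mode, which is exactly what the two q.out plans show. Below is a minimal illustrative sketch (not part of the patch) of that check in isolation; the wrapper class, the posBigTable parameter, and the stubbed validateExprNodeDesc helper are assumptions made here for a self-contained example, while getTagOrder() and getExprs() are the MapJoinDesc accessors the patch actually uses.

import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;

class SmallTableCheckSketch {

  // Hypothetical stand-in for Vectorizer.validateExprNodeDesc(List<ExprNodeDesc>):
  // the real helper walks each expression and rejects types that have no
  // vectorized column representation (map, array, struct, ...).
  static boolean validateExprNodeDesc(List<ExprNodeDesc> exprs) {
    return exprs != null;
  }

  static boolean smallTableSideIsVectorizable(MapJoinDesc desc, byte posBigTable) {
    Byte[] order = desc.getTagOrder();
    // With two inputs, whichever tag is not the big table is the single small table.
    Byte smallTablePos = (order[0] == posBigTable ? order[1] : order[0]);
    List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(smallTablePos);
    // If any small-table expression cannot be vectorized, the whole MapJoin stays row-mode.
    return validateExprNodeDesc(smallTableExprs);
  }
}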