Index: ql/src/test/results/clientpositive/vectorized_context.q.out
===================================================================
--- ql/src/test/results/clientpositive/vectorized_context.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/vectorized_context.q.out	(revision 0)
@@ -0,0 +1,379 @@
+PREHOOK: query: create table store(s_store_sk int, s_city string)
+stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table store(s_store_sk int, s_city string)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@store
+PREHOOK: query: insert overwrite table store
+select cint, cstring1
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@store
+POSTHOOK: query: insert overwrite table store
+select cint, cstring1
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@store
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+PREHOOK: query: create table store_sales(ss_store_sk int, ss_hdemo_sk int, ss_net_profit double)
+stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table store_sales(ss_store_sk int, ss_hdemo_sk int, ss_net_profit double)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+PREHOOK: query: insert overwrite table store_sales
+select cint, cint, cdouble
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: insert overwrite table store_sales
+select cint, cint, cdouble
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+PREHOOK: query: create table household_demographics(hd_demo_sk int)
+stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table household_demographics(hd_demo_sk int)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@household_demographics
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+PREHOOK: query: insert overwrite table household_demographics
+select cint
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@household_demographics
+POSTHOOK: query: insert overwrite table household_demographics
+select cint
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@household_demographics
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+PREHOOK: query: explain
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME store_sales)) (TOK_TABREF (TOK_TABNAME store)) (= (. (TOK_TABLE_OR_COL store_sales) ss_store_sk) (. (TOK_TABLE_OR_COL store) s_store_sk))) (TOK_TABREF (TOK_TABNAME household_demographics)) (= (. (TOK_TABLE_OR_COL store_sales) ss_hdemo_sk) (. (TOK_TABLE_OR_COL household_demographics) hd_demo_sk)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL store) s_city)) (TOK_SELEXPR (TOK_TABLE_OR_COL ss_net_profit))) (TOK_LIMIT 100)))
+
+STAGE DEPENDENCIES:
+  Stage-6 is a root stage
+  Stage-4 depends on stages: Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        household_demographics
+          Fetch Operator
+            limit: -1
+        store_sales
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        household_demographics
+          TableScan
+            alias: household_demographics
+            HashTable Sink Operator
+              condition expressions:
+                0 {_col6} {_col2}
+                1
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col1]]
+                1 [Column[hd_demo_sk]]
+              Position of Big Table: 0
+        store_sales
+          TableScan
+            alias: store_sales
+            HashTable Sink Operator
+              condition expressions:
+                0 {ss_hdemo_sk} {ss_net_profit}
+                1 {s_city}
+              handleSkewJoin: false
+              keys:
+                0 [Column[ss_store_sk]]
+                1 [Column[s_store_sk]]
+              Position of Big Table: 1
+
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        store
+          TableScan
+            alias: store
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {ss_hdemo_sk} {ss_net_profit}
+                1 {s_city}
+              handleSkewJoin: false
+              keys:
+                0 [Column[ss_store_sk]]
+                1 [Column[s_store_sk]]
+              outputColumnNames: _col1, _col2, _col6
+              Position of Big Table: 1
+              Vectorized execution: true
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {_col6} {_col2}
+                  1
+                handleSkewJoin: false
+                keys:
+                  0 [Column[_col1]]
+                  1 [Column[hd_demo_sk]]
+                outputColumnNames: _col1, _col6
+                Position of Big Table: 0
+                Vectorized execution: true
+                Select Operator
+                  expressions:
+                        expr: _col1
+                        type: string
+                        expr: _col6
+                        type: double
+                  outputColumnNames: _col0, _col1
+                  Vectorized execution: true
+                  Limit
+                    Vectorized execution: true
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Vectorized execution: true
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+
+
+PREHOOK: query: select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@household_demographics
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+LFgU5WT87C2yJ4W4YU0r8Pp	NULL
+v3p153e2bSkGS70v04G	NULL
+0pOH7A4O8aQ37NuBqn	NULL
+8ShAFcD734S8Q26WjMwpq0Q	NULL
+nOF31ehjY7ULCHMf	NULL
+t32s57Cjt4a250qQgVNAB5T	NULL
+nvO822k30OaH37Il	NULL
+M152O	NULL
+FgJ7Hft6845s1766oyt82q	NULL
+0ovL2T	NULL
+3e27C1jTdTQPdvCWi4if	NULL
+XWIExC7NI3bqu6VhR14g2	NULL
+6g482F6IEbD2mKeLE153e0w	NULL
+2diFRgr78diK6rSl0J	NULL
+21UE6fJyy	NULL
+H3bTj310QaL012cPe	NULL
+7342q5oFQL8QIl7cO	NULL
+VkXY4IOSO	NULL
+4K1nnlkt7786Sq8x0ARXtr	NULL
+m4eSLx4qihVg1e32	NULL
+OSBq0b	NULL
+aKbAu2WJV8HWHU6K1Ukq	NULL
+LcfhOxSVg68ACRvw1xC7LU	NULL
+AwVW3sV2gsM	NULL
+Tqar00A	NULL
+mC4mr	NULL
+YHVB0	NULL
+2vtmB0qNlHlGV15P1p	NULL
+2wbgE0Yo1RX82H2sp4f1l5	NULL
+BSmA3fAai62QpNjmL66y8d	NULL
+314nQ6nVj	NULL
+H8mh48T7	NULL
+U616In80F54RI	NULL
+BuSLb058f2	NULL
+OSc0r	NULL
+75KN62a2iAf0j5Jol77wH7	NULL
+66Mx4v	NULL
+7SchQY2j74BW7dQNy5G5	NULL
+FEefA	NULL
+P2DNeo00PA7DJF0	NULL
+SMXqH	NULL
+6fB40r75kxeX3k10	NULL
+AmYxfSOBdJv8B48l0VAeeI	NULL
+S87OO	NULL
+0EIL81O	NULL
+dG8B5PQ3b85U362G6huu	NULL
+XOypj8	NULL
+61eT82N24	NULL
+lVfv3fD1jn532h3K67H	NULL
+J1an665U	NULL
+Y6P8Ji868U7u8W3X2GHNiOLh	NULL
+wXbLC0LS2bFf12f1ljC	NULL
+j0L50J2e82	NULL
+8EPG0Xi307qd	NULL
+04Y1mA17	NULL
+lTLWdPg0yM0IgY76s70	NULL
+KDr0tMRnCJJIBA84	NULL
+71KN0p4NhE4xm4ixm	NULL
+u6HT8fTw6IgPf2	NULL
+7WYO11kWn6fT2pOlh5sTDIwG	NULL
+Yc6gaH2OFF7cymt8q23Fr	NULL
+RQbQ5	NULL
+75Y6J	NULL
+eUx01FREb2LD4kle4dpS	NULL
+T0Y8Vi41EYW4CpQ6Hg1Xg30w	NULL
+Egf7KV7TeT	NULL
+LIJuG07tfqoLu8K	NULL
+uUTO41xk6VyqYPh	NULL
+aEvOE7hUNO0d67AM3V7BwUCK	NULL
+8AqHq	NULL
+gl03UrAU4bWrOvqwwf	NULL
+NULL	NULL
+LX6QHG6sEmBAIbA6e6Am24	NULL
+i330V4Y0Lm4ajyKqM1X2Y	NULL
+64K51WMTs	NULL
+iW12567av	NULL
+v3U315C36UQ4oEW	NULL
+niiH6MSNaSk4fRRb74o1y28c	NULL
+p4WmTkrM	NULL
+L1Q62u2	NULL
+hnrm68NiEQCL4	NULL
+fju0XS06MyUS7Nqk8P8	NULL
+0VWukLt	NULL
+642LsMiNArr0ufitL3l7RCU7	NULL
+DWNvg304j4KTMEs2174Cy1	NULL
+DU1m68i1Q7W3	NULL
+44vcS2S5wu684R05fq01fu	NULL
+eu3X5Qfp4sHv5H	NULL
+QbdFB1d7vfaM7	NULL
+s43i4lU	NULL
+0pOTqi3O44rEnGQ	NULL
+32cB3f	NULL
+c300w5	NULL
+w66f63n	NULL
+iR76SEs2C4V	NULL
+ss2PoJAipj6B1tn75O	NULL
+n3ner11ab4	NULL
+r17jGvc7gR	NULL
+5G1Xp277YJRklEO5kHx	NULL
+B78T0SnxlCe5AQ522GBUf6c6	NULL
+PREHOOK: query: drop table store
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store
+PREHOOK: Output: default@store
+POSTHOOK: query: drop table store
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store
+POSTHOOK: Output: default@store
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+PREHOOK: query: drop table store_sales
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: drop table store_sales
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: default@store_sales
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+PREHOOK: query: drop table household_demographics
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@household_demographics
+PREHOOK: Output: default@household_demographics
+POSTHOOK: query: drop table household_demographics
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Output: default@household_demographics
+POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_city SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: store.s_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
Index: ql/src/test/queries/clientpositive/vectorized_context.q
===================================================================
--- ql/src/test/queries/clientpositive/vectorized_context.q	(revision 0)
+++ ql/src/test/queries/clientpositive/vectorized_context.q	(revision 0)
@@ -0,0 +1,47 @@
+create table store(s_store_sk int, s_city string)
+stored as orc;
+insert overwrite table store
+select cint, cstring1
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679);
+create table store_sales(ss_store_sk int, ss_hdemo_sk int, ss_net_profit double)
+stored as orc;
+insert overwrite table store_sales
+select cint, cint, cdouble
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679);
+create table household_demographics(hd_demo_sk int)
+stored as orc;
+insert overwrite table household_demographics
+select cint
+from alltypesorc
+where cint not in (
+-3728, -563, 762, 6981, 253665376, 528534767, 626923679);
+set hive.auto.convert.join=true;
+set hive.vectorized.execution.enabled=true;
+
+
+explain
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+limit 100
+;
+
+select store.s_city, ss_net_profit
+from store_sales
+JOIN store ON store_sales.ss_store_sk = store.s_store_sk
+JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
+limit 100
+;
+
+set hive.auto.convert.join=false;
+set hive.vectorized.execution.enabled=false;
+
+drop table store;
+drop table store_sales;
+drop table household_demographics;
+
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java	(revision 1545372)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java	(working copy)
@@ -23,9 +23,11 @@
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import java.util.Stack;
 
@@ -48,6 +50,7 @@
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -342,8 +345,17 @@
     topNodes.addAll(mapWork.getAliasToWork().values());
     HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
     ogw.startWalking(topNodes, nodeOutput);
-    mapWork.setScratchColumnVectorTypes(vnp.getScratchColumnVectorTypes());
-    mapWork.setScratchColumnMap(vnp.getScratchColumnMap());
+
+    Map<String, Map<Integer, String>> columnVectorTypes = vnp.getScratchColumnVectorTypes();
+    mapWork.setScratchColumnVectorTypes(columnVectorTypes);
+    Map<String, Map<String, Integer>> columnMap = vnp.getScratchColumnMap();
+    mapWork.setScratchColumnMap(columnMap);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString()));
+      LOG.debug(String.format("columnMap: %s", columnMap.toString()));
+    }
+
     return;
   }
 
@@ -411,33 +423,42 @@
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
-      Node firstOp = stack.firstElement();
-      TableScanOperator tsOp = null;
-
-      tsOp = (TableScanOperator) firstOp;
-
-      VectorizationContext vContext = vContextsByTSOp.get(tsOp);
-      if (vContext == null) {
-        String fileKey = "";
+      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+
+      VectorizationContext vContext = null;
+
+      if (op instanceof TableScanOperator) {
+        vContext = getVectorizationContext(op, physicalContext);
         for (String onefile : mWork.getPathToAliases().keySet()) {
           List<String> aliases = mWork.getPathToAliases().get(onefile);
           for (String alias : aliases) {
-            Operator op = mWork.getAliasToWork().get(alias);
-            if (op == tsOp) {
-              fileKey = onefile;
-              if (vContext == null) {
-                vContext = getVectorizationContext(tsOp, physicalContext);
-              }
-              vContext.setFileKey(fileKey);
-              vectorizationContexts.put(fileKey, vContext);
+            Operator<? extends OperatorDesc> opRoot = mWork.getAliasToWork().get(alias);
+            if (op == opRoot) {
+              // The same vectorization context is copied multiple times into
+              // the MapWork scratch column map; each partition gets a copy.
+              vContext.setFileKey(onefile);
+              vectorizationContexts.put(onefile, vContext);
               break;
             }
           }
         }
-        vContextsByTSOp.put(tsOp, vContext);
+        vContextsByTSOp.put(op, vContext);
+      } else {
+        assert stack.size() > 1;
+        // Walk down the stack of operators until we find one willing to give us a context.
+        // At the bottom is the TableScan operator, which is guaranteed to have a context.
+        int i = stack.size() - 2;
+        while (vContext == null) {
+          Operator<? extends OperatorDesc> opParent =
+              (Operator<? extends OperatorDesc>) stack.get(i);
+          vContext = vContextsByTSOp.get(opParent);
+          --i;
+        }
       }
-
-      Operator op = (Operator) nd;
+
+      assert vContext != null;
+
       if (op.getType().equals(OperatorType.REDUCESINK) &&
           op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) {
         // No need to vectorize
@@ -453,6 +474,12 @@
           if (vectorOp != op) {
             opsDone.add(vectorOp);
           }
+          if (vectorOp instanceof VectorizationContextRegion) {
+            VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
+            VectorizationContext vOutContext = vcRegion.getOutputVectorizationContext();
+            vContextsByTSOp.put(op, vOutContext);
+            vectorizationContexts.put(vOutContext.getFileKey(), vOutContext);
+          }
         }
       } catch (HiveException e) {
         throw new SemanticException(e);
@@ -678,7 +705,7 @@
     return supportedDataTypes.contains(type.toLowerCase());
   }
 
-  private VectorizationContext getVectorizationContext(TableScanOperator op,
+  private VectorizationContext getVectorizationContext(Operator<? extends OperatorDesc> op,
       PhysicalContext pctx) {
     RowResolver rr = pctx.getParseContext().getOpParseCtx().get(op).getRowResolver();
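[Reviewer note] For reference, a minimal standalone sketch of the lookup pattern used in the else branch above: walk from the current node's parent toward the bottom of the walker's stack until an ancestor that has published a context is found. Op, Ctx, contextsByOp, and findNearestContext are simplified stand-ins, not Hive classes, and the i >= 0 guard is added defensively where the patch instead relies on the table scan always having published a context.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    final class ContextLookupSketch {
      static final class Op { final String name; Op(String name) { this.name = name; } }
      static final class Ctx { final String fileKey; Ctx(String fileKey) { this.fileKey = fileKey; } }

      // Contexts published so far, keyed by the operator that produced them.
      static final Map<Op, Ctx> contextsByOp = new HashMap<Op, Ctx>();

      // stack.get(stack.size() - 1) is the current node; start the walk at its parent.
      static Ctx findNearestContext(List<Op> stack) {
        Ctx ctx = null;
        int i = stack.size() - 2;
        while (ctx == null && i >= 0) {
          ctx = contextsByOp.get(stack.get(i));
          --i;
        }
        return ctx; // non-null as long as the bottom operator published a context
      }

      public static void main(String[] args) {
        Op ts = new Op("TS"), mj1 = new Op("MAPJOIN_1"), mj2 = new Op("MAPJOIN_2");
        contextsByOp.put(ts, new Ctx("file1"));
        // A region-boundary operator re-published a context for its subtree.
        contextsByOp.put(mj1, new Ctx("file1/MAP_JOIN_a"));
        List<Op> stack = Arrays.asList(ts, mj1, mj2);
        System.out.println(findNearestContext(stack).fileKey); // file1/MAP_JOIN_a
      }
    }

This is why the second map join in the test plan picks up the first join's output context rather than the table scan's.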
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java	(revision 1545372)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java	(working copy)
@@ -177,51 +177,6 @@
     this.valueWriters = valueWriters;
   }
 
-  public static VectorizedRowBatch buildBatch(Map<Integer, String> typeMap,
-      Map<String, Integer> columnMap) throws HiveException {
-
-    Map<Integer, ColumnVector> mapVectorColumn = new HashMap<Integer, ColumnVector>(typeMap.size());
-    int maxIndex = 0;
-
-    Iterator<Entry<Integer, String>> typeMapIt = typeMap.entrySet().iterator();
-    while(typeMapIt.hasNext()) {
-      Entry<Integer, String> type = typeMapIt.next();
-      ColumnVector cv = VectorizationContext.allocateColumnVector(type.getValue(),
-          VectorizedRowBatch.DEFAULT_SIZE);
-      mapVectorColumn.put(type.getKey(), cv);
-      if (maxIndex < type.getKey()) {
-        maxIndex = type.getKey();
-      }
-    }
-
-    VectorizedRowBatch batch = new VectorizedRowBatch(maxIndex+1);
-    for(int i=0; i <= maxIndex; ++i) {
-      ColumnVector cv = mapVectorColumn.get(i);
-      if (cv == null) {
-        // allocate a default type for the unused column.
-        // there are APIs that expect all cols[i] to be non NULL
-        cv = VectorizationContext.allocateColumnVector("long",
-            VectorizedRowBatch.DEFAULT_SIZE);
-      }
-      batch.cols[i] = cv;
-    }
-
-    // Validate that every column in the column map exists
-    Iterator<Entry<String, Integer>> columnMapIt = columnMap.entrySet().iterator();
-    while(columnMapIt.hasNext()) {
-      Entry<String, Integer> cm = columnMapIt.next();
-      if (batch.cols.length <= cm.getValue() || batch.cols[cm.getValue()] == null) {
-        throw new HiveException(String.format(
-            "Internal error: The type map has no entry for column %d %s",
-            cm.getValue(), cm.getKey()));
-      }
-    }
-
-    batch.reset();
-
-    return batch;
-  }
-
   /**
    * Resets the row batch to default state
    *  - sets selectedInUse to false
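[Reviewer note] For orientation, a minimal sketch of the map shapes the removed buildBatch consumed, and that VectorizedRowBatchCtx still reads from MapWork: a column index to vector type map plus a column name to index map. Column names and indices here are illustrative, loosely borrowed from the test tables, and plain Strings stand in for Hive's ColumnVector allocation.

    import java.util.HashMap;
    import java.util.Map;

    public class ScratchColumnSketch {
      public static void main(String[] args) {
        Map<Integer, String> typeMap = new HashMap<Integer, String>();
        typeMap.put(0, "long");    // e.g. ss_store_sk
        typeMap.put(2, "double");  // e.g. ss_net_profit; index 1 is unused

        Map<String, Integer> columnMap = new HashMap<String, Integer>();
        columnMap.put("ss_store_sk", 0);
        columnMap.put("ss_net_profit", 2);

        // Size the vector array by the highest index, filling gaps with a
        // default "long" vector, as buildBatch did before its removal.
        int maxIndex = 0;
        for (Integer idx : typeMap.keySet()) {
          maxIndex = Math.max(maxIndex, idx);
        }
        String[] cols = new String[maxIndex + 1];
        for (int i = 0; i <= maxIndex; ++i) {
          String type = typeMap.get(i);
          cols[i] = (type != null) ? type : "long"; // unused slots stay non-null
        }
        System.out.println(java.util.Arrays.toString(cols)); // [long, long, double]
      }
    }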
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContextRegion.java	(revision 0)
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * VectorizationContextRegion is an optional interface implemented by vectorized
+ * operators that change the vectorization context (region boundary operators).
+ */
+public interface VectorizationContextRegion {
+
+  VectorizationContext getOutputVectorizationContext();
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java	(revision 1545372)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java	(working copy)
@@ -96,6 +96,25 @@
   public VectorizedRowBatchCtx() {
   }
 
+  /**
+   * Initializes the VectorizedRowBatch context based on an arbitrary object inspector.
+   * Used by non-TableScan operators when they change the vectorization context.
+   * @param hiveConf
+   * @param fileKey
+   *          The key on which to retrieve the extra column mapping from the MapWork
+   *          scratch column map
+   * @param rowOI
+   *          Object inspector that shapes the column types
+   */
+  public void init(Configuration hiveConf, String fileKey,
+      StructObjectInspector rowOI) {
+    columnTypeMap = Utilities
+        .getMapRedWork(hiveConf).getMapWork().getScratchColumnVectorTypes()
+        .get(fileKey);
+    this.rowOI = rowOI;
+    this.rawRowOI = rowOI;
+  }
+
   /**
    * Initializes VectorizedRowBatch context based on the
@@ -251,6 +270,7 @@
     }
     result.numCols = fieldRefs.size();
     this.addScratchColumnsToBatch(result);
+    result.reset();
     return result;
   }
 
@@ -351,4 +371,5 @@
       return new LongColumnVector(defaultSize);
     }
   }
+
 }
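[Reviewer note] To make the new contract concrete: a hypothetical region-boundary operator would implement the interface roughly as below, mirroring the VectorMapJoinOperator change that follows. ExampleRegionOperator and regionTag are illustrative and not part of the patch; the VectorizationContext constructor and fileKey accessors are the ones the patch itself uses.

    package org.apache.hadoop.hive.ql.exec.vector;

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Hypothetical minimal region-boundary operator: it maps its output column
    // names to fresh indices, builds a new VectorizationContext for everything
    // downstream, and hands that context out through the interface.
    public class ExampleRegionOperator implements VectorizationContextRegion {

      private final VectorizationContext vOutContext;

      public ExampleRegionOperator(VectorizationContext vContext,
          List<String> outputColumnNames, String regionTag) {
        Map<String, Integer> outCols = new HashMap<String, Integer>(outputColumnNames.size());
        int index = 0;
        for (String col : outputColumnNames) {
          outCols.put(col, index++);
        }
        vOutContext = new VectorizationContext(outCols, index);
        // Derive a distinct fileKey so the new region gets its own scratch columns.
        vOutContext.setFileKey(vContext.getFileKey() + "/" + regionTag);
      }

      @Override
      public VectorizationContext getOutputVectorizationContext() {
        return vOutContext;
      }
    }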
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java	(revision 1545372)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java	(working copy)
@@ -28,6 +28,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
@@ -39,11 +40,12 @@
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 /**
  * The vectorized version of the MapJoinOperator.
 */
-public class VectorMapJoinOperator extends MapJoinOperator {
+public class VectorMapJoinOperator extends MapJoinOperator implements VectorizationContextRegion {
 
   private static final Log LOG = LogFactory.getLog(
       VectorMapJoinOperator.class.getName());
@@ -77,7 +79,10 @@
   //
   private transient int batchIndex;
   private transient VectorHashKeyWrapper[] keyValues;
-
+
+  private transient VectorizationContext vOutContext = null;
+  private transient VectorizedRowBatchCtx vrbCtx = null;
+
   public VectorMapJoinOperator() {
     super();
   }
@@ -113,37 +118,29 @@
     bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
 
     List<String> outColNames = desc.getOutputColumnNames();
-    int outputColumnIndex = 0;
-
-    Map<String, Integer> cMap = vContext.getColumnMap();
-    for(byte alias:order) {
-      for(ExprNodeDesc expr: exprs.get(alias)) {
-        String columnName = outColNames.get(outputColumnIndex);
-        if (!cMap.containsKey(columnName)) {
-          vContext.addOutputColumn(columnName, expr.getTypeString());
-        }
-        ++outputColumnIndex;
-      }
+
+    Map<String, Integer> mapOutCols = new HashMap<String, Integer>(outColNames.size());
+
+    int outColIndex = 0;
+    for(String outCol: outColNames) {
+      mapOutCols.put(outCol, outColIndex++);
     }
-
-    this.fileKey = vContext.getFileKey();
+
+    vOutContext = new VectorizationContext(mapOutCols, outColIndex);
+    vOutContext.setFileKey(vContext.getFileKey() + "/MAP_JOIN_" + desc.getBigTableAlias());
+    this.fileKey = vOutContext.getFileKey();
   }
 
   @Override
   public void initializeOp(Configuration hconf) throws HiveException {
     super.initializeOp(hconf);
 
-    Map<String, Map<Integer, String>> allTypeMaps = Utilities.
-        getMapRedWork(hconf).getMapWork().getScratchColumnVectorTypes();
-    Map<Integer, String> typeMap = allTypeMaps.get(fileKey);
-
-    Map<String, Map<String, Integer>> allColumnMaps = Utilities.
-        getMapRedWork(hconf).getMapWork().getScratchColumnMap();
-
-    Map<String, Integer> columnMap = allColumnMaps.get(fileKey);
-
-    outputBatch = VectorizedRowBatch.buildBatch(typeMap, columnMap);
-
+    vrbCtx = new VectorizedRowBatchCtx();
+    vrbCtx.init(hconf, this.fileKey, (StructObjectInspector) this.outputObjInspector);
+
+    outputBatch = vrbCtx.createVectorizedRowBatch();
+
     keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
 
     // This key evaluator translates from the vectorized VectorHashKeyWrapper format
@@ -298,4 +295,9 @@
     batchIndex = -1;
     keyValues = null;
   }
+
+  @Override
+  public VectorizationContext getOutputVectorizationContext() {
+    return vOutContext;
+  }
 }
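[Reviewer note] End-to-end, the fileKey scheme above ties the pieces together: each vectorization region publishes its scratch column types under a derived key at plan time, and the operator re-derives the same key at run time to fetch them. A standalone simulation with plain maps, where all names are illustrative and the real flow goes through MapWork, Vectorizer, and VectorizedRowBatchCtx:

    import java.util.HashMap;
    import java.util.Map;

    public class FileKeySketch {
      public static void main(String[] args) {
        Map<String, Map<Integer, String>> scratchTypesByKey =
            new HashMap<String, Map<Integer, String>>();

        // Planning side: the table scan region and the map-join output region
        // register scratch column types under distinct keys.
        String tsKey = "file1";
        String mjKey = tsKey + "/MAP_JOIN_" + "store"; // mirrors getFileKey() + "/MAP_JOIN_" + alias
        scratchTypesByKey.put(tsKey, new HashMap<Integer, String>());
        scratchTypesByKey.put(mjKey, new HashMap<Integer, String>());
        scratchTypesByKey.get(mjKey).put(2, "double"); // a scratch column for the join output

        // Runtime side: the operator re-derives the same key and fetches its
        // types, as VectorizedRowBatchCtx.init does from MapWork.
        Map<Integer, String> myScratchTypes = scratchTypesByKey.get(mjKey);
        System.out.println(myScratchTypes); // {2=double}
      }
    }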