diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
index 9bd811c..e815774 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java
@@ -174,7 +174,9 @@ protected Object _evaluate(Object row, int version) throws HiveException {
     joinValues[posBigTable] = vectorNodeEvaluators;
 
     // Filtering is handled in the input batch processing
-    filterMaps[posBigTable] = null;
+    if (filterMaps != null) {
+      filterMaps[posBigTable] = null;
+    }
 
     return result;
   }
diff --git ql/src/test/queries/clientpositive/vector_auto_sortmerge_join_1.q ql/src/test/queries/clientpositive/vector_auto_sortmerge_join_1.q
new file mode 100644
index 0000000..808f425
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_auto_sortmerge_join_1.q
@@ -0,0 +1,51 @@
+SET hive.enforce.bucketing = true;
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.mapjoin.native.enabled=false;
+set hive.fetch.task.conversion=none;
+
+-- small 1 part, 2 bucket & big 2 part, 4 bucket
+CREATE TABLE bucket_small_txt (key string, value string) partitioned by (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small_txt partition(ds='2008-04-08');
+load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small_txt partition(ds='2008-04-08');
+
+CREATE TABLE bucket_big_txt (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08');
+
+load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09');
+load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09');
+load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09');
+load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09');
+
+CREATE TABLE bucket_small (key string, value string) partitioned by (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC;
+
+INSERT INTO TABLE bucket_small partition(ds='2008-04-08') SELECT key, value FROM bucket_small_txt WHERE ds='2008-04-08';
+
+CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORC;
+
+INSERT INTO TABLE bucket_big partition(ds='2008-04-08') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-08';
+INSERT INTO TABLE bucket_big partition(ds='2008-04-09') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-09';
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.optimize.bucketmapjoin = true;
+set
hive.optimize.bucketmapjoin.sortedmerge = true; + +set hive.auto.convert.sortmerge.join.to.mapjoin=false; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; + +-- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; +select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; + +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key; +select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key; + +set hive.auto.convert.sortmerge.join.to.mapjoin=true; +explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key; +select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key; + diff --git ql/src/test/results/clientpositive/tez/vector_auto_sortmerge_join_1.q.out ql/src/test/results/clientpositive/tez/vector_auto_sortmerge_join_1.q.out new file mode 100644 index 0000000..61489e1 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_auto_sortmerge_join_1.q.out @@ -0,0 +1,1086 @@ +PREHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small_txt (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small_txt +POSTHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small_txt (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small_txt +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small_txt +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small_txt +POSTHOOK: Output: default@bucket_small_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small_txt@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small_txt@ds=2008-04-08 +PREHOOK: query: CREATE TABLE bucket_big_txt (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big_txt +POSTHOOK: query: CREATE TABLE bucket_big_txt (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big_txt +PREHOOK: query: load data local inpath 
'../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: query: 
load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: INSERT INTO TABLE bucket_small partition(ds='2008-04-08') SELECT key, value FROM bucket_small_txt WHERE ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_small_txt +PREHOOK: Input: default@bucket_small_txt@ds=2008-04-08 +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: INSERT INTO TABLE bucket_small partition(ds='2008-04-08') SELECT key, value FROM bucket_small_txt WHERE ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_small_txt +POSTHOOK: Input: default@bucket_small_txt@ds=2008-04-08 +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: Lineage: bucket_small PARTITION(ds=2008-04-08).key SIMPLE [(bucket_small_txt)bucket_small_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucket_small PARTITION(ds=2008-04-08).value SIMPLE [(bucket_small_txt)bucket_small_txt.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: INSERT INTO TABLE bucket_big partition(ds='2008-04-08') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big_txt +PREHOOK: Input: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: INSERT INTO TABLE bucket_big partition(ds='2008-04-08') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big_txt +POSTHOOK: Input: default@bucket_big_txt@ds=2008-04-08 +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: Lineage: bucket_big PARTITION(ds=2008-04-08).key SIMPLE [(bucket_big_txt)bucket_big_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucket_big PARTITION(ds=2008-04-08).value SIMPLE [(bucket_big_txt)bucket_big_txt.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: INSERT INTO TABLE bucket_big partition(ds='2008-04-09') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-09' +PREHOOK: type: QUERY 
+PREHOOK: Input: default@bucket_big_txt +PREHOOK: Input: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: INSERT INTO TABLE bucket_big partition(ds='2008-04-09') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-09' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big_txt +POSTHOOK: Input: default@bucket_big_txt@ds=2008-04-09 +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: Lineage: bucket_big PARTITION(ds=2008-04-09).key SIMPLE [(bucket_big_txt)bucket_big_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucket_big PARTITION(ds=2008-04-09).value SIMPLE [(bucket_big_txt)bucket_big_txt.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 10 + partition_columns ds + partition_columns.types string + rawDataSize 1760 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 732 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types 
string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 176000 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 1 => 5 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked 
pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Execution mode: vectorized + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 176000 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 5 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + 
rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 10 + partition_columns ds + partition_columns.types string + rawDataSize 1760 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 732 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Execution mode: vectorized + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 176000 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 5 + keys: + 0 key (type: string) + 1 key (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + 
rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 10 + partition_columns ds + partition_columns.types string + rawDataSize 1760 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 732 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Execution mode: vectorized + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 diff --git ql/src/test/results/clientpositive/vector_auto_sortmerge_join_1.q.out ql/src/test/results/clientpositive/vector_auto_sortmerge_join_1.q.out new file mode 100644 index 0000000..e4b6f7c --- /dev/null +++ ql/src/test/results/clientpositive/vector_auto_sortmerge_join_1.q.out @@ -0,0 +1,1429 @@ +PREHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small_txt (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small_txt +POSTHOOK: query: -- small 1 part, 2 bucket & big 2 part, 4 bucket +CREATE TABLE bucket_small_txt (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small_txt +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small_txt +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small_txt +POSTHOOK: Output: default@bucket_small_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_small_txt@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small_txt 
partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_small_txt@ds=2008-04-08 +PREHOOK: query: CREATE TABLE bucket_big_txt (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big_txt +POSTHOOK: query: CREATE TABLE bucket_big_txt (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big_txt +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-08') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +POSTHOOK: query: load data local inpath 
'../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big_txt partition(ds='2008-04-09') +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_small +POSTHOOK: query: CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) +CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_small +PREHOOK: query: INSERT INTO TABLE bucket_small partition(ds='2008-04-08') SELECT key, value FROM bucket_small_txt WHERE ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_small_txt +PREHOOK: Input: default@bucket_small_txt@ds=2008-04-08 +PREHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: query: INSERT INTO TABLE bucket_small partition(ds='2008-04-08') SELECT key, value FROM bucket_small_txt WHERE ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_small_txt +POSTHOOK: Input: default@bucket_small_txt@ds=2008-04-08 +POSTHOOK: Output: default@bucket_small@ds=2008-04-08 +POSTHOOK: Lineage: bucket_small PARTITION(ds=2008-04-08).key SIMPLE [(bucket_small_txt)bucket_small_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucket_small PARTITION(ds=2008-04-08).value SIMPLE [(bucket_small_txt)bucket_small_txt.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket_big +POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket_big +PREHOOK: query: INSERT INTO TABLE bucket_big partition(ds='2008-04-08') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big_txt +PREHOOK: Input: default@bucket_big_txt@ds=2008-04-08 +PREHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: query: INSERT INTO TABLE 
bucket_big partition(ds='2008-04-08') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big_txt +POSTHOOK: Input: default@bucket_big_txt@ds=2008-04-08 +POSTHOOK: Output: default@bucket_big@ds=2008-04-08 +POSTHOOK: Lineage: bucket_big PARTITION(ds=2008-04-08).key SIMPLE [(bucket_big_txt)bucket_big_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucket_big PARTITION(ds=2008-04-08).value SIMPLE [(bucket_big_txt)bucket_big_txt.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: INSERT INTO TABLE bucket_big partition(ds='2008-04-09') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-09' +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big_txt +PREHOOK: Input: default@bucket_big_txt@ds=2008-04-09 +PREHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: query: INSERT INTO TABLE bucket_big partition(ds='2008-04-09') SELECT key, value FROM bucket_big_txt WHERE ds='2008-04-09' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big_txt +POSTHOOK: Input: default@bucket_big_txt@ds=2008-04-09 +POSTHOOK: Output: default@bucket_big@ds=2008-04-09 +POSTHOOK: Lineage: bucket_big PARTITION(ds=2008-04-09).key SIMPLE [(bucket_big_txt)bucket_big_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: bucket_big PARTITION(ds=2008-04-09).value SIMPLE [(bucket_big_txt)bucket_big_txt.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter +explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_small + a + TOK_TABREF + TOK_TABNAME + bucket_big + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 176000 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + BucketMapJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + 
SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Execution mode: vectorized + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 176000 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + BucketMapJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + 
SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Execution mode: vectorized + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38 +PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + bucket_big + a + TOK_TABREF + TOK_TABNAME + bucket_small + b + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTIONSTAR + count + + +STAGE DEPENDENCIES: + Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 + Stage-6 has a backup stage: Stage-1 + Stage-3 depends on stages: Stage-6 + Stage-7 has a backup stage: Stage-1 + Stage-4 depends on stages: Stage-7 + Stage-1 + Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + +STAGE PLANS: + Stage: Stage-5 + Conditional Operator + + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 10 + partition_columns ds + partition_columns.types string + rawDataSize 1760 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 732 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_small + name: default.bucket_small + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 176000 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 10 + partition_columns ds + partition_columns.types string + rawDataSize 1760 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 732 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Execution mode: vectorized + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Partition Description: + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 176000 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 10 + partition_columns ds + partition_columns.types string + rawDataSize 1760 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 732 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Execution mode: vectorized + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1000 Data size: 176000 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + BucketMapJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition 
values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 500 + partition_columns ds + partition_columns.types string + rawDataSize 88000 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 3491 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket_big +PREHOOK: Input: default@bucket_big@ds=2008-04-08 +PREHOOK: Input: default@bucket_big@ds=2008-04-09 +PREHOOK: Input: default@bucket_small +PREHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket_big +POSTHOOK: Input: default@bucket_big@ds=2008-04-08 +POSTHOOK: Input: default@bucket_big@ds=2008-04-09 +POSTHOOK: Input: default@bucket_small +POSTHOOK: Input: default@bucket_small@ds=2008-04-08 +#### A masked pattern was here #### +38
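
A lightweight way to sanity-check the expected count above (38 for every variant of the join) is to rerun the same inner join against the TEXTFILE staging tables that seeded the ORC tables; only the storage format and the chosen join strategy differ, so the result should match. The sketch below is a hedged local verification aid, not part of the generated .q.out, and it assumes the bucket_small_txt / bucket_big_txt tables created by the setup script are still present:

    -- hypothetical cross-check, not part of the golden file:
    -- the TEXTFILE copies hold the same rows as the ORC tables,
    -- so both orderings of the join should also return 38
    select count(*) FROM bucket_small_txt a JOIN bucket_big_txt b ON a.key = b.key;
    select count(*) FROM bucket_big_txt a JOIN bucket_small_txt b ON a.key = b.key;

If either query disagrees with the 38 recorded above, the discrepancy points at the data load or the INSERT ... SELECT population step rather than at the vectorized SMB map join path exercised by this test.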