diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index f66e19be3e..5360852d85 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -227,6 +227,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ selectDistinctStar.q,\ select_dummy_source.q,\ skewjoin.q,\ + smb_join1.q,\ stats_noscan_1.q,\ stats_only_null.q,\ subquery_exists.q,\ @@ -585,6 +586,7 @@ minillaplocal.query.files=acid_globallimit.q,\ semijoin.q,\ semijoin_hint.q,\ smb_cache.q,\ + smb_join1.q,\ special_character_in_tabnames_1.q,\ sqlmerge.q,\ stats_based_fetch_decision.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java index add7d08c40..d440aeaa8f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordSource.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec.tez; import java.io.IOException; + +import org.apache.hadoop.hive.ql.exec.tez.tools.KeyValueInputMerger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.AbstractMapOperator; @@ -46,6 +48,10 @@ void init(JobConf jconf, AbstractMapOperator mapOp, KeyValueReader reader) throws IOException { execContext = mapOp.getExecContext(); this.mapOp = mapOp; + if (reader instanceof KeyValueInputMerger) { + KeyValueInputMerger kvMerger = (KeyValueInputMerger) reader; + kvMerger.setIOCxt(execContext.getIoCxt()); + } this.reader = reader; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/KeyValueInputMerger.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/KeyValueInputMerger.java index 698fa7f69e..b3030b2308 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/KeyValueInputMerger.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/tools/KeyValueInputMerger.java @@ -25,8 +25,13 @@ import java.util.Map; import java.util.PriorityQueue; +import org.apache.hadoop.hive.ql.io.HiveInputFormat; +import org.apache.hadoop.hive.ql.io.IOContext; +import org.apache.hadoop.mapred.split.TezGroupedSplit; +import org.apache.tez.mapreduce.lib.MRReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -35,6 +40,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.InputSplit; import org.apache.tez.runtime.library.api.KeyValueReader; /** @@ -50,12 +56,16 @@ public static final Logger l4j = LoggerFactory.getLogger(KeyValueInputMerger.class); private PriorityQueue pQueue = null; private KeyValueReader nextKVReader = null; + private KeyValueReader prevKVReader = null; private ObjectInspector[] inputObjInspectors = null; private Deserializer deserializer = null; private List structFields = null; private List fieldOIs = null; private final Map> kvReaderStandardObjMap = new HashMap>(); + private final Map kvReaderPathMap = + new HashMap<>(); + private IOContext ioCxt = null; public KeyValueInputMerger(List multiMRInputs, Deserializer deserializer, ObjectInspector[] inputObjInspectors, List sortCols) throws Exception { @@ -76,11 +86,22 @@ public KeyValueInputMerger(List multiMRInputs, Deserializer dese } l4j.info("Initialized the priority queue with multi mr inputs: " + multiMRInputs.size()); for (KeyValueReader input : multiMRInputs) { + TezGroupedSplit split = (TezGroupedSplit) ((MRReader) input).getSplit(); + List splits = split.getGroupedSplits(); + Path 
path = ((HiveInputFormat.HiveInputSplit) splits.get(0)).getPath(); + kvReaderPathMap.put(input, path); addToQueue(input); } } /** + * Sets the IOContext whose input path next() updates whenever it switches to a different reader; set it before calling next(), which uses it without a null check. + */ + public void setIOCxt(IOContext ioCxt) { + this.ioCxt = ioCxt; + } + + /** * Add KeyValueReader to queue if it has more key-value * * @param kvReader @@ -106,7 +127,15 @@ public boolean next() throws IOException { //get the new nextKVReader with lowest key nextKVReader = pQueue.poll(); - return nextKVReader != null; + if (nextKVReader == null) + return false; + + if (nextKVReader != prevKVReader) { + prevKVReader = nextKVReader; + // update path in IOContext + ioCxt.setInputPath(kvReaderPathMap.get(nextKVReader)); + } + return true; } @Override diff --git a/ql/src/test/queries/clientpositive/smb_join1.q b/ql/src/test/queries/clientpositive/smb_join1.q new file mode 100644 index 0000000000..e54b8357ed --- /dev/null +++ b/ql/src/test/queries/clientpositive/smb_join1.q @@ -0,0 +1,67 @@ +set hive.mapred.mode=nonstrict; +SET hive.vectorized.execution.enabled=true; +SET hive.exec.orc.default.buffer.size=32768; +SET hive.exec.orc.default.row.index.stride=1000; +SET hive.optimize.index.filter=true; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + +DROP TABLE orc_a; +DROP TABLE orc_b; + +CREATE TABLE orc_a (id bigint, cdouble double) partitioned by (y int, q smallint) +CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc; +CREATE TABLE orc_b (id bigint, cfloat float) +CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc; + +insert into table orc_a partition (y=2000, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 650; + +insert into table orc_b +select cbigint, cfloat from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 4; + +set hive.cbo.enable=false; +set hive.llap.io.enabled=false; +explain +select * from orc_a a join orc_b b on a.id=b.id; +select * from
orc_a a join orc_b b on a.id=b.id; + +explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q; +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q; + + +set hive.enforce.sortmergebucketmapjoin=false; +set hive.optimize.bucketmapjoin=true; +set hive.optimize.bucketmapjoin.sortedmerge=true; +set hive.auto.convert.sortmerge.join=true; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=10; +set hive.cbo.enable=true; + +explain +select * from orc_a a join orc_b b on a.id=b.id; +select * from orc_a a join orc_b b on a.id=b.id; + +explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q; +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q; + + +-- add a new top level partition +insert into table orc_a partition (y=2001, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 700; + +explain +select * from orc_a a join orc_b b on a.id=b.id; +select * from orc_a a join orc_b b on a.id=b.id; + +explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q; +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q; + +DROP TABLE orc_a; +DROP TABLE orc_b; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/smb_join1.q.out b/ql/src/test/results/clientpositive/llap/smb_join1.q.out new file mode 100644 index 0000000000..a2c440f5c6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/smb_join1.q.out @@ -0,0 +1,855 @@ +PREHOOK: query: DROP TABLE orc_a +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_a +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE orc_b +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_b +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orc_a (id bigint, cdouble double) partitioned by (y int, q smallint) +CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as 
orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_a +POSTHOOK: query: CREATE TABLE orc_a (id bigint, cdouble double) partitioned by (y int, q smallint) +CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_a +PREHOOK: query: CREATE TABLE orc_b (id bigint, cfloat float) +CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_b +POSTHOOK: query: CREATE TABLE orc_b (id bigint, cfloat float) +CLUSTERED BY (id) SORTED BY (id) INTO 2 BUCKETS stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_b +PREHOOK: query: insert into table orc_a partition (y=2000, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 650 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_a@y=2000 +POSTHOOK: query: insert into table orc_a partition (y=2000, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 650 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_a@y=2000/q=0 +POSTHOOK: Output: default@orc_a@y=2000/q=1 +POSTHOOK: Output: default@orc_a@y=2000/q=2 +POSTHOOK: Output: default@orc_a@y=2000/q=3 +POSTHOOK: Output: default@orc_a@y=2000/q=4 +POSTHOOK: Output: default@orc_a@y=2000/q=5 +POSTHOOK: Output: default@orc_a@y=2000/q=6 +POSTHOOK: Output: default@orc_a@y=2000/q=7 +POSTHOOK: Output: default@orc_a@y=2000/q=8 +POSTHOOK: Output: default@orc_a@y=2000/q=9 +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=0).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=0).id 
SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=1).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=2).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=3).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=4).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=4).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=5).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=5).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=6).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=6).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=7).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=7).id 
SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=8).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=8).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=9).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2000,q=9).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +PREHOOK: query: insert into table orc_b +select cbigint, cfloat from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_b +POSTHOOK: query: insert into table orc_b +select cbigint, cfloat from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_b +POSTHOOK: Lineage: orc_b.cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: orc_b.id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +PREHOOK: query: explain +select * from orc_a a join orc_b b on a.id=b.id +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from orc_a a join orc_b b on a.id=b.id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 650 Data 
size: 15600 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 650 Data size: 5200 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: id (type: bigint) + sort order: + + Map-reduce partition columns: id (type: bigint) + Statistics: Num rows: 650 Data size: 5200 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: cdouble (type: double), y (type: int), q (type: smallint) + Execution mode: vectorized, llap + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: bigint) + sort order: + + Map-reduce partition columns: id (type: bigint) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + value expressions: cfloat (type: float) + Execution mode: vectorized, llap + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 id (type: bigint) + 1 id (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col8 + Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: int), _col3 (type: smallint), _col7 (type: bigint), _col8 (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from orc_a a join orc_b b on a.id=b.id +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_a +PREHOOK: Input: default@orc_a@y=2000/q=0 +PREHOOK: Input: default@orc_a@y=2000/q=1 +PREHOOK: Input: default@orc_a@y=2000/q=2 +PREHOOK: Input: default@orc_a@y=2000/q=3 +PREHOOK: Input: default@orc_a@y=2000/q=4 +PREHOOK: Input: default@orc_a@y=2000/q=5 +PREHOOK: Input: default@orc_a@y=2000/q=6 +PREHOOK: Input: default@orc_a@y=2000/q=7 +PREHOOK: Input: default@orc_a@y=2000/q=8 +PREHOOK: Input: default@orc_a@y=2000/q=9 +PREHOOK: Input: default@orc_b +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_a a join orc_b b on a.id=b.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_a +POSTHOOK: Input: default@orc_a@y=2000/q=0 +POSTHOOK: Input: default@orc_a@y=2000/q=1 +POSTHOOK: Input: default@orc_a@y=2000/q=2 +POSTHOOK: Input: default@orc_a@y=2000/q=3 +POSTHOOK: Input: default@orc_a@y=2000/q=4 +POSTHOOK: Input: default@orc_a@y=2000/q=5 +POSTHOOK: Input: default@orc_a@y=2000/q=6 +POSTHOOK: Input: default@orc_a@y=2000/q=7 +POSTHOOK: Input: default@orc_a@y=2000/q=8 +POSTHOOK: Input: default@orc_a@y=2000/q=9 +POSTHOOK: Input: default@orc_b +#### A masked pattern was here #### +-2143024405 15601.0 2000 1 -2143024405 -29.0 +-2142615863 15601.0 2000 1 -2142615863 -4.0 +-2141336536 15601.0 2000 1 -2141336536 26.0 +-2136727102 15601.0 2000 1 -2136727102 1.0 +PREHOOK: query: explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +PREHOOK: type: QUERY +POSTHOOK: query: explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here 
#### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 650 Data size: 15600 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 650 Data size: 5200 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: id (type: bigint) + sort order: + + Map-reduce partition columns: id (type: bigint) + Statistics: Num rows: 650 Data size: 5200 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: y (type: int), q (type: smallint) + Execution mode: vectorized, llap + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: bigint) + sort order: + + Map-reduce partition columns: id (type: bigint) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 id (type: bigint) + 1 id (type: bigint) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: int), _col3 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: smallint) + sort order: ++ + 
Map-reduce partition columns: _col0 (type: int), _col1 (type: smallint) + Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 357 Data size: 2856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 357 Data size: 2856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_a +PREHOOK: Input: default@orc_a@y=2000/q=0 +PREHOOK: Input: default@orc_a@y=2000/q=1 +PREHOOK: Input: default@orc_a@y=2000/q=2 +PREHOOK: Input: default@orc_a@y=2000/q=3 +PREHOOK: Input: default@orc_a@y=2000/q=4 +PREHOOK: Input: default@orc_a@y=2000/q=5 +PREHOOK: Input: default@orc_a@y=2000/q=6 +PREHOOK: Input: default@orc_a@y=2000/q=7 +PREHOOK: Input: default@orc_a@y=2000/q=8 +PREHOOK: Input: default@orc_a@y=2000/q=9 +PREHOOK: Input: default@orc_b +#### A masked pattern was here #### +POSTHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_a +POSTHOOK: Input: default@orc_a@y=2000/q=0 +POSTHOOK: Input: default@orc_a@y=2000/q=1 +POSTHOOK: Input: default@orc_a@y=2000/q=2 +POSTHOOK: Input: default@orc_a@y=2000/q=3 +POSTHOOK: Input: default@orc_a@y=2000/q=4 +POSTHOOK: Input: default@orc_a@y=2000/q=5 +POSTHOOK: Input: 
default@orc_a@y=2000/q=6 +POSTHOOK: Input: default@orc_a@y=2000/q=7 +POSTHOOK: Input: default@orc_a@y=2000/q=8 +POSTHOOK: Input: default@orc_a@y=2000/q=9 +POSTHOOK: Input: default@orc_b +#### A masked pattern was here #### +2000 1 4 +PREHOOK: query: explain +select * from orc_a a join orc_b b on a.id=b.id +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from orc_a a join orc_b b on a.id=b.id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), cfloat (type: float) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 650 Data size: 15600 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 650 Data size: 5200 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: id (type: bigint), cdouble (type: double), y (type: int), q (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 650 Data size: 5200 Basic stats: COMPLETE Column stats: PARTIAL + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from orc_a a join orc_b b on a.id=b.id +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_a +PREHOOK: Input: default@orc_a@y=2000/q=0 +PREHOOK: Input: default@orc_a@y=2000/q=1 +PREHOOK: Input: default@orc_a@y=2000/q=2 +PREHOOK: Input: default@orc_a@y=2000/q=3 +PREHOOK: Input: default@orc_a@y=2000/q=4 +PREHOOK: Input: default@orc_a@y=2000/q=5 +PREHOOK: Input: default@orc_a@y=2000/q=6 +PREHOOK: Input: default@orc_a@y=2000/q=7 +PREHOOK: Input: default@orc_a@y=2000/q=8 +PREHOOK: Input: default@orc_a@y=2000/q=9 +PREHOOK: Input: default@orc_b +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_a a join orc_b b on a.id=b.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_a +POSTHOOK: Input: default@orc_a@y=2000/q=0 +POSTHOOK: Input: default@orc_a@y=2000/q=1 +POSTHOOK: Input: default@orc_a@y=2000/q=2 +POSTHOOK: Input: default@orc_a@y=2000/q=3 +POSTHOOK: Input: default@orc_a@y=2000/q=4 +POSTHOOK: Input: default@orc_a@y=2000/q=5 +POSTHOOK: Input: default@orc_a@y=2000/q=6 +POSTHOOK: Input: default@orc_a@y=2000/q=7 +POSTHOOK: Input: default@orc_a@y=2000/q=8 +POSTHOOK: Input: default@orc_a@y=2000/q=9 +POSTHOOK: Input: default@orc_b +#### A masked pattern was here #### +-2143024405 15601.0 2000 1 -2143024405 -29.0 +-2142615863 15601.0 2000 1 -2142615863 -4.0 +-2141336536 15601.0 2000 1 -2141336536 26.0 +-2136727102 15601.0 2000 1 -2136727102 1.0 +PREHOOK: query: explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +PREHOOK: type: QUERY +POSTHOOK: query: explain +select y,q,count(*) from orc_a a join orc_b b on 
a.id=b.id group by y,q +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 650 Data size: 15600 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 650 Data size: 5200 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: id (type: bigint), y (type: int), q (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 650 Data size: 5200 Basic stats: COMPLETE Column stats: PARTIAL + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int), _col2 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 715 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: smallint) + Statistics: Num rows: 715 
Data size: 5720 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 357 Data size: 2856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 357 Data size: 2856 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_a +PREHOOK: Input: default@orc_a@y=2000/q=0 +PREHOOK: Input: default@orc_a@y=2000/q=1 +PREHOOK: Input: default@orc_a@y=2000/q=2 +PREHOOK: Input: default@orc_a@y=2000/q=3 +PREHOOK: Input: default@orc_a@y=2000/q=4 +PREHOOK: Input: default@orc_a@y=2000/q=5 +PREHOOK: Input: default@orc_a@y=2000/q=6 +PREHOOK: Input: default@orc_a@y=2000/q=7 +PREHOOK: Input: default@orc_a@y=2000/q=8 +PREHOOK: Input: default@orc_a@y=2000/q=9 +PREHOOK: Input: default@orc_b +#### A masked pattern was here #### +POSTHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_a +POSTHOOK: Input: default@orc_a@y=2000/q=0 +POSTHOOK: Input: default@orc_a@y=2000/q=1 +POSTHOOK: Input: default@orc_a@y=2000/q=2 +POSTHOOK: Input: default@orc_a@y=2000/q=3 +POSTHOOK: Input: default@orc_a@y=2000/q=4 +POSTHOOK: Input: default@orc_a@y=2000/q=5 +POSTHOOK: Input: default@orc_a@y=2000/q=6 +POSTHOOK: Input: default@orc_a@y=2000/q=7 +POSTHOOK: 
Input: default@orc_a@y=2000/q=8 +POSTHOOK: Input: default@orc_a@y=2000/q=9 +POSTHOOK: Input: default@orc_b +#### A masked pattern was here #### +2000 1 4 +PREHOOK: query: insert into table orc_a partition (y=2001, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 700 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_a@y=2001 +POSTHOOK: query: insert into table orc_a partition (y=2001, q) +select cbigint, cdouble, csmallint % 10 from alltypesorc + where cbigint is not null and csmallint > 0 order by cbigint asc limit 700 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_a@y=2001/q=0 +POSTHOOK: Output: default@orc_a@y=2001/q=1 +POSTHOOK: Output: default@orc_a@y=2001/q=2 +POSTHOOK: Output: default@orc_a@y=2001/q=3 +POSTHOOK: Output: default@orc_a@y=2001/q=4 +POSTHOOK: Output: default@orc_a@y=2001/q=5 +POSTHOOK: Output: default@orc_a@y=2001/q=6 +POSTHOOK: Output: default@orc_a@y=2001/q=7 +POSTHOOK: Output: default@orc_a@y=2001/q=8 +POSTHOOK: Output: default@orc_a@y=2001/q=9 +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=0).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=0).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=1).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=1).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=2).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=2).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, 
comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=3).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=3).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=4).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=4).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=5).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=5).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=6).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=6).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=7).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=7).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=8).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=8).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=9).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: orc_a PARTITION(y=2001,q=9).id SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, 
comment:null), ] +PREHOOK: query: explain +select * from orc_a a join orc_b b on a.id=b.id +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from orc_a a join orc_b b on a.id=b.id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), cfloat (type: float) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 1350 Data size: 32400 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 1350 Data size: 10800 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: id (type: bigint), cdouble (type: double), y (type: int), q (type: smallint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1350 Data size: 10800 Basic stats: COMPLETE Column stats: PARTIAL + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1485 Data size: 11880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1485 Data size: 11880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from orc_a a join orc_b b on a.id=b.id +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_a +PREHOOK: Input: default@orc_a@y=2000/q=0 +PREHOOK: Input: default@orc_a@y=2000/q=1 +PREHOOK: Input: default@orc_a@y=2000/q=2 +PREHOOK: Input: default@orc_a@y=2000/q=3 +PREHOOK: Input: default@orc_a@y=2000/q=4 +PREHOOK: Input: default@orc_a@y=2000/q=5 +PREHOOK: Input: default@orc_a@y=2000/q=6 +PREHOOK: Input: default@orc_a@y=2000/q=7 +PREHOOK: Input: default@orc_a@y=2000/q=8 +PREHOOK: Input: default@orc_a@y=2000/q=9 +PREHOOK: Input: default@orc_a@y=2001/q=0 +PREHOOK: Input: default@orc_a@y=2001/q=1 +PREHOOK: Input: default@orc_a@y=2001/q=2 +PREHOOK: Input: default@orc_a@y=2001/q=3 +PREHOOK: Input: default@orc_a@y=2001/q=4 +PREHOOK: Input: default@orc_a@y=2001/q=5 +PREHOOK: Input: default@orc_a@y=2001/q=6 +PREHOOK: Input: default@orc_a@y=2001/q=7 +PREHOOK: Input: default@orc_a@y=2001/q=8 +PREHOOK: Input: default@orc_a@y=2001/q=9 +PREHOOK: Input: default@orc_b +#### A masked pattern was here #### +POSTHOOK: query: select * from orc_a a join orc_b b on a.id=b.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_a +POSTHOOK: Input: default@orc_a@y=2000/q=0 +POSTHOOK: Input: default@orc_a@y=2000/q=1 +POSTHOOK: Input: default@orc_a@y=2000/q=2 +POSTHOOK: Input: default@orc_a@y=2000/q=3 +POSTHOOK: Input: default@orc_a@y=2000/q=4 +POSTHOOK: Input: default@orc_a@y=2000/q=5 +POSTHOOK: Input: default@orc_a@y=2000/q=6 +POSTHOOK: Input: default@orc_a@y=2000/q=7 +POSTHOOK: Input: default@orc_a@y=2000/q=8 +POSTHOOK: Input: default@orc_a@y=2000/q=9 +POSTHOOK: Input: default@orc_a@y=2001/q=0 +POSTHOOK: Input: default@orc_a@y=2001/q=1 +POSTHOOK: Input: default@orc_a@y=2001/q=2 +POSTHOOK: Input: default@orc_a@y=2001/q=3 +POSTHOOK: Input: 
default@orc_a@y=2001/q=4 +POSTHOOK: Input: default@orc_a@y=2001/q=5 +POSTHOOK: Input: default@orc_a@y=2001/q=6 +POSTHOOK: Input: default@orc_a@y=2001/q=7 +POSTHOOK: Input: default@orc_a@y=2001/q=8 +POSTHOOK: Input: default@orc_a@y=2001/q=9 +POSTHOOK: Input: default@orc_b +#### A masked pattern was here #### +-2143024405 15601.0 2001 1 -2143024405 -29.0 +-2143024405 15601.0 2000 1 -2143024405 -29.0 +-2142615863 15601.0 2001 1 -2142615863 -4.0 +-2142615863 15601.0 2000 1 -2142615863 -4.0 +-2141336536 15601.0 2000 1 -2141336536 26.0 +-2141336536 15601.0 2001 1 -2141336536 26.0 +-2136727102 15601.0 2000 1 -2136727102 1.0 +-2136727102 15601.0 2001 1 -2136727102 1.0 +PREHOOK: query: explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +PREHOOK: type: QUERY +POSTHOOK: query: explain +select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Map Operator Tree: + TableScan + alias: a + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 1350 Data size: 32400 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 1350 Data size: 10800 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + 
expressions: id (type: bigint), y (type: int), q (type: smallint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1350 Data size: 10800 Basic stats: COMPLETE Column stats: PARTIAL + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1485 Data size: 11880 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int), _col2 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1485 Data size: 11880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: smallint) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: smallint) + Statistics: Num rows: 1485 Data size: 11880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: llap + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 742 Data size: 5936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 742 Data size: 5936 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_a +PREHOOK: Input: default@orc_a@y=2000/q=0 +PREHOOK: Input: default@orc_a@y=2000/q=1 +PREHOOK: Input: 
default@orc_a@y=2000/q=2 +PREHOOK: Input: default@orc_a@y=2000/q=3 +PREHOOK: Input: default@orc_a@y=2000/q=4 +PREHOOK: Input: default@orc_a@y=2000/q=5 +PREHOOK: Input: default@orc_a@y=2000/q=6 +PREHOOK: Input: default@orc_a@y=2000/q=7 +PREHOOK: Input: default@orc_a@y=2000/q=8 +PREHOOK: Input: default@orc_a@y=2000/q=9 +PREHOOK: Input: default@orc_a@y=2001/q=0 +PREHOOK: Input: default@orc_a@y=2001/q=1 +PREHOOK: Input: default@orc_a@y=2001/q=2 +PREHOOK: Input: default@orc_a@y=2001/q=3 +PREHOOK: Input: default@orc_a@y=2001/q=4 +PREHOOK: Input: default@orc_a@y=2001/q=5 +PREHOOK: Input: default@orc_a@y=2001/q=6 +PREHOOK: Input: default@orc_a@y=2001/q=7 +PREHOOK: Input: default@orc_a@y=2001/q=8 +PREHOOK: Input: default@orc_a@y=2001/q=9 +PREHOOK: Input: default@orc_b +#### A masked pattern was here #### +POSTHOOK: query: select y,q,count(*) from orc_a a join orc_b b on a.id=b.id group by y,q +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_a +POSTHOOK: Input: default@orc_a@y=2000/q=0 +POSTHOOK: Input: default@orc_a@y=2000/q=1 +POSTHOOK: Input: default@orc_a@y=2000/q=2 +POSTHOOK: Input: default@orc_a@y=2000/q=3 +POSTHOOK: Input: default@orc_a@y=2000/q=4 +POSTHOOK: Input: default@orc_a@y=2000/q=5 +POSTHOOK: Input: default@orc_a@y=2000/q=6 +POSTHOOK: Input: default@orc_a@y=2000/q=7 +POSTHOOK: Input: default@orc_a@y=2000/q=8 +POSTHOOK: Input: default@orc_a@y=2000/q=9 +POSTHOOK: Input: default@orc_a@y=2001/q=0 +POSTHOOK: Input: default@orc_a@y=2001/q=1 +POSTHOOK: Input: default@orc_a@y=2001/q=2 +POSTHOOK: Input: default@orc_a@y=2001/q=3 +POSTHOOK: Input: default@orc_a@y=2001/q=4 +POSTHOOK: Input: default@orc_a@y=2001/q=5 +POSTHOOK: Input: default@orc_a@y=2001/q=6 +POSTHOOK: Input: default@orc_a@y=2001/q=7 +POSTHOOK: Input: default@orc_a@y=2001/q=8 +POSTHOOK: Input: default@orc_a@y=2001/q=9 +POSTHOOK: Input: default@orc_b +#### A masked pattern was here #### +2001 1 4 +2000 1 4 +PREHOOK: query: DROP TABLE orc_a +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_a 
+PREHOOK: Output: default@orc_a +POSTHOOK: query: DROP TABLE orc_a +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_a +POSTHOOK: Output: default@orc_a +PREHOOK: query: DROP TABLE orc_b +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_b +PREHOOK: Output: default@orc_b +POSTHOOK: query: DROP TABLE orc_b +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_b +POSTHOOK: Output: default@orc_b